Merge branch 'main' into develop_own_bot

konfuzio-ai · Dec 4, 2023 · 07f0f46 · 07f0f46
2 parents 9ea0225 + 6184dee
commit 07f0f46
Show file tree

Hide file tree

Showing 10 changed files with 185 additions and 17 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -18,6 +18,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
+        pip install tensorrt --extra-index-url https://pypi.nvidia.com
         pip install -e .
 
     - name: Run the script

diff --git a/README.md b/README.md
@@ -1,5 +1,16 @@
 # Welcome to the "AI Comedy Club" Challenge
 
+
+At Helm & Nagel GmbH, renowned for our innovative AI product [Konfuzio](https://konfuzio.com/en/), we have cultivated an inclusive and dynamic environment with our AI Comedy Club.
+
+This platform is open to everyone – not just to those seeking programming positions, but to all who have a passion for AI and a good sense of humor. Whether you're an expert coder, a novice, or simply someone intrigued by the blend of comedy and technology, you're welcome here.
+
+In our AI Comedy Club, coding goes beyond mere technical skills. It's about creating an engaging, fun, and innovative experience. We're not just writing code; we're crafting moments of joy and entertainment through technology.
+
+Moreover, we're on the lookout for a community maintainer – someone who shares our vision of combining laughter and learning. 
+
+## The Challenge
+
 You are about to step into our virtual comedy club, but there's a twist - the stage is open only for AI performers!
 In this challenge, your task is to develop a unique AI performer (a bot), who can not only tell jokes but also appreciates humor by rating jokes told by its fellow AI comedians.
 

diff --git a/bots/GPunT2/joke_bot.py b/bots/GPunT2/joke_bot.py
@@ -0,0 +1,133 @@
+from transformers import AutoTokenizer, AutoModel
+from transformers import pipeline
+from transformers.utils import ModelOutput
+import torch
+import re
+
+
+# From : https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
+# Mean Pooling - Take attention mask into account for correct averaging
+
+
+def mean_pooling(model_output: ModelOutput,
+                 attention_mask: torch.Tensor) -> torch.Tensor:
+    """Average the token embeddings, weighting each token by its mask value.
+
+    Args:
+        model_output: Model output whose first element holds the token
+            embeddings.
+        attention_mask: Mask that is expanded to the shape of the token
+            embeddings and used as per-token weights.
+
+    Returns:
+        The mask-weighted sum over dimension 1 divided by the summed mask.
+    """
+    embeddings: torch.Tensor = model_output[0]
+    weights = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
+    weighted_sum = torch.sum(embeddings * weights, 1)
+    # Clamp the denominator so an all-zero mask row cannot divide by zero.
+    weight_total = torch.clamp(weights.sum(1), min=1e-9)
+    return weighted_sum / weight_total
+
+
+# From : https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
+_SENTENCE_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
+# Holds the (tokenizer, model) pair after the first load so that repeated
+# calls do not load the tokenizer and model again.
+_SENTENCE_MODEL_CACHE: dict = {}
+
+
+def _load_sentence_model() -> tuple:
+    """Return the (tokenizer, model) pair, loading it only on first use."""
+    if 'pair' not in _SENTENCE_MODEL_CACHE:
+        _SENTENCE_MODEL_CACHE['pair'] = (
+            AutoTokenizer.from_pretrained(_SENTENCE_MODEL_NAME),
+            AutoModel.from_pretrained(_SENTENCE_MODEL_NAME),
+        )
+    return _SENTENCE_MODEL_CACHE['pair']
+
+
+def compute_normalized_emb_from_sentence(sentence: str) -> torch.Tensor:
+    """Compute an L2-normalized embedding for `sentence`.
+
+    The text is tokenized, run through the model without gradient tracking,
+    mean-pooled with `mean_pooling` using the attention mask, and finally
+    normalized along dimension 1.
+
+    Args:
+        sentence (str): Text to embed.
+
+    Returns:
+        torch.Tensor: The normalized, pooled embedding.
+    """
+    # Reuse the already loaded tokenizer/model instead of loading per call.
+    tokenizer, model = _load_sentence_model()
+    # Tokenize sentences
+    encoded_input = tokenizer(sentence,
+                              padding=True,
+                              truncation=True,
+                              return_tensors='pt')
+    # Compute token embeddings
+    with torch.no_grad():
+        model_output = model(**encoded_input)
+    # Perform pooling
+    sentence_embeddings = mean_pooling(model_output,
+                                       encoded_input['attention_mask'])
+    # Normalize embeddings
+    return torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
+
+
+# Model identifier passed to `pipeline` as `model=` when `Bot` is created.
+MODEL = 'gpt2'
+# Task name passed to `pipeline` as its first argument.
+TASK = 'text-generation'
+
+
+class Bot:
+    """Joke bot that tells and rates jokes with a text-generation pipeline."""
+
+    name: str = 'Complete Sentences By GPT-2'
+
+    # Prompt used by `tell_joke` when the caller passes no prefix.
+    _DEFAULT_PREFIX: str = 'Here is a pun:'
+    # Characters treated as the end of a sentence.
+    _SENTENCE_ENDINGS: tuple = ('.', '!', '?')
+    # Upper bound on generation retries in `rate_joke`.
+    _MAX_RATING_ATTEMPTS: int = 20
+    # Rating returned when no usable number was generated within the bound.
+    _FALLBACK_RATING: int = 5
+
+    def __init__(self) -> None:
+        self.joke_generator = pipeline(TASK, model=MODEL)
+
+    def _generate_joke(self, prefix: str, max_length: int) -> str:
+        """Use the GPT-2 model to generate a text (joke) based on `prefix`.
+
+        Args:
+            prefix (str): Text prefix.
+            max_length (int): Max length of the generated text.
+
+        Returns:
+            str: Text generated from the GPT-2 model.
+        """
+        output_dict = self.joke_generator(
+            prefix,
+            max_length=max_length,
+            do_sample=True,
+            pad_token_id=self.joke_generator.model.config.eos_token_id
+        )[0]
+        joke: str = output_dict['generated_text']
+        return joke
+
+    def tell_joke(self, prefix: str | None = None) -> str:
+        """Use GPT-2 model to tell a joke.
+
+        The text is extended in rounds (the previous output is fed back as
+        the next prompt) until either:
+            - the text ends with one of './!/?', or
+            - the text contains at least two of './!/?', in which case it
+              is cut right after the last one.
+
+        Args:
+            prefix (str | None): Text prefix for the joke. If None, the
+                default prompt 'Here is a pun:' is used.
+
+        Returns:
+            str: Text generated from the GPT-2 model.
+        """
+        endings = self._SENTENCE_ENDINGS
+        joke = self._DEFAULT_PREFIX if prefix is None else prefix
+        max_length = len(joke) + 25
+        while True:
+            joke = self._generate_joke(joke, max_length)
+            max_length += 25
+            # `endswith` is safe on empty text, unlike indexing `joke[-1]`.
+            if joke.endswith(endings):
+                break
+            if sum(joke.count(mark) for mark in endings) >= 2:
+                # Keep everything up to and including the last terminator.
+                last_end = max(joke.rfind(mark) for mark in endings)
+                joke = joke[:last_end + 1]
+                break
+        return joke
+
+    def rate_joke(self, joke: str) -> float:
+        """Rate the generated joke using GPT-2.
+
+        A rating prompt is completed by the model and the first number in
+        the completion is taken as the rating. Generation is retried up to
+        `_MAX_RATING_ATTEMPTS` times; if no number within [0, 10] shows up,
+        `_FALLBACK_RATING` is returned so the call always terminates.
+
+        Args:
+            joke (str): Text of the joke to rate.
+
+        Returns:
+            float: Rating in [0...10], truncated to a whole number.
+        """
+        prompt = f"I would rate from 0 to 10 the joke '{joke}' as: "
+        max_length = len(prompt) + 2
+        pattern = re.compile(r"[-+]?\d*\.\d+|\d+")
+        for _ in range(self._MAX_RATING_ATTEMPTS):
+            generated = self._generate_joke(prompt, max_length)
+            # The generated text starts with the prompt (tell_joke relies on
+            # the same behaviour). Search only the continuation so the
+            # "0"/"10" of the prompt and any digits inside the joke itself
+            # are never mistaken for the rating.
+            match = pattern.search(generated[len(prompt):])
+            if match is None:
+                continue
+            try:
+                rating = float(match.group())
+            except ValueError:
+                continue
+            if 0 <= rating <= 10:
+                return int(rating)
+        return self._FALLBACK_RATING
diff --git a/bots/GPunT2/requirements.txt b/bots/GPunT2/requirements.txt
@@ -0,0 +1,2 @@
+torch
+transformers
diff --git a/bots/GPunT2/test_bot.py b/bots/GPunT2/test_bot.py
@@ -0,0 +1,22 @@
+import pytest
+from joke_bot import Bot
+
+
+@pytest.fixture
+def bot() -> Bot:
+    """Build a new `Bot` instance for a test."""
+    fresh_bot = Bot()
+    return fresh_bot
+
+
+def test_tell_joke(bot: Bot) -> None:
+    """Check that `tell_joke` returns a string meeting its stop criteria.
+
+    `tell_joke` stops when the text ends with '.', '!' or '?', or when it
+    contains at least two of those characters, so either condition is
+    accepted here.
+    """
+    joke = bot.tell_joke()
+    assert isinstance(joke, str), "Joke is not a string."
+    endings = ('.', '!', '?')
+    # Accept every terminator the bot itself stops on, not only '.'.
+    criteria1 = joke.endswith(endings)
+    criteria2 = sum(joke.count(mark) for mark in endings) >= 2
+    assert criteria1 or criteria2, "Joke sentence creation criterias not met."
+
+
+def test_rate_joke(bot: Bot) -> None:
+    """Check that `rate_joke` returns a number within 0 to 10 inclusive."""
+    sample = "Why was the computer cold at the office? Because it left its Windows open."  # noqa
+    score = bot.rate_joke(sample)
+    is_number = isinstance(score, (int, float))
+    assert is_number, "Rating is not a number."
+    assert 0 <= score <= 10, "Rating is not within the correct range."
diff --git a/bots/TheJester/requirements.txt b/bots/TheJester/requirements.txt
@@ -1,5 +1,5 @@
 aiofiles==23.2.1
-aiohttp==3.8.5
+aiohttp==3.9.0
 aiosignal==1.3.1
 anyio==3.7.1
 async-timeout==4.0.3
@@ -40,7 +40,7 @@ opentelemetry-exporter-otlp==1.20.0
 opentelemetry-exporter-otlp-proto-common==1.20.0
 opentelemetry-exporter-otlp-proto-grpc==1.20.0
 opentelemetry-exporter-otlp-proto-http==1.20.0
-opentelemetry-instrumentation==0.40b0
+opentelemetry-instrumentation==0.41b0
 opentelemetry-proto==1.20.0
 opentelemetry-sdk==1.20.0
 opentelemetry-semantic-conventions==0.41b0
@@ -67,7 +67,7 @@ tqdm==4.66.1
 typing-inspect==0.9.0
 typing_extensions==4.7.1
 uptrace==1.20.0
-urllib3==2.0.4
+urllib3==2.0.7
 uvicorn==0.23.2
 watchfiles==0.20.0
 websockets==11.0.3

diff --git a/bots/entertAIn/requirements.txt b/bots/entertAIn/requirements.txt
@@ -1,5 +1,5 @@
 absl-py==1.4.0
-aiohttp==3.8.4
+aiohttp==3.9.0
 aiosignal==1.3.1
 alabaster==0.7.13
 arrow==1.2.3
@@ -29,7 +29,7 @@ colorama==0.4.6
 comm==0.1.3
 contourpy==1.0.7
 cookiecutter==2.1.1
-cryptography==40.0.2
+cryptography==41.0.6
 cycler==0.11.0
 debugpy==1.6.7
 decorator==5.1.1
@@ -110,7 +110,7 @@ parso==0.8.3
 pathspec==0.11.1
 pexpect==4.8.0
 pickleshare==0.7.5
-Pillow==9.5.0
+Pillow==10.0.1
 platformdirs==3.5.1
 pluggy==1.0.0
 profanity-check==1.0.3
@@ -204,12 +204,12 @@ transformers==4.30.2
 typing_extensions==4.6.2
 tzdata==2023.3
 ujson==5.7.0
-urllib3==1.26.16
+urllib3==1.26.18
 wasabi==0.10.1
 watchdog==3.0.0
 wcwidth==0.2.6
 webencodings==0.5.1
-Werkzeug==2.3.4
+Werkzeug==3.0.1
 whatthepatch==1.0.5
 wrapt==1.14.1
 yapf==0.33.0

diff --git a/bots/zumabot/data/requirements.txt b/bots/zumabot/data/requirements.txt
@@ -12,7 +12,7 @@ exceptiongroup==1.1.1
 filelock==3.12.2
 fsspec==2023.6.0
 gitdb==4.0.10
-GitPython==3.1.31
+GitPython==3.1.37
 gTTS==2.3.2
 huggingface-hub==0.15.1
 idna==3.4
@@ -33,7 +33,7 @@ numpy==1.25.0
 openpyxl==3.1.2
 packaging==23.1
 pandas==2.0.2
-Pillow==9.5.0
+Pillow==10.0.1
 pluggy==1.0.0
 protobuf==4.23.3
 pyarrow==12.0.1
@@ -76,7 +76,7 @@ typing-inspect==0.9.0
 typing_extensions==4.6.3
 tzdata==2023.3
 tzlocal==4.3.1
-urllib3==2.0.3
+urllib3==2.0.7
 validators==0.20.0
 watchdog==3.0.0
 xformers==0.0.20

diff --git a/main.py b/main.py
@@ -27,7 +27,7 @@ def check_test_pass(directory):
     # Only add the bot if its tests pass
 
     if not check_test_pass(os.path.join(bots_dir, bot_dir)):
-        print(f"Skipping our ai comedy guest '{bot_dir}' because its tests do not pass.")
+        print(f"Skipping our ai comedy guest '{bot_dir}' because it's tests do not pass.")
         continue
 
     # Dynamically load the bot's module
@@ -48,7 +48,6 @@ def check_test_pass(directory):
     else:
         print(f"ERROR: bot dir without .Bot att: {bot_module, bot_dir}")
 
-
 # Scorecard for each bot
 print(f"printing bots list: {bots}")
 scorecard = {bot.name: [] for bot in bots}

diff --git a/requirements.txt b/requirements.txt
@@ -12,7 +12,7 @@ exceptiongroup==1.1.1
 filelock==3.12.2
 fsspec==2023.6.0
 gitdb==4.0.10
-GitPython==3.1.31
+GitPython==3.1.37
 gTTS==2.3.2
 huggingface-hub==0.15.1
 idna==3.4
@@ -33,10 +33,10 @@ numpy==1.25.0
 openpyxl==3.1.2
 packaging==23.1
 pandas==2.0.2
-Pillow==9.5.0
+Pillow==10.0.1
 pluggy==1.0.0
 protobuf==4.23.3
-pyarrow==12.0.1
+pyarrow==14.0.1
 pydeck==0.8.1b0
 Pygments==2.15.1
 Pympler==1.0.1
@@ -76,7 +76,7 @@ typing-inspect==0.9.0
 typing_extensions==4.6.3
 tzdata==2023.3
 tzlocal==4.3.1
-urllib3==2.0.3
+urllib3==2.0.7
 validators==0.20.0
 watchdog==3.0.0
 xformers==0.0.20