Skip to content

Commit

Permalink
Merge pull request #205 from ufal/better-logging
Browse files Browse the repository at this point in the history
Better logging
  • Loading branch information
kasnerz authored Feb 4, 2025
2 parents fd3954c + 4966e27 commit 5f89da9
Show file tree
Hide file tree
Showing 20 changed files with 141 additions and 48 deletions.
2 changes: 1 addition & 1 deletion factgenie/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def generate_example_index(app, campaign):
Expand Down
4 changes: 2 additions & 2 deletions factgenie/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
app.db["announcers"] = {}
app.wsgi_app = ProxyFix(app.wsgi_app, x_host=1)

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


# -----------------
Expand Down Expand Up @@ -228,7 +228,7 @@ def browse():

if dataset_id and split and example_idx:
display_example = {"dataset": dataset_id, "split": split, "example_idx": int(example_idx)}
logger.info(f"Browsing {display_example}")
logger.info(f"Serving permalink {dataset_id} / {split} / {example_idx}")
else:
display_example = None

Expand Down
80 changes: 63 additions & 17 deletions factgenie/bin/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,39 +279,85 @@ def run_llm_campaign(campaign_id: str):
)


def create_app(**kwargs):
import yaml
def setup_logging(config):
    """Configure and return the shared ``factgenie`` logger.

    Sets up three outputs:
      * a timestamped ``*_error.log`` file capturing WARNING and above,
      * a timestamped ``*_info.log`` file capturing INFO messages only,
      * colored console output via ``coloredlogs``.

    File handlers strip ANSI escape codes so log files stay plain text.
    The logging level is read from ``config["logging"]["level"]``
    (default ``"INFO"``).

    NOTE(review): calling this more than once per process would attach
    duplicate file handlers to the same named logger — presumably it is
    invoked a single time from ``create_app``; verify against callers.
    """
    import logging
    import os
    import re
    from datetime import datetime

    import coloredlogs

    from factgenie import ROOT_DIR

    # Compile once and reuse: matches ANSI escape sequences (colors,
    # cursor movement) emitted for the colored console output.
    ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    class PlainTextFormatter(logging.Formatter):
        """Formatter that removes ANSI escape codes from the final message."""

        def format(self, record):
            return ansi_escape.sub("", super().format(record))

    os.makedirs(f"{ROOT_DIR}/logs", exist_ok=True)
    datetime_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    log_format = "%(asctime)s - %(levelname)s - %(message)s"

    logger = logging.getLogger("factgenie")

    # Get logging level from config (falls back to INFO if unset).
    logging_level = config.get("logging", {}).get("level", "INFO")
    logger.setLevel(logging_level)

    # File handler for warnings and errors.
    error_handler = logging.FileHandler(f"{ROOT_DIR}/logs/{datetime_str}_error.log")
    error_handler.setLevel(logging.WARNING)
    error_handler.setFormatter(PlainTextFormatter(log_format))

    # File handler for INFO messages only (the filter excludes DEBUG,
    # WARNING and above so the file mirrors normal operation output).
    info_handler = logging.FileHandler(f"{ROOT_DIR}/logs/{datetime_str}_info.log")
    info_handler.setLevel(logging.INFO)
    info_handler.addFilter(lambda record: record.levelno == logging.INFO)
    info_handler.setFormatter(PlainTextFormatter(log_format))

    # Colored console output; coloredlogs installs its own stream handler
    # on `logger`, so no explicit StreamHandler is needed here.
    coloredlogs.install(level=logging_level, logger=logger, fmt=log_format)

    logger.addHandler(error_handler)
    logger.addHandler(info_handler)

    return logger


def create_app(**kwargs):
import yaml
import os
import shutil
import logging
import factgenie.workflows as workflows
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
from factgenie.utils import check_login
from factgenie import ROOT_DIR, MAIN_CONFIG_PATH, MAIN_CONFIG_TEMPLATE_PATH, CAMPAIGN_DIR, INPUT_DIR, OUTPUT_DIR

file_handler = logging.FileHandler("error.log")
file_handler.setLevel(logging.ERROR)

if not MAIN_CONFIG_PATH.exists():
print("Activating the default configuration.")
shutil.copy(MAIN_CONFIG_TEMPLATE_PATH, MAIN_CONFIG_PATH)

with open(MAIN_CONFIG_PATH) as f:
config = yaml.safe_load(f)

logging_level = config.get("logging", {}).get("level", "INFO")
logging.basicConfig(
format="%(levelname)s (%(filename)s:%(lineno)d) - %(message)s",
level=logging_level,
handlers=[file_handler, logging.StreamHandler()],
)
logger = logging.getLogger(__name__)
coloredlogs.install(
level=logging_level,
logger=logger,
fmt="%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s",
)
logger = setup_logging(config)

config["host_prefix"] = os.getenv("FACTGENIE_HOST_PREFIX", config["host_prefix"])
config["login"]["active"] = os.getenv("FACTGENIE_LOGIN_ACTIVE", config["login"]["active"])
config["login"]["lock_view_pages"] = os.getenv(
Expand Down
2 changes: 1 addition & 1 deletion factgenie/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datetime import datetime
from factgenie import CAMPAIGN_DIR

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


class CampaignMode:
Expand Down
2 changes: 1 addition & 1 deletion factgenie/crowdsourcing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from factgenie import CAMPAIGN_DIR, PREVIEW_STUDY_ID, TEMPLATES_DIR
from factgenie.campaign import CampaignMode, ExampleStatus

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def create_crowdsourcing_campaign(app, campaign_id, config, campaign_data):
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/basic.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

from factgenie.datasets.dataset import Dataset
from pathlib import Path
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from factgenie import INPUT_DIR, OUTPUT_DIR

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def get_dataset_classes():
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/gsmarena.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import requests

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

from factgenie.datasets.basic import JSONDataset
from factgenie.datasets.quintd import QuintdDataset
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/hf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from factgenie.datasets.dataset import Dataset

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


class HFDataset(Dataset):
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/ice_hockey.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

from datetime import datetime
from tinyhtml import h
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/logicnlg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import requests

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")
from factgenie.datasets.hf_dataset import HFDataset
from tinyhtml import h

Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/openweather.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

import json
from factgenie.datasets.basic import JSONDataset
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/owid.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

import dateutil.parser
from factgenie.datasets.dataset import Dataset
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/propaganda_techniques.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from factgenie.utils import resumable_download
from factgenie.datasets.dataset import Dataset

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

PTC_DATASET_ID = "propaganda-techniques"
PTC_CAMPAIGN_ID = "ptc"
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/quintd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from factgenie.utils import resumable_download
from pathlib import Path

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


class QuintdDataset(Dataset):
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/wikidata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")
from factgenie.datasets.basic import JSONDataset
from factgenie.datasets.quintd import QuintdDataset
from tinyhtml import h
Expand Down
19 changes: 17 additions & 2 deletions factgenie/llm_campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from factgenie import CAMPAIGN_DIR, OUTPUT_DIR, TEMPLATES_DIR

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def create_llm_campaign(app, mode, campaign_id, config, campaign_data, datasets, overwrite=False):
Expand Down Expand Up @@ -149,7 +149,20 @@ def run_llm_campaign(app, mode, campaign_id, announcer, campaign, datasets, mode
campaign.update_metadata()

provider = campaign.metadata["config"].get("type", None)
logger.info(f"Starting LLM campaign {campaign_id} ({provider})")

logger.info(f"Starting LLM campaign \033[1m{campaign_id}\033[0m | {provider}")

logger.info(f"=" * 50)
logger.info(f"\033[1mModel\033[0m: {campaign.metadata['config']['model']}")
logger.info(f"\033[1mAPI URL\033[0m: {campaign.metadata['config'].get('api_url')}")
logger.info(f"\033[1mModel args\033[0m: {campaign.metadata['config'].get('model_args')}")
logger.info(f"\033[1mSystem message\033[0m: \"{campaign.metadata['config'].get('system_msg')}\"")
logger.info(f"\033[1mAnnotation span categories\033[0m:")

for cat in campaign.metadata["config"].get("annotation_span_categories", []):
logger.info(f"[{cat['name']}] {cat['description']}")

logger.info(f"=" * 50)

# regenerate output index
workflows.get_output_index(app, force_reload=True)
Expand Down Expand Up @@ -211,7 +224,9 @@ def run_llm_campaign(app, mode, campaign_id, announcer, campaign, datasets, mode
payload = {"campaign_id": campaign_id, "stats": stats, "type": "result", "response": response}

utils.announce(announcer, payload)
logger.info(f"-" * 50)
logger.info(f"{campaign_id}: {stats['finished']}/{stats['total']} examples")
logger.info(f"-" * 50)

# if all examples are finished, set the campaign status to finished
if len(db.status.unique()) == 1 and db.status.unique()[0] == ExampleStatus.FINISHED:
Expand Down
54 changes: 43 additions & 11 deletions factgenie/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
logging.getLogger("LiteLLM Proxy").setLevel(logging.ERROR)
logging.getLogger("LiteLLM Router").setLevel(logging.ERROR)

logger = logging.getLogger(__name__)
# disable requests logging
logging.getLogger("httpx").setLevel(logging.ERROR)

logger = logging.getLogger("factgenie")

DIR_PATH = os.path.dirname(__file__)
LLM_ANNOTATION_DIR = os.path.join(DIR_PATH, "annotations")
Expand Down Expand Up @@ -163,18 +166,42 @@ def parse_annotations(self, text, annotations_json):
annotations_obj = OutputAnnotations.model_validate_json(annotations_json)
annotations = annotations_obj.annotations
except ValidationError as e:
logger.error(f"Model response is not in the expected format: {e}\n\nResponse: {annotations_json=}")
logger.error("Parsing error: ", json.loads(e.json())[0]["msg"])
try:
logger.error(f"Model response does not follow the schema.")
parsed_json = json.loads(annotations_json)
logger.error(f"Model response: {parsed_json}")
except json.JSONDecodeError:
logger.error(f"Model response is not a valid JSON.")
logger.error(f"Model response: {annotations_json}")

return []

annotation_list = []
current_pos = 0

for annotation in annotations:
logger.info("Annotated text:")
logger.info(f"\033[34m{text}\033[0m")

logger.info(f"Received {len(annotations)} annotations.")

for i, annotation in enumerate(annotations):
annotated_span = annotation.text.lower()

if len(text) == 0:
logger.warning(f"❌ Span EMPTY.")
continue

# find the `start` index of the error in the text
start_pos = text.lower().find(annotation.text.lower(), current_pos)
start_pos = text.lower().find(annotated_span, current_pos)

if start_pos == -1:
logger.warning(f'Cannot find {annotation=} in text "{text[current_pos:]}"')
if text.lower().find(annotated_span) != -1:
# The annotation was found earlier in the text. That may be an accident, therefore we ignore it. The model should be instructed to order the annotation sequentially.
logger.warning(f'❌ Span OUT OF ORDER: "{annotated_span}"')
else:
# The annotation was not found in the text.
logger.warning(f'❌ Span NOT FOUND: "{annotated_span}"')
continue

annotation_d = annotation.model_dump()
Expand All @@ -185,6 +212,17 @@ def parse_annotations(self, text, annotations_json):

# Save the start position of the annotation
annotation_d["start"] = start_pos

try:
annotation_type_str = self.config["annotation_span_categories"][annotation_d["type"]]["name"]
except:
logger.error(f"Annotation type {annotation_d['type']} not found in the annotation_span_categories.")
continue

logger.info(
f'[\033[32m\033[1m{annotation_type_str}\033[0m] "\033[32m{annotation.text}\033[0m" ({start_pos}:{start_pos + len(annotation.text)})'
)

annotation_list.append(annotation_d)

overlap_allowed = self.config.get("annotation_overlap_allowed", False)
Expand Down Expand Up @@ -247,20 +285,14 @@ def annotate_example(self, data, text):
try:
prompt = self.prompt(data, text)

logger.debug(f"Calling LiteLLM API.")
logger.debug(f"Prompt: {prompt}")
logger.debug(f"Model: {model}")
logger.debug(f"Model args: {self.config.get('model_args', {})}")
logger.debug(f"API URL: {self._api_url()}")
logger.debug(f"System message: {self.config['system_msg']}")

response = self.get_model_response(prompt, model_service)

logger.debug(f"Prompt tokens: {response.usage.prompt_tokens}")
logger.debug(f"Response tokens: {response.usage.completion_tokens}")

annotation_str = response.choices[0].message.content
logger.info(annotation_str)

return {
"prompt": prompt,
Expand Down
2 changes: 1 addition & 1 deletion factgenie/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
MAIN_CONFIG_PATH,
)

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


# https://maxhalford.github.io/blog/flask-sse-no-deps/
Expand Down
Loading

0 comments on commit 5f89da9

Please sign in to comment.