update pre-commit hooks and tests #51

Merged: 5 commits, Mar 27, 2024
36 changes: 27 additions & 9 deletions .pre-commit-config.yaml
@@ -1,17 +1,35 @@
repos:
# taken from: https://black.readthedocs.io/en/stable/integrations/source_version_control.html
# Using this mirror lets us use mypyc-compiled black, which is about 2x faster

# black formatter
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.9.1
hooks:
- id: black
# It is recommended to specify the latest version of Python
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.10
# taken from: https://pypi.org/project/ruff/

# ruff linter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.291
rev: v0.3.4
hooks:
- id: ruff
- id: ruff

# mypy static type checker
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
name: mypy-frontend
files: ^frontend/
args: [--config-file=frontend/mypy.ini]
additional_dependencies: [types-requests]
- id: mypy
name: mypy-backend
files: ^backend/
args: [--config-file=backend/mypy.ini]
additional_dependencies: [types-requests]
- id: mypy
name: mypy-test
files: ^tests/
args: [--config-file=tests/mypy.ini]
additional_dependencies: [types-requests]
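
The three mypy hooks partition the repository by directory: pre-commit matches each staged file's repo-relative path against a hook's `files` regex and only hands matching files to that hook, so each area is type-checked against its own `mypy.ini`. A minimal sketch of that filtering (the staged file names are hypothetical; pre-commit performs this matching itself):

```python
import re

# One pattern per mypy hook above; pre-commit applies `files` via regex search.
HOOK_PATTERNS = {
    "mypy-frontend": r"^frontend/",
    "mypy-backend": r"^backend/",
    "mypy-test": r"^tests/",
}

# Hypothetical staged files, for illustration only.
staged = ["backend/fastapi_app.py", "tests/test_app.py", "README.md"]

for hook, pattern in HOOK_PATTERNS.items():
    matched = [path for path in staged if re.search(pattern, path)]
    print(f"{hook}: {matched}")  # README.md matches none of the mypy hooks
```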

186 changes: 66 additions & 120 deletions backend/fastapi_app.py
@@ -1,17 +1,14 @@
# command to run: uvicorn backend.fastapi_app:app --reload
import os
import re
from typing import List
import logging
import sys
from pathlib import Path

from fastapi import FastAPI, HTTPException, UploadFile, Form
from llama_index import ServiceContext
from pydantic import BaseModel, Field

# from llama_index.callbacks import CallbackManager, TokenCountingHandler
from requests.exceptions import MissingSchema
import logging
import sys
from dotenv import load_dotenv
from pathlib import Path
import errno
import certifi

@@ -21,16 +18,23 @@
AITextDocument,
AIPdfDocument,
AIHtmlDocument,
CustomLlamaIndexChatEngineWrapper,
set_up_text_chatbot,
)

from .script_SQL_querying import (
AIDataBase,
DataChatBotWrapper,
set_up_database_chatbot,
)

from .models import (
DoubleUploadException,
NoUploadException,
EmptyQuestionException,
TextSummaryModel,
QuestionModel,
QAResponseModel,
TextResponseModel,
MultipleChoiceTest,
ErrorResponse,
)
from .helpers import load_aws_secrets

# workaround for mac to solve "SSL: CERTIFICATE_VERIFY_FAILED Error"
@@ -69,124 +73,60 @@
# Set-up Chat Engine:
# - LlamaIndex CondenseQuestionChatEngine with RetrieverQueryEngine for text files
# - or querying a database with langchain SQLDatabaseChain and Runnables
app.chat_engine: CustomLlamaIndexChatEngineWrapper | DataChatBotWrapper | None = None
app.callback_manager = None
app.token_counter = None


class DoubleUploadException(Exception):
pass


class NoUploadException(Exception):
pass


class EmptyQuestionException(Exception):
pass


class TextSummaryModel(BaseModel):
file_name: str
text_category: str
summary: str
used_tokens: int


class QuestionModel(BaseModel):
prompt: str
temperature: float


class QAResponseModel(BaseModel):
user_question: str
ai_answer: str
used_tokens: int


class TextResponseModel(BaseModel):
message: str


class MultipleChoiceQuestion(BaseModel):
"""Data Model for a multiple choice question"""

question: str = Field(
...,
description="""An interesting and unique question related to the main
subject of the article.
""",
)
correct_answer: str = Field(..., description="Correct answer to question")
wrong_answer_1: str = Field(
..., description="a unique wrong answer to the question"
)
wrong_answer_2: str = Field(
...,
description="""a unique wrong answer to the question which is different
from wrong_answer_1 and not an empty string
""",
)


class MultipleChoiceTest(BaseModel):
"""Data Model for a multiple choice test"""

questions: List[MultipleChoiceQuestion] = []


class ErrorResponse(BaseModel):
detail: str
app.state.chat_engine = None
app.state.callback_manager = None
app.state.token_counter = None


def load_text_chat_engine() -> None:
if not app.chat_engine or app.chat_engine.data_category == "database":
if not app.state.chat_engine or app.state.chat_engine.data_category == "database":
logging.debug("setting up text chatbot")
logging.debug(f"Debug: {DEBUG_MODE}")
(
app.chat_engine,
app.callback_manager,
app.token_counter,
app.state.chat_engine,
app.state.callback_manager,
app.state.token_counter,
) = set_up_text_chatbot()


def load_database_chat_engine() -> None:
if not app.chat_engine or app.chat_engine.data_category != "database":
if not app.state.chat_engine or app.state.chat_engine.data_category != "database":
logging.debug("setting up database chatbot")
(
app.chat_engine,
app.callback_manager, # is None in database mode
app.token_counter,
app.state.chat_engine,
app.state.callback_manager, # is None in database mode
app.state.token_counter,
) = set_up_database_chatbot()
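
Moving the engine, callback manager, and token counter from ad-hoc attributes on `app` onto `app.state` follows Starlette's intended pattern for shared mutable application state, which type checkers also handle cleanly. A minimal sketch of the pattern, independent of the chatbot code:

```python
from fastapi import FastAPI, Request

app = FastAPI()

# app.state is Starlette's namespace for application-wide objects;
# handlers reach the same namespace through request.app.state.
app.state.chat_engine = None


@app.get("/status")
async def status(request: Request) -> dict[str, bool]:
    return {"engine_loaded": request.app.state.chat_engine is not None}
```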


async def handle_uploadfile(
upload_file: UploadFile,
) -> AITextDocument | AIDataBase | None:
file_name = upload_file.filename
) -> AITextDocument | AIDataBase | AIPdfDocument | None:
if not (file_name := Path(upload_file.filename).name):
return None
with open(cfd / data_dir / file_name, "wb") as f:
f.write(await upload_file.read())
match upload_file.filename.split(".")[-1]:
case "txt":
load_text_chat_engine()
return AITextDocument(file_name, LLM_NAME, app.callback_manager)
return AITextDocument(file_name, LLM_NAME, app.state.callback_manager)
case "pdf":
load_text_chat_engine()
return AIPdfDocument(file_name, LLM_NAME, app.callback_manager)
return AIPdfDocument(file_name, LLM_NAME, app.state.callback_manager)
case "sqlite" | "db":
uri = f"sqlite:///{app_dir}/{data_dir}/{file_name}"
logging.debug(f"uri: {uri} debug {DEBUG_MODE}")
load_database_chat_engine()
document: AIDataBase = AIDataBase.from_uri(uri)
return document
return AIDataBase.from_uri(uri)
return None
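
The new walrus guard also sanitizes the upload name: `Path(upload_file.filename).name` keeps only the final path component, so a crafted filename cannot escape the data directory, and an empty result triggers the early `return None`. A small illustration of that behavior:

```python
from pathlib import Path

# Only the final component survives, defusing path-traversal names.
assert Path("../../etc/passwd").name == "passwd"
assert Path("notes.txt").name == "notes.txt"

# A path with no final component yields "", which the walrus check
# `if not (file_name := Path(...).name)` turns into an early return.
assert Path("/").name == ""
```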


async def handle_upload_url(upload_url) -> None:
async def handle_upload_url(upload_url: str) -> AITextDocument | AIHtmlDocument:
match re.split(r"[./]", upload_url):
case [*_, dir, file_name, "txt"] if dir == "data":
try:
load_text_chat_engine()
return AITextDocument(file_name, LLM_NAME, app.callback_manager)
return AITextDocument(file_name, LLM_NAME, app.state.callback_manager)
except OSError:
raise FileNotFoundError(
errno.ENOENT,
@@ -195,24 +135,30 @@ async def handle_upload_url(upload_url) -> None:
)
case [http, *_] if "http" in http.lower():
load_text_chat_engine()
return AIHtmlDocument(upload_url, LLM_NAME, app.callback_manager)
return AIHtmlDocument(upload_url, LLM_NAME, app.state.callback_manager)
case _:
raise MissingSchema
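
`handle_upload_url` routes on the result of `re.split(r"[./]", upload_url)`: a repo-local `data/<name>.txt` path and an http(s) URL produce differently shaped lists, which the `match` arms pick apart. How the split behaves on representative (made-up) inputs:

```python
import re

# A local text file under data/ ...
print(re.split(r"[./]", "data/notes.txt"))
# ['data', 'notes', 'txt'] -> matches [*_, dir, file_name, "txt"] with dir == "data"

# ... versus a web URL, whose first piece carries the scheme.
print(re.split(r"[./]", "https://example.com/article"))
# ['https:', '', 'example', 'com', 'article'] -> matches [http, *_] since "http" in "https:"
```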


@app.post("/upload", response_model=TextSummaryModel)
async def upload_file(
upload_file: UploadFile | None = None, upload_url: str = Form("")
):
) -> TextSummaryModel:
message = ""
text_category = ""
file_name = ""
file_name: str | None = ""
used_tokens = 0
try:
if upload_file:
if upload_url:
raise DoubleUploadException("You cannot provide both a file and a URL.")
file_name = upload_file.filename
if not (file_name := upload_file.filename):
return TextSummaryModel(
file_name="",
text_category=text_category,
summary=message,
used_tokens=used_tokens,
)
destination_file = Path(cfd / "data" / file_name)
destination_file.parent.mkdir(exist_ok=True, parents=True)
document = await handle_uploadfile(upload_file)
@@ -224,11 +170,11 @@ async def upload_file(
raise NoUploadException(
"You must provide either a file or URL to upload.",
)
if app.chat_engine and document:
app.chat_engine.add_document(document)
if app.state.chat_engine and document:
app.state.chat_engine.add_document(document)
message = document.summary
text_category = document.category
used_tokens = app.token_counter.total_llm_token_count
used_tokens = app.state.token_counter.total_llm_token_count
except MissingSchema:
raise HTTPException(
status_code=400,
@@ -243,7 +189,7 @@ async def upload_file(
status_code=400,
detail=f"There was an unexpected OSError on uploading the file:{e}",
)
logging.debug(f"engine_up?: {app.chat_engine is not None}")
logging.debug(f"engine_up?: {app.state.chat_engine is not None}")
logging.debug(f"message: {message}")
return TextSummaryModel(
file_name=file_name,
@@ -254,18 +200,18 @@


@app.post("/qa_text", response_model=QAResponseModel)
async def qa_text(question: QuestionModel):
logging.debug(f"engine_up?: {app.chat_engine is not None}")
async def qa_text(question: QuestionModel) -> QAResponseModel:
logging.debug(f"engine_up?: {app.state.chat_engine is not None}")
if not question.prompt:
raise EmptyQuestionException(
"Your Question is empty, please type a message and resend it."
)
if app.chat_engine:
app.token_counter.reset_counts()
app.chat_engine.update_temp(question.temperature)
response = app.chat_engine.answer_question(question)
if app.state.chat_engine:
app.state.token_counter.reset_counts()
app.state.chat_engine.update_temp(question.temperature)
response = app.state.chat_engine.answer_question(question)
ai_answer = str(response)
used_tokens = app.token_counter.total_llm_token_count
used_tokens = app.state.token_counter.total_llm_token_count
else:
ai_answer = "Sorry, no context loaded. Please upload a file or url."
used_tokens = 0
@@ -279,22 +225,22 @@ async def qa_text(question: QuestionModel) -> QAResponseModel:
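
A hedged usage sketch of the reworked endpoints with FastAPI's TestClient; the request and response fields follow QuestionModel and QAResponseModel above, and running it assumes the app's dependencies and API keys are configured:

```python
from fastapi.testclient import TestClient

from backend.fastapi_app import app  # module path as in this PR

client = TestClient(app)

# With no document uploaded, qa_text answers with the
# "no context loaded" fallback and zero tokens used.
response = client.post(
    "/qa_text",
    json={"prompt": "What is the article about?", "temperature": 0.0},
)
print(response.json())  # keys: user_question, ai_answer, used_tokens
```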

@app.get("/clear_storage", response_model=TextResponseModel)
async def clear_storage():
if app.chat_engine:
app.chat_engine.clear_data_storage()
if app.state.chat_engine:
app.state.chat_engine.clear_data_storage()
logging.info("chat engine cleared...")
if (cfd / "data").exists():
for file in Path(cfd / "data").iterdir():
os.remove(file)
app.chat_engine = None
app.token_counter = None
app.callback_manager = None
app.state.chat_engine = None
app.state.token_counter = None
app.state.callback_manager = None
return TextResponseModel(message="Knowledge base successfully cleared")


@app.get("/clear_history", response_model=TextResponseModel)
async def clear_history():
if app.chat_engine:
message = app.chat_engine.clear_chat_history()
if app.state.chat_engine:
message = app.state.chat_engine.clear_chat_history()
# logging.debug("chat history cleared...")
return TextResponseModel(message=message)
return TextResponseModel(
@@ -310,13 +256,13 @@ async def clear_history():
},
)
def get_quiz():
if not app.chat_engine or not app.chat_engine.vector_index.ref_doc_info:
if not app.state.chat_engine or not app.state.chat_engine.vector_index.ref_doc_info:
raise HTTPException(
status_code=400,
detail="No context provided, please provide a url or a text file!",
)

if app.chat_engine.data_category == "database":
if app.state.chat_engine.data_category == "database":
raise HTTPException(
status_code=400,
detail="""A database is loaded, but no valid context for a quiz.
@@ -344,7 +290,7 @@ def generate_quiz_from_context():
from llama_index.prompts import PromptTemplate
from llama_index.response import Response

vector_index = app.chat_engine.vector_index
vector_index = app.state.chat_engine.vector_index
lc_output_parser = PydanticOutputParser(pydantic_object=MultipleChoiceTest)
output_parser = LangchainOutputParser(lc_output_parser)
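
The output parser's job is ultimately schema validation: the LLM is prompted with a format derived from `MultipleChoiceTest` (now in `backend/models.py`) and its JSON reply is parsed back into the model. A self-contained sketch of that round trip using plain Pydantic v2 (`parse_raw` on v1); the fields mirror the deleted block above and the sample JSON is invented:

```python
from pydantic import BaseModel


class MultipleChoiceQuestion(BaseModel):
    question: str
    correct_answer: str
    wrong_answer_1: str
    wrong_answer_2: str


class MultipleChoiceTest(BaseModel):
    questions: list[MultipleChoiceQuestion] = []


# Invented LLM output; validation is what the parser adds on top of prompting.
raw_llm_output = (
    '{"questions": [{"question": "Where does the PR keep the chat engine?", '
    '"correct_answer": "app.state", '
    '"wrong_answer_1": "a module-level global", '
    '"wrong_answer_2": "os.environ"}]}'
)
quiz = MultipleChoiceTest.model_validate_json(raw_llm_output)
print(quiz.questions[0].correct_answer)  # app.state
```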
