Skip to content

Commit

Permalink
Merge pull request #51 from mt7180/50-update-pre-commit-hooks
Browse files Browse the repository at this point in the history
update pre commit hooks and tests
  • Loading branch information
mt7180 authored Mar 27, 2024
2 parents 216e86a + a32a39c commit 7ff410d
Show file tree
Hide file tree
Showing 15 changed files with 328 additions and 241 deletions.
36 changes: 27 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
repos:
# taken from: https://black.readthedocs.io/en/stable/integrations/source_version_control.html
# Using this mirror lets us use mypyc-compiled black, which is about 2x faster

# black formatter
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.9.1
hooks:
- id: black
# It is recommended to specify the latest version of Python
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.10
# taken from: https://pypi.org/project/ruff/

# ruff linter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.291
rev: v0.3.4
hooks:
- id: ruff
- id: ruff

# mypy static type checker
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
name: mypy-frontend
files: ^frontend/
args: [--config-file=frontend/mypy.ini]
additional_dependencies: [types-requests]
- id: mypy
name: mypy-backend
files: ^backend/
args: [--config-file=backend/mypy.ini]
additional_dependencies: [types-requests]
- id: mypy
name: mypy-test
files: ^tests/
args: [--config-file=tests/mypy.ini]
additional_dependencies: [types-requests]

186 changes: 66 additions & 120 deletions backend/fastapi_app.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
# command to run: uvicorn backend.fastapi_app:app --reload
import os
import re
from typing import List
import logging
import sys
from pathlib import Path

from fastapi import FastAPI, HTTPException, UploadFile, Form
from llama_index import ServiceContext
from pydantic import BaseModel, Field

# from llama_index.callbacks import CallbackManager, TokenCountingHandler
from requests.exceptions import MissingSchema
import logging
import sys
from dotenv import load_dotenv
from pathlib import Path
import errno
import certifi

Expand All @@ -21,16 +18,23 @@
AITextDocument,
AIPdfDocument,
AIHtmlDocument,
CustomLlamaIndexChatEngineWrapper,
set_up_text_chatbot,
)

from .script_SQL_querying import (
AIDataBase,
DataChatBotWrapper,
set_up_database_chatbot,
)

from .models import (
DoubleUploadException,
NoUploadException,
EmptyQuestionException,
TextSummaryModel,
QuestionModel,
QAResponseModel,
TextResponseModel,
MultipleChoiceTest,
ErrorResponse,
)
from .helpers import load_aws_secrets

# workaround for mac to solve "SSL: CERTIFICATE_VERIFY_FAILED Error"
Expand Down Expand Up @@ -69,124 +73,60 @@
# Set-up Chat Engine:
# - LlamaIndex CondenseQuestionChatEngine with RetrieverQueryEngine for text files
# - or querying a database with langchain SQLDatabaseChain and Runnables
app.chat_engine: CustomLlamaIndexChatEngineWrapper | DataChatBotWrapper | None = None
app.callback_manager = None
app.token_counter = None


class DoubleUploadException(Exception):
pass


class NoUploadException(Exception):
pass


class EmptyQuestionException(Exception):
pass


class TextSummaryModel(BaseModel):
file_name: str
text_category: str
summary: str
used_tokens: int


class QuestionModel(BaseModel):
prompt: str
temperature: float


class QAResponseModel(BaseModel):
user_question: str
ai_answer: str
used_tokens: int


class TextResponseModel(BaseModel):
message: str


class MultipleChoiceQuestion(BaseModel):
"""Data Model for a multiple choice question"""

question: str = Field(
...,
description="""An interesting and unique question related to the main
subject of the article.
""",
)
correct_answer: str = Field(..., description="Correct answer to question")
wrong_answer_1: str = Field(
..., description="a unique wrong answer to the question"
)
wrong_answer_2: str = Field(
...,
description="""a unique wrong answer to the question which is different
from wrong_answer_1 and not an empty string
""",
)


class MultipleChoiceTest(BaseModel):
"""Data Model for a multiple choice test"""

questions: List[MultipleChoiceQuestion] = []


class ErrorResponse(BaseModel):
detail: str
app.state.chat_engine = None
app.state.callback_manager = None
app.state.token_counter = None


def load_text_chat_engine() -> None:
if not app.chat_engine or app.chat_engine.data_category == "database":
if not app.state.chat_engine or app.state.chat_engine.data_category == "database":
logging.debug("setting up text chatbot")
logging.debug(f"Debug: {DEBUG_MODE}")
(
app.chat_engine,
app.callback_manager,
app.token_counter,
app.state.chat_engine,
app.state.callback_manager,
app.state.token_counter,
) = set_up_text_chatbot()


def load_database_chat_engine() -> None:
if not app.chat_engine or app.chat_engine.data_category != "database":
if not app.state.chat_engine or app.state.chat_engine.data_category != "database":
logging.debug("setting up database chatbot")
(
app.chat_engine,
app.callback_manager, # is None in database mode
app.token_counter,
app.state.chat_engine,
app.state.callback_manager, # is None in database mode
app.state.token_counter,
) = set_up_database_chatbot()


async def handle_uploadfile(
upload_file: UploadFile,
) -> AITextDocument | AIDataBase | None:
file_name = upload_file.filename
) -> AITextDocument | AIDataBase | AIPdfDocument | None:
if not (file_name := Path(upload_file.filename).name):
return None
with open(cfd / data_dir / file_name, "wb") as f:
f.write(await upload_file.read())
match upload_file.filename.split(".")[-1]:
case "txt":
load_text_chat_engine()
return AITextDocument(file_name, LLM_NAME, app.callback_manager)
return AITextDocument(file_name, LLM_NAME, app.state.callback_manager)
case "pdf":
load_text_chat_engine()
return AIPdfDocument(file_name, LLM_NAME, app.callback_manager)
return AIPdfDocument(file_name, LLM_NAME, app.state.callback_manager)
case "sqlite" | "db":
uri = f"sqlite:///{app_dir}/{data_dir}/{file_name}"
logging.debug(f"uri: {uri} debug {DEBUG_MODE}")
load_database_chat_engine()
document: AIDataBase = AIDataBase.from_uri(uri)
return document
return AIDataBase.from_uri(uri)
return None


async def handle_upload_url(upload_url) -> None:
async def handle_upload_url(upload_url: str) -> AITextDocument | AIHtmlDocument:
match re.split(r"[./]", upload_url):
case [*_, dir, file_name, "txt"] if dir == "data":
try:
load_text_chat_engine()
return AITextDocument(file_name, LLM_NAME, app.callback_manager)
return AITextDocument(file_name, LLM_NAME, app.state.callback_manager)
except OSError:
raise FileNotFoundError(
errno.ENOENT,
Expand All @@ -195,24 +135,30 @@ async def handle_upload_url(upload_url) -> None:
)
case [http, *_] if "http" in http.lower():
load_text_chat_engine()
return AIHtmlDocument(upload_url, LLM_NAME, app.callback_manager)
return AIHtmlDocument(upload_url, LLM_NAME, app.state.callback_manager)
case _:
raise MissingSchema


@app.post("/upload", response_model=TextSummaryModel)
async def upload_file(
upload_file: UploadFile | None = None, upload_url: str = Form("")
):
) -> TextSummaryModel:
message = ""
text_category = ""
file_name = ""
file_name: str | None = ""
used_tokens = 0
try:
if upload_file:
if upload_url:
raise DoubleUploadException("You can not provide both, file and URL.")
file_name = upload_file.filename
if not (file_name := upload_file.filename):
return TextSummaryModel(
file_name="",
text_category=text_category,
summary=message,
used_tokens=used_tokens,
)
destination_file = Path(cfd / "data" / file_name)
destination_file.parent.mkdir(exist_ok=True, parents=True)
document = await handle_uploadfile(upload_file)
Expand All @@ -224,11 +170,11 @@ async def upload_file(
raise NoUploadException(
"You must provide either a file or URL to upload.",
)
if app.chat_engine and document:
app.chat_engine.add_document(document)
if app.state.chat_engine and document:
app.state.chat_engine.add_document(document)
message = document.summary
text_category = document.category
used_tokens = app.token_counter.total_llm_token_count
used_tokens = app.state.token_counter.total_llm_token_count
except MissingSchema:
raise HTTPException(
status_code=400,
Expand All @@ -243,7 +189,7 @@ async def upload_file(
status_code=400,
detail=f"There was an unexpected OSError on uploading the file:{e}",
)
logging.debug(f"engine_up?: {app.chat_engine is not None}")
logging.debug(f"engine_up?: {app.state.chat_engine is not None}")
logging.debug(f"message: {message}")
return TextSummaryModel(
file_name=file_name,
Expand All @@ -254,18 +200,18 @@ async def upload_file(


@app.post("/qa_text", response_model=QAResponseModel)
async def qa_text(question: QuestionModel):
logging.debug(f"engine_up?: {app.chat_engine is not None}")
async def qa_text(question: QuestionModel) -> QAResponseModel:
logging.debug(f"engine_up?: {app.state.chat_engine is not None}")
if not question.prompt:
raise EmptyQuestionException(
"Your Question is empty, please type a message and resend it."
)
if app.chat_engine:
app.token_counter.reset_counts()
app.chat_engine.update_temp(question.temperature)
response = app.chat_engine.answer_question(question)
if app.state.chat_engine:
app.state.token_counter.reset_counts()
app.state.chat_engine.update_temp(question.temperature)
response = app.state.chat_engine.answer_question(question)
ai_answer = str(response)
used_tokens = app.token_counter.total_llm_token_count
used_tokens = app.state.token_counter.total_llm_token_count
else:
ai_answer = "Sorry, no context loaded. Please upload a file or url."
used_tokens = 0
Expand All @@ -279,22 +225,22 @@ async def qa_text(question: QuestionModel):

@app.get("/clear_storage", response_model=TextResponseModel)
async def clear_storage():
if app.chat_engine:
app.chat_engine.clear_data_storage()
if app.state.chat_engine:
app.state.chat_engine.clear_data_storage()
logging.info("chat engine cleared...")
if (cfd / "data").exists():
for file in Path(cfd / "data").iterdir():
os.remove(file)
app.chat_engine = None
app.token_counter = None
app.callback_manager = None
app.state.chat_engine = None
app.state.token_counter = None
app.state.callback_manager = None
return TextResponseModel(message="Knowledge base succesfully cleared")


@app.get("/clear_history", response_model=TextResponseModel)
async def clear_history():
if app.chat_engine:
message = app.chat_engine.clear_chat_history()
if app.state.chat_engine:
message = app.state.chat_engine.clear_chat_history()
# logging.debug("chat history cleared...")
return TextResponseModel(message=message)
return TextResponseModel(
Expand All @@ -310,13 +256,13 @@ async def clear_history():
},
)
def get_quiz():
if not app.chat_engine or not app.chat_engine.vector_index.ref_doc_info:
if not app.state.chat_engine or not app.state.chat_engine.vector_index.ref_doc_info:
raise HTTPException(
status_code=400,
detail="No context provided, please provide a url or a text file!",
)

if app.chat_engine.data_category == "database":
if app.state.chat_engine.data_category == "database":
raise HTTPException(
status_code=400,
detail="""A database is loaded, but no valid context for a quiz.
Expand Down Expand Up @@ -344,7 +290,7 @@ def generate_quiz_from_context():
from llama_index.prompts import PromptTemplate
from llama_index.response import Response

vector_index = app.chat_engine.vector_index
vector_index = app.state.chat_engine.vector_index
lc_output_parser = PydanticOutputParser(pydantic_object=MultipleChoiceTest)
output_parser = LangchainOutputParser(lc_output_parser)

Expand Down
Loading

0 comments on commit 7ff410d

Please sign in to comment.