From 04be813540e7fd49cd3f94889df90d1d1b8f26ed Mon Sep 17 00:00:00 2001 From: John Shiu Date: Mon, 27 May 2024 13:59:17 -0700 Subject: [PATCH 01/23] feat: added file_function_lineno_map preloading upon initialization of the Repository object --- .../modules/code_analyzer/analyzers/python.py | 11 ++++++++++- src/test_creation/modules/code_analyzer/repo.py | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/test_creation/modules/code_analyzer/analyzers/python.py b/src/test_creation/modules/code_analyzer/analyzers/python.py index 43f2385..eec85ae 100644 --- a/src/test_creation/modules/code_analyzer/analyzers/python.py +++ b/src/test_creation/modules/code_analyzer/analyzers/python.py @@ -1,5 +1,6 @@ import ast from functools import wraps +from collections import defaultdict from . import CodeAnalyzer @@ -24,6 +25,14 @@ def read(self, file_path: str): self.content = f.read() self._tree = ast.parse(self.content) + @assert_have_read_content + def _get_function_lineno_map(self): # FIXME: when to use _xxx? when to use xxx? + function_lineno_map = defaultdict(int) + for node in ast.walk(self._tree): + if isinstance(node, ast.FunctionDef): + function_lineno_map[node.name] = node.lineno + return function_lineno_map + @assert_have_read_content def list_imported_packages(self): packages = set() @@ -36,7 +45,7 @@ def list_imported_packages(self): @assert_have_read_content def list_all_functions(self): - raise NotImplementedError() + return self._get_function_lineno_map().keys() @assert_have_read_content def contains_test(self): diff --git a/src/test_creation/modules/code_analyzer/repo.py b/src/test_creation/modules/code_analyzer/repo.py index 6326f64..f78606b 100644 --- a/src/test_creation/modules/code_analyzer/repo.py +++ b/src/test_creation/modules/code_analyzer/repo.py @@ -24,6 +24,7 @@ def __init__(self, path: str): '.c': 'C' } self.lf_map = self._get_language_file_map() + self.ffl_map = self._get_file_function_lineno_map() # file-function-lineno map def _get_all_files(self, include_git_dir: bool = False): file_paths = [] @@ -44,7 +45,19 @@ def _get_language_file_map(self): for k, v in self.fileext_language_map.items(): if file.endswith(k): file_language_map[v].append(file) - return file_language_map + return file_language_map # FIXME: why is it called file_language_map instead of language_file_map? + + def _get_file_function_lineno_map(self): + file_function_lineno_map = defaultdict(lambda: defaultdict(int)) + files = self.lf_map.get("Python", []) + ast = PythonASTCodeAnalyzer() # FIXME: only support Python ATS, what's the implication? + for file in files: + try: + ast.read(file) + file_function_lineno_map[file] = ast._get_function_lineno_map() + except Exception as e: + logger.info("Exception occurred when parsing using ast (Python 2 code?) 
Using naive parser...") + return file_function_lineno_map def list_languages(self): return list(self.lf_map.keys()) @@ -78,3 +91,4 @@ def list_test_files(self): if naive.contains_test(): testfiles["Python"].append(file) return testfiles + From 95749768f858c168eb00caf2fdac7e12a2c79792 Mon Sep 17 00:00:00 2001 From: John Shiu Date: Mon, 27 May 2024 14:05:13 -0700 Subject: [PATCH 02/23] addressed Tiff comment: maintaining the same format as demonstrated last week --- src/test_creation/modules/workflow/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test_creation/modules/workflow/parse.py b/src/test_creation/modules/workflow/parse.py index 9b38a96..ba0dec7 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -22,7 +22,7 @@ def get_completeness_score(self, score_format: str = 'fraction', verbose: bool = if verbose: print("Report:") - print(report_df) + print(report_df[['is_Satisfied', 'n_files_tested']]) print() print(f'Score: {score}') print() From 2177285db97b7256869351ccbddc69b10a274030 Mon Sep 17 00:00:00 2001 From: John Shiu Date: Mon, 27 May 2024 14:46:46 -0700 Subject: [PATCH 03/23] feat: added Line Numbers in report --- src/test_creation/analyze.py | 1 + src/test_creation/demo.ipynb | 541 +++----------------- src/test_creation/modules/workflow/parse.py | 6 +- 3 files changed, 78 insertions(+), 470 deletions(-) diff --git a/src/test_creation/analyze.py b/src/test_creation/analyze.py index ceba852..8e9b54b 100644 --- a/src/test_creation/analyze.py +++ b/src/test_creation/analyze.py @@ -105,6 +105,7 @@ def evaluate(self, verbose: bool = False) -> List[dict]: report = response['results'] for item in report: item['file'] = fp + item['lineno'] = [self.file_extractor._repo.ffl_map[fp][func] for func in item['Functions']] result += [{ 'file': fp, 'report': report, diff --git a/src/test_creation/demo.ipynb b/src/test_creation/demo.ipynb index 58dc3c7..6b085e6 100644 --- a/src/test_creation/demo.ipynb +++ b/src/test_creation/demo.ipynb @@ -13,6 +13,18 @@ { "cell_type": "code", "execution_count": 2, + "id": "f500b822-6e75-4819-bda3-bf74e028d1ac", + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", + "checklist = Checklist('../../checklist/checklist_sys.csv/', checklist_format=ChecklistFormat.CSV)\n", + "extractor = PythonTestFileExtractor(Repository('../../data/raw/openja/lightfm_demo'))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "ad0a59a9-185c-4f17-a0dd-fa2534958ecb", "metadata": {}, "outputs": [ @@ -20,262 +32,76 @@ "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/3 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
   | ID  | Title                                          | file                           | Evaluation | Score
3  | 5.1 | Validate Model Input and Output Compatibility  | tests/test_cross_validation.py | Satisfied  | 1.0
7  | 5.1 | Validate Model Input and Output Compatibility  | tests/test_evaluation.py       | Satisfied  | 1.0
11 | 5.1 | Validate Model Input and Output Compatibility  | tests/test_data.py             | Satisfied  | 1.0
\n", - "" - ], - "text/plain": [ - " ID Title \\\n", - "3 5.1 Validate Model Input and Output Compatibility \n", - "7 5.1 Validate Model Input and Output Compatibility \n", - "11 5.1 Validate Model Input and Output Compatibility \n", - "\n", - " file Evaluation Score \n", - "3 tests/test_cross_validation.py Satisfied 1.0 \n", - "7 tests/test_evaluation.py Satisfied 1.0 \n", - "11 tests/test_data.py Satisfied 1.0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "reports = pd.DataFrame(evaluator.evaluation_result)['report'].explode('report').apply(pd.Series)\n", - "reports['file'] = reports['file'].str[35:]\n", - "reports.query('ID == \"5.1\"')[['ID', 'Title', 'file', 'Evaluation', 'Score']]#.to_dict('records')" + "parser = ResponseParser(response)" ] }, { "cell_type": "code", "execution_count": 5, - "id": "5a682a42-8807-48c6-9de4-0558838e3ccd", + "id": "6c2ce7a8-4e75-497e-bc9f-a2ef7b637add", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Report:\n", + " is_Satisfied \\\n", + "ID Title \n", + "2.1 Test Data Fetching and File Reading 0.0 \n", + "3.1 Validate Data Shape and Values 0.0 \n", + "3.2 Check for Duplicate Records in Data 0.0 \n", + "4.1 Verify Data Split Proportion 1.0 \n", + "5.1 Test Model Output Shape 0.5 \n", + "6.1 Verify Evaluation Metrics Implementation 0.5 \n", + "6.2 Evaluate Model's Performance Against Thresholds 0.5 \n", + "8.1 Validate Outliers Detection and Handling 0.0 \n", + "\n", + " n_files_tested \n", + "ID Title \n", + "2.1 Test Data Fetching and File Reading 2 \n", + "3.1 Validate Data Shape and Values 2 \n", + "3.2 Check for Duplicate Records in Data 2 \n", + "4.1 Verify Data Split Proportion 3 \n", + "5.1 Test Model Output Shape 2 \n", + "6.1 Verify Evaluation Metrics Implementation 2 \n", + "6.2 Evaluate Model's Performance Against Thresholds 2 \n", + "8.1 Validate Outliers Detection and Handling 2 \n", + "\n", + "Score: 2.5/8\n", + "\n" + ] + }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
   | ID  | Title                              | file                           | Evaluation          | Score
2  | 2.1 | Ensure Data File Loads as Expected | tests/test_cross_validation.py | Partially Satisfied | 0.5
6  | 2.1 | Ensure Data File Loads as Expected | tests/test_evaluation.py       | Not Satisfied       | 0.0
10 | 2.1 | Ensure Data File Loads as Expected | tests/test_data.py             | Not Satisfied       | 0.0
\n", - "
" - ], "text/plain": [ - " ID Title file \\\n", - "2 2.1 Ensure Data File Loads as Expected tests/test_cross_validation.py \n", - "6 2.1 Ensure Data File Loads as Expected tests/test_evaluation.py \n", - "10 2.1 Ensure Data File Loads as Expected tests/test_data.py \n", - "\n", - " Evaluation Score \n", - "2 Partially Satisfied 0.5 \n", - "6 Not Satisfied 0.0 \n", - "10 Not Satisfied 0.0 " + "'2.5/8'" ] }, "execution_count": 5, @@ -284,253 +110,24 @@ } ], "source": [ - "reports.query('ID == \"2.1\"')[['ID', 'Title', 'file', 'Evaluation', 'Score']]" + "parser.get_completeness_score(verbose=True)" ] }, { "cell_type": "code", "execution_count": 6, - "id": "889fd144-c4c1-4365-81f5-317f3cf6c4a9", + "id": "ed40b82c-9c5b-4554-83a1-cd3e009b72fa", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
   | ID  | Title | Requirement | Observation | Functions | Evaluation | Score | file
0  | 1.1 | Write Descriptive Test Names | Each test function should have a clear, descri... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Satisfied | 1.0 | tests/test_cross_validation.py
1  | 1.2 | Keep Tests Focused | Each test should focus on a single scenario, u... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Satisfied | 1.0 | tests/test_cross_validation.py
2  | 2.1 | Ensure Data File Loads as Expected | Ensure that data-loading functions correctly l... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Partially Satisfied | 0.5 | tests/test_cross_validation.py
3  | 5.1 | Validate Model Input and Output Compatibility | Confirm that the model accepts inputs of the c... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Satisfied | 1.0 | tests/test_cross_validation.py
4  | 1.1 | Write Descriptive Test Names | Each test function should have a clear, descri... | The test functions have clear and descriptive ... | [test_precision_at_k, test_precision_at_k_with... | Satisfied | 1.0 | tests/test_evaluation.py
5  | 1.2 | Keep Tests Focused | Each test should focus on a single scenario, u... | Each test focuses on a single scenario, using ... | [test_precision_at_k, test_precision_at_k_with... | Satisfied | 1.0 | tests/test_evaluation.py
6  | 2.1 | Ensure Data File Loads as Expected | Ensure that data-loading functions correctly l... | The provided test functions do not involve loa... | [] | Not Satisfied | 0.0 | tests/test_evaluation.py
7  | 5.1 | Validate Model Input and Output Compatibility | Confirm that the model accepts inputs of the c... | The test functions validate the model's input ... | [test_precision_at_k, test_precision_at_k_with... | Satisfied | 1.0 | tests/test_evaluation.py
8  | 1.1 | Write Descriptive Test Names | Each test function should have a clear, descri... | The test functions have clear and descriptive ... | [test_fitting, test_fitting_no_identity, test_... | Satisfied | 1.0 | tests/test_data.py
9  | 1.2 | Keep Tests Focused | Each test should focus on a single scenario, u... | The test functions focus on specific scenarios... | [test_fitting, test_fitting_no_identity, test_... | Satisfied | 1.0 | tests/test_data.py
10 | 2.1 | Ensure Data File Loads as Expected | Ensure that data-loading functions correctly l... | The test functions do not directly involve dat... | [] | Not Satisfied | 0.0 | tests/test_data.py
11 | 5.1 | Validate Model Input and Output Compatibility | Confirm that the model accepts inputs of the c... | The test functions validate the shapes and typ... | [test_fitting, test_fitting_no_identity, test_... | Satisfied | 1.0 | tests/test_data.py
\n", - "
" - ], "text/plain": [ - " ID Title \\\n", - "0 1.1 Write Descriptive Test Names \n", - "1 1.2 Keep Tests Focused \n", - "2 2.1 Ensure Data File Loads as Expected \n", - "3 5.1 Validate Model Input and Output Compatibility \n", - "4 1.1 Write Descriptive Test Names \n", - "5 1.2 Keep Tests Focused \n", - "6 2.1 Ensure Data File Loads as Expected \n", - "7 5.1 Validate Model Input and Output Compatibility \n", - "8 1.1 Write Descriptive Test Names \n", - "9 1.2 Keep Tests Focused \n", - "10 2.1 Ensure Data File Loads as Expected \n", - "11 5.1 Validate Model Input and Output Compatibility \n", - "\n", - " Requirement \\\n", - "0 Each test function should have a clear, descri... \n", - "1 Each test should focus on a single scenario, u... \n", - "2 Ensure that data-loading functions correctly l... \n", - "3 Confirm that the model accepts inputs of the c... \n", - "4 Each test function should have a clear, descri... \n", - "5 Each test should focus on a single scenario, u... \n", - "6 Ensure that data-loading functions correctly l... \n", - "7 Confirm that the model accepts inputs of the c... \n", - "8 Each test function should have a clear, descri... \n", - "9 Each test should focus on a single scenario, u... \n", - "10 Ensure that data-loading functions correctly l... \n", - "11 Confirm that the model accepts inputs of the c... \n", - "\n", - " Observation \\\n", - "0 The test function 'test_random_train_test_spli... \n", - "1 The test function 'test_random_train_test_spli... \n", - "2 The test function 'test_random_train_test_spli... \n", - "3 The test function 'test_random_train_test_spli... \n", - "4 The test functions have clear and descriptive ... \n", - "5 Each test focuses on a single scenario, using ... \n", - "6 The provided test functions do not involve loa... \n", - "7 The test functions validate the model's input ... \n", - "8 The test functions have clear and descriptive ... \n", - "9 The test functions focus on specific scenarios... \n", - "10 The test functions do not directly involve dat... \n", - "11 The test functions validate the shapes and typ... \n", - "\n", - " Functions Evaluation \\\n", - "0 [test_random_train_test_split] Satisfied \n", - "1 [test_random_train_test_split] Satisfied \n", - "2 [test_random_train_test_split] Partially Satisfied \n", - "3 [test_random_train_test_split] Satisfied \n", - "4 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", - "5 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", - "6 [] Not Satisfied \n", - "7 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", - "8 [test_fitting, test_fitting_no_identity, test_... Satisfied \n", - "9 [test_fitting, test_fitting_no_identity, test_... Satisfied \n", - "10 [] Not Satisfied \n", - "11 [test_fitting, test_fitting_no_identity, test_... 
Satisfied \n", - "\n", - " Score file \n", - "0 1.0 tests/test_cross_validation.py \n", - "1 1.0 tests/test_cross_validation.py \n", - "2 0.5 tests/test_cross_validation.py \n", - "3 1.0 tests/test_cross_validation.py \n", - "4 1.0 tests/test_evaluation.py \n", - "5 1.0 tests/test_evaluation.py \n", - "6 0.0 tests/test_evaluation.py \n", - "7 1.0 tests/test_evaluation.py \n", - "8 1.0 tests/test_data.py \n", - "9 1.0 tests/test_data.py \n", - "10 0.0 tests/test_data.py \n", - "11 1.0 tests/test_data.py " + "[{'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py',\n", + " 'Functions': ['_precision_at_k', '_recall_at_k', '_auc'],\n", + " 'Line Numbers': [34, 78, 122]},\n", + " {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py',\n", + " 'Functions': [],\n", + " 'Line Numbers': []}]" ] }, "execution_count": 6, @@ -539,7 +136,17 @@ } ], "source": [ - "reports" + "parser.evaluation_report['Function References'].iloc[5]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "15722d59-0764-43dd-aeb6-5f01deaed9c5", + "metadata": {}, + "outputs": [], + "source": [ + "parser.export_evaluation_report(\"report.html\", \"html\", exist_ok=True)" ] }, { diff --git a/src/test_creation/modules/workflow/parse.py b/src/test_creation/modules/workflow/parse.py index baeab61..073bc99 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -15,8 +15,8 @@ def get_completeness_score(self, score_format: str = 'fraction', verbose: bool = Compute Evaluation Report and Completeness Score """ report_df = pd.DataFrame(self.response)['report'].explode('report').apply(pd.Series) - report_df = report_df.rename(columns={"file": "File Path"}) - report_df['Function References'] = report_df[['File Path', 'Functions']].to_dict(orient='records') + report_df = report_df.rename(columns={"file": "File Path", "lineno": "Line Numbers"}) + report_df['Function References'] = report_df[['File Path', 'Functions', "Line Numbers"]].to_dict(orient='records') report_df['Observation'] = '(' + report_df['File Path'].apply(lambda x: os.path.split(x)[-1]) + ') ' + \ report_df['Observation'] report_df = report_df.groupby(['ID', 'Title']).agg({ @@ -94,4 +94,4 @@ def export_evaluation_report(self, output_path, format='html', exist_ok: bool = self.export_html(output_path, exist_ok) elif format=='pdf': self.export_pdf(output_path, exist_ok) - return \ No newline at end of file + return From b9c61bf7e8070e33276871997d3f6aeba83f362f Mon Sep 17 00:00:00 2001 From: John Shiu Date: Tue, 28 May 2024 11:17:21 -0700 Subject: [PATCH 04/23] feat: added hyperlink in the report --- src/test_creation/analyze.py | 7 ++- .../modules/code_analyzer/repo.py | 61 ++++++++++++++++++- src/test_creation/modules/workflow/parse.py | 2 +- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/test_creation/analyze.py b/src/test_creation/analyze.py index 8e9b54b..fa9df35 100644 --- a/src/test_creation/analyze.py +++ b/src/test_creation/analyze.py @@ -103,9 +103,14 @@ def evaluate(self, verbose: bool = False) -> List[dict]: raise RuntimeError(f"Unable to obtain valid response from LLM within {self.retries} attempts") report = response['results'] + repo = self.file_extractor._repo for item in report: item['file'] = fp - item['lineno'] = [self.file_extractor._repo.ffl_map[fp][func] for func in item['Functions']] + item['lineno'] = [repo.ffl_map[fp][func] for func in item['Functions']] + item['lineno_href'] = [ + 
f"[{lineno}]({repo._get_git_direct_link(repo._get_relative_path(fp), lineno)})" + for lineno in item['lineno'] + ] result += [{ 'file': fp, 'report': report, diff --git a/src/test_creation/modules/code_analyzer/repo.py b/src/test_creation/modules/code_analyzer/repo.py index f78606b..0721ea2 100644 --- a/src/test_creation/modules/code_analyzer/repo.py +++ b/src/test_creation/modules/code_analyzer/repo.py @@ -1,6 +1,8 @@ import os +import re import logging from collections import defaultdict +from configparser import ConfigParser from .analyzers.python import PythonNaiveCodeAnalyzer, PythonASTCodeAnalyzer @@ -10,6 +12,15 @@ class Repository: def __init__(self, path: str): self.path = path + + # git metadata + self.url = '' + self.mode = '' + self.service = '' + self.user = '' + self.name = '' + self.main_branch = '' + self.files = [] self.fileext_language_map = { '.js': 'JavaScript', @@ -24,8 +35,56 @@ def __init__(self, path: str): '.c': 'C' } self.lf_map = self._get_language_file_map() - self.ffl_map = self._get_file_function_lineno_map() # file-function-lineno map + self.ffl_map = self._get_file_function_lineno_map() + try: + self._get_git_metadata() + except Exception as e: + logger.info(e) + + def _get_git_metadata(self): + config = ConfigParser() + if os.path.exists(self.path + '/.git/config'): + config.read(self.path + '/.git/config') + else: + raise FileNotFoundError('/.git/config does not exist') + + self.url = config['remote "origin"']['url'] + + if 'git@' in self.url: + self.mode = 'ssh' + pattern = 'git@(.*?):(.*?)/(.*?).git' + elif 'https' in self.url: + self.mode = 'https' + pattern = 'https://(.*?)/(.*?)/(.*?).git' + + self.service, self.user, self.name = re.search(pattern, self.url).group(1,2,3) + + if 'branch "master"' in list(config): + self.main_branch = 'master' + elif 'branch "main"' in list(config): + self.main_branch = 'main' + + return { + 'mode': self.mode, + 'service': self.service, + 'user': self.user, + 'name': self.name, + 'main_branch': self.main_branch + } + + def _get_git_direct_link(self, file: str, lineno: int = None): + link = f'https://{self.service}/{self.user}/{self.name}/blob/{self.main_branch}/{file}' + if lineno: + link += f'#L{lineno}' + return link + def _get_relative_path(self, file: str): + path = file.replace(self.path, '', 1) + if path[0] == '/': + return path + else: + return '/' + path + def _get_all_files(self, include_git_dir: bool = False): file_paths = [] results = list(os.walk(self.path)) diff --git a/src/test_creation/modules/workflow/parse.py b/src/test_creation/modules/workflow/parse.py index 073bc99..4c3e427 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -15,7 +15,7 @@ def get_completeness_score(self, score_format: str = 'fraction', verbose: bool = Compute Evaluation Report and Completeness Score """ report_df = pd.DataFrame(self.response)['report'].explode('report').apply(pd.Series) - report_df = report_df.rename(columns={"file": "File Path", "lineno": "Line Numbers"}) + report_df = report_df.rename(columns={"file": "File Path", "lineno_href": "Line Numbers"}) report_df['Function References'] = report_df[['File Path', 'Functions', "Line Numbers"]].to_dict(orient='records') report_df['Observation'] = '(' + report_df['File Path'].apply(lambda x: os.path.split(x)[-1]) + ') ' + \ report_df['Observation'] From e266afd73d50c200b9d4217a5550d3d93d2b61b4 Mon Sep 17 00:00:00 2001 From: John Shiu Date: Tue, 28 May 2024 11:17:42 -0700 Subject: [PATCH 05/23] saved demo --- 
src/test_creation/demo.ipynb | 582 +++++++++++++++++++++++++++++++++-- 1 file changed, 548 insertions(+), 34 deletions(-) diff --git a/src/test_creation/demo.ipynb b/src/test_creation/demo.ipynb index 6b085e6..ad92f50 100644 --- a/src/test_creation/demo.ipynb +++ b/src/test_creation/demo.ipynb @@ -13,58 +13,540 @@ { "cell_type": "code", "execution_count": 2, - "id": "f500b822-6e75-4819-bda3-bf74e028d1ac", + "id": "ad0a59a9-185c-4f17-a0dd-fa2534958ecb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.94s/it]\n" + ] + } + ], "source": [ "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", - "checklist = Checklist('../../checklist/checklist_sys.csv/', checklist_format=ChecklistFormat.CSV)\n", - "extractor = PythonTestFileExtractor(Repository('../../data/raw/openja/lightfm_demo'))" + "checklist = Checklist('../../checklist/checklist_sys.csv', checklist_format=ChecklistFormat.CSV)\n", + "extractor = PythonTestFileExtractor(Repository('../../data/raw/openja/lightfm_demo'))\n", + "\n", + "evaluator = TestEvaluator(llm, extractor, checklist)\n", + "response = evaluator.evaluate()" ] }, { "cell_type": "code", "execution_count": 3, - "id": "ad0a59a9-185c-4f17-a0dd-fa2534958ecb", + "id": "5887705f-9308-4c7c-aef2-9571d3c8b6b7", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:26<00:00, 8.77s/it]\n" + "Report:\n", + " is_Satisfied \\\n", + "ID Title \n", + "2.1 Test Data Fetching and File Reading 0.0 \n", + "3.1 Validate Data Shape and Values 0.0 \n", + "3.2 Check for Duplicate Records in Data 0.0 \n", + "4.1 Verify Data Split Proportion 0.5 \n", + "5.1 Test Model Output Shape 0.5 \n", + "6.1 Verify Evaluation Metrics Implementation 0.5 \n", + "6.2 Evaluate Model's Performance Against Thresholds 0.0 \n", + "8.1 Validate Outliers Detection and Handling 0.0 \n", + "\n", + " n_files_tested \n", + "ID Title \n", + "2.1 Test Data Fetching and File Reading 3 \n", + "3.1 Validate Data Shape and Values 3 \n", + "3.2 Check for Duplicate Records in Data 3 \n", + "4.1 Verify Data Split Proportion 3 \n", + "5.1 Test Model Output Shape 3 \n", + "6.1 Verify Evaluation Metrics Implementation 3 \n", + "6.2 Evaluate Model's Performance Against Thresholds 3 \n", + "8.1 Validate Outliers Detection and Handling 3 \n", + "\n", + "Score: 1.5/8\n", + "\n" ] } ], "source": [ - "evaluator = TestEvaluator(llm, extractor, checklist)\n", - "response = evaluator.evaluate()" + "parser = ResponseParser(response)\n", + "parser.get_completeness_score(verbose=True)\n", + "parser.export_evaluation_report('report.html', 'html', exist_ok=True)" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "7326d7f5-9afd-406e-86ee-9b1f09c8adcd", + "execution_count": 3, + "id": "d717ba5d-dc9d-477d-a9db-ccb993f48f09", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Report:\n", + " is_Satisfied \\\n", + "ID Title \n", + "1.1 Write Descriptive Test Names 1.0 \n", + "1.2 Keep Tests Focused 1.0 \n", + "2.1 
Ensure Data File Loads as Expected 0.5 \n", + "5.1 Validate Model Input and Output Compatibility 1.0 \n", + "\n", + " n_files_tested \\\n", + "ID Title \n", + "1.1 Write Descriptive Test Names 3 \n", + "1.2 Keep Tests Focused 3 \n", + "2.1 Ensure Data File Loads as Expected 3 \n", + "5.1 Validate Model Input and Output Compatibility 3 \n", + "\n", + " functions \n", + "ID Title \n", + "1.1 Write Descriptive Test Names [test_random_train_test_split, test_precision_... \n", + "1.2 Keep Tests Focused [test_random_train_test_split, test_precision_... \n", + "2.1 Ensure Data File Loads as Expected [test_random_train_test_split] \n", + "5.1 Validate Model Input and Output Compatibility [test_random_train_test_split, test_precision_... \n", + "\n", + "Score: 3.5/4\n", + "\n" + ] + } + ], "source": [ - "#response" + "score = evaluator.get_completeness_score()" ] }, { "cell_type": "code", "execution_count": 4, - "id": "32b31a37-06c6-41af-b426-284070311911", + "id": "273db18c-13c4-4c86-a4c8-f42e0b0e37c5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
   | ID  | Title                                          | file                           | Evaluation | Score
3  | 5.1 | Validate Model Input and Output Compatibility  | tests/test_cross_validation.py | Satisfied  | 1.0
7  | 5.1 | Validate Model Input and Output Compatibility  | tests/test_evaluation.py       | Satisfied  | 1.0
11 | 5.1 | Validate Model Input and Output Compatibility  | tests/test_data.py             | Satisfied  | 1.0
\n", + "
" + ], + "text/plain": [ + " ID Title \\\n", + "3 5.1 Validate Model Input and Output Compatibility \n", + "7 5.1 Validate Model Input and Output Compatibility \n", + "11 5.1 Validate Model Input and Output Compatibility \n", + "\n", + " file Evaluation Score \n", + "3 tests/test_cross_validation.py Satisfied 1.0 \n", + "7 tests/test_evaluation.py Satisfied 1.0 \n", + "11 tests/test_data.py Satisfied 1.0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "parser = ResponseParser(response)" + "reports = pd.DataFrame(evaluator.evaluation_result)['report'].explode('report').apply(pd.Series)\n", + "reports['file'] = reports['file'].str[35:]\n", + "reports.query('ID == \"5.1\"')[['ID', 'Title', 'file', 'Evaluation', 'Score']]#.to_dict('records')" ] }, { "cell_type": "code", "execution_count": 5, - "id": "6c2ce7a8-4e75-497e-bc9f-a2ef7b637add", + "id": "5a682a42-8807-48c6-9de4-0558838e3ccd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
   | ID  | Title                              | file                           | Evaluation          | Score
2  | 2.1 | Ensure Data File Loads as Expected | tests/test_cross_validation.py | Partially Satisfied | 0.5
6  | 2.1 | Ensure Data File Loads as Expected | tests/test_evaluation.py       | Not Satisfied       | 0.0
10 | 2.1 | Ensure Data File Loads as Expected | tests/test_data.py             | Not Satisfied       | 0.0
\n", + "
" + ], + "text/plain": [ + " ID Title file \\\n", + "2 2.1 Ensure Data File Loads as Expected tests/test_cross_validation.py \n", + "6 2.1 Ensure Data File Loads as Expected tests/test_evaluation.py \n", + "10 2.1 Ensure Data File Loads as Expected tests/test_data.py \n", + "\n", + " Evaluation Score \n", + "2 Partially Satisfied 0.5 \n", + "6 Not Satisfied 0.0 \n", + "10 Not Satisfied 0.0 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reports.query('ID == \"2.1\"')[['ID', 'Title', 'file', 'Evaluation', 'Score']]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "889fd144-c4c1-4365-81f5-317f3cf6c4a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
   | ID  | Title | Requirement | Observation | Functions | Evaluation | Score | file
0  | 1.1 | Write Descriptive Test Names | Each test function should have a clear, descri... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Satisfied | 1.0 | tests/test_cross_validation.py
1  | 1.2 | Keep Tests Focused | Each test should focus on a single scenario, u... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Satisfied | 1.0 | tests/test_cross_validation.py
2  | 2.1 | Ensure Data File Loads as Expected | Ensure that data-loading functions correctly l... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Partially Satisfied | 0.5 | tests/test_cross_validation.py
3  | 5.1 | Validate Model Input and Output Compatibility | Confirm that the model accepts inputs of the c... | The test function 'test_random_train_test_spli... | [test_random_train_test_split] | Satisfied | 1.0 | tests/test_cross_validation.py
4  | 1.1 | Write Descriptive Test Names | Each test function should have a clear, descri... | The test functions have clear and descriptive ... | [test_precision_at_k, test_precision_at_k_with... | Satisfied | 1.0 | tests/test_evaluation.py
5  | 1.2 | Keep Tests Focused | Each test should focus on a single scenario, u... | Each test focuses on a single scenario, using ... | [test_precision_at_k, test_precision_at_k_with... | Satisfied | 1.0 | tests/test_evaluation.py
6  | 2.1 | Ensure Data File Loads as Expected | Ensure that data-loading functions correctly l... | The provided test functions do not involve loa... | [] | Not Satisfied | 0.0 | tests/test_evaluation.py
7  | 5.1 | Validate Model Input and Output Compatibility | Confirm that the model accepts inputs of the c... | The test functions validate the model's input ... | [test_precision_at_k, test_precision_at_k_with... | Satisfied | 1.0 | tests/test_evaluation.py
8  | 1.1 | Write Descriptive Test Names | Each test function should have a clear, descri... | The test functions have clear and descriptive ... | [test_fitting, test_fitting_no_identity, test_... | Satisfied | 1.0 | tests/test_data.py
9  | 1.2 | Keep Tests Focused | Each test should focus on a single scenario, u... | The test functions focus on specific scenarios... | [test_fitting, test_fitting_no_identity, test_... | Satisfied | 1.0 | tests/test_data.py
10 | 2.1 | Ensure Data File Loads as Expected | Ensure that data-loading functions correctly l... | The test functions do not directly involve dat... | [] | Not Satisfied | 0.0 | tests/test_data.py
11 | 5.1 | Validate Model Input and Output Compatibility | Confirm that the model accepts inputs of the c... | The test functions validate the shapes and typ... | [test_fitting, test_fitting_no_identity, test_... | Satisfied | 1.0 | tests/test_data.py
\n", + "
" + ], + "text/plain": [ + " ID Title \\\n", + "0 1.1 Write Descriptive Test Names \n", + "1 1.2 Keep Tests Focused \n", + "2 2.1 Ensure Data File Loads as Expected \n", + "3 5.1 Validate Model Input and Output Compatibility \n", + "4 1.1 Write Descriptive Test Names \n", + "5 1.2 Keep Tests Focused \n", + "6 2.1 Ensure Data File Loads as Expected \n", + "7 5.1 Validate Model Input and Output Compatibility \n", + "8 1.1 Write Descriptive Test Names \n", + "9 1.2 Keep Tests Focused \n", + "10 2.1 Ensure Data File Loads as Expected \n", + "11 5.1 Validate Model Input and Output Compatibility \n", + "\n", + " Requirement \\\n", + "0 Each test function should have a clear, descri... \n", + "1 Each test should focus on a single scenario, u... \n", + "2 Ensure that data-loading functions correctly l... \n", + "3 Confirm that the model accepts inputs of the c... \n", + "4 Each test function should have a clear, descri... \n", + "5 Each test should focus on a single scenario, u... \n", + "6 Ensure that data-loading functions correctly l... \n", + "7 Confirm that the model accepts inputs of the c... \n", + "8 Each test function should have a clear, descri... \n", + "9 Each test should focus on a single scenario, u... \n", + "10 Ensure that data-loading functions correctly l... \n", + "11 Confirm that the model accepts inputs of the c... \n", + "\n", + " Observation \\\n", + "0 The test function 'test_random_train_test_spli... \n", + "1 The test function 'test_random_train_test_spli... \n", + "2 The test function 'test_random_train_test_spli... \n", + "3 The test function 'test_random_train_test_spli... \n", + "4 The test functions have clear and descriptive ... \n", + "5 Each test focuses on a single scenario, using ... \n", + "6 The provided test functions do not involve loa... \n", + "7 The test functions validate the model's input ... \n", + "8 The test functions have clear and descriptive ... \n", + "9 The test functions focus on specific scenarios... \n", + "10 The test functions do not directly involve dat... \n", + "11 The test functions validate the shapes and typ... \n", + "\n", + " Functions Evaluation \\\n", + "0 [test_random_train_test_split] Satisfied \n", + "1 [test_random_train_test_split] Satisfied \n", + "2 [test_random_train_test_split] Partially Satisfied \n", + "3 [test_random_train_test_split] Satisfied \n", + "4 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", + "5 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", + "6 [] Not Satisfied \n", + "7 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", + "8 [test_fitting, test_fitting_no_identity, test_... Satisfied \n", + "9 [test_fitting, test_fitting_no_identity, test_... Satisfied \n", + "10 [] Not Satisfied \n", + "11 [test_fitting, test_fitting_no_identity, test_... 
Satisfied \n", + "\n", + " Score file \n", + "0 1.0 tests/test_cross_validation.py \n", + "1 1.0 tests/test_cross_validation.py \n", + "2 0.5 tests/test_cross_validation.py \n", + "3 1.0 tests/test_cross_validation.py \n", + "4 1.0 tests/test_evaluation.py \n", + "5 1.0 tests/test_evaluation.py \n", + "6 0.0 tests/test_evaluation.py \n", + "7 1.0 tests/test_evaluation.py \n", + "8 1.0 tests/test_data.py \n", + "9 1.0 tests/test_data.py \n", + "10 0.0 tests/test_data.py \n", + "11 1.0 tests/test_data.py " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reports" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "07875448-9c58-4ec0-94b8-de9be8870011", "metadata": {}, "outputs": [ { @@ -104,55 +586,87 @@ "'2.5/8'" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "parser.get_completeness_score(verbose=True)" + "parser = ResponseParser(response)\n", + "parser.get_completeness_score(verbose=True)\n", + "#parser.export_evaluation_report(report_output_path, report_output_format, exist_ok=True)" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "ed40b82c-9c5b-4554-83a1-cd3e009b72fa", + "execution_count": 18, + "id": "60b2e5a4-76fb-449c-8745-d5538302d20c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py',\n", - " 'Functions': ['_precision_at_k', '_recall_at_k', '_auc'],\n", - " 'Line Numbers': [34, 78, 122]},\n", - " {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py',\n", - " 'Functions': [],\n", - " 'Line Numbers': []}]" + "\"# Test Evaluation Report\\n\\n## Summary\\n\\n**Completeness Score**: 2.5/8\\n\\n**Completeness Score per Checklist Item**: \\n\\n| ID | Title | is_Satisfied | n_files_tested |\\n|-----:|:------------------------------------------------|---------------:|-----------------:|\\n| 2.1 | Test Data Fetching and File Reading | 0 | 2 |\\n| 3.1 | Validate Data Shape and Values | 0 | 2 |\\n| 3.2 | Check for Duplicate Records in Data | 0 | 2 |\\n| 4.1 | Verify Data Split Proportion | 1 | 3 |\\n| 5.1 | Test Model Output Shape | 0.5 | 2 |\\n| 6.1 | Verify Evaluation Metrics Implementation | 0.5 | 2 |\\n| 6.2 | Evaluate Model's Performance Against Thresholds | 0.5 | 2 |\\n| 8.1 | Validate Outliers Detection and Handling | 0 | 2 |\\n\\n## Details\\n\\n### 2.1 Test Data Fetching and File Reading\\n\\n**Requirement**: Verify that the data fetching API or data file reading functionality works correctly. Ensure that proper error handling is in place for scenarios such as missing files, incorrect file formats, and network errors.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly address data fetching or file reading. It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not involve data fetching or file reading operations.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 3.1 Validate Data Shape and Values\\n\\n**Requirement**: Check that the data has the expected shape and that all values meet domain-specific constraints, such as non-negative distances.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not validate data shape or values. 
It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not validate data shape or values.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 3.2 Check for Duplicate Records in Data\\n\\n**Requirement**: Check for duplicate records in the dataset and ensure that there are none.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not check for duplicate records in the dataset.\\n - (test_data.py) The code does not check for duplicate records in the data.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 4.1 Verify Data Split Proportion\\n\\n**Requirement**: Check that the data is split into training and testing sets in the expected proportion.\\n\\n**Observations:**\\n\\n - (test_cross_validation.py) The code includes a test function test_random_train_test_split that verifies the data split proportion.\\n - (test_evaluation.py) The code generates synthetic data for testing but does not explicitly verify the data split proportion.\\n - (test_data.py) The code does not verify data split proportion.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_cross_validation.py', 'Functions': ['test_random_train_test_split'], 'Line Numbers': [17]}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 5.1 Test Model Output Shape\\n\\n**Requirement**: Validate that the model's output has the expected shape.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly test the model's output shape.\\n - (test_data.py) The code validates the model's output shape.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': ['test_fitting'], 'Line Numbers': [7]}\\n\\n### 6.1 Verify Evaluation Metrics Implementation\\n\\n**Requirement**: Verify that the evaluation metrics are correctly implemented and appropriate for the model's task.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes alternative test implementations for precision, recall, and AUC metrics.\\n - (test_data.py) The code does not verify evaluation metrics implementation.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': ['_precision_at_k', '_recall_at_k', '_auc'], 'Line Numbers': [34, 78, 122]}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 6.2 Evaluate Model's Performance Against Thresholds\\n\\n**Requirement**: Compute evaluation metrics for both the training and testing datasets and ensure that these metrics exceed predefined threshold values, indicating acceptable model performance.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes tests for 
precision, recall, and AUC metrics but does not explicitly compare them against predefined threshold values.\\n - (test_data.py) The code does not evaluate model performance against thresholds.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 8.1 Validate Outliers Detection and Handling\\n\\n**Requirement**: Detect outliers in the dataset. Ensure that the outlier detection mechanism is sensitive enough to flag true outliers while ignoring minor anomalies.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not address outliers detection and handling.\\n - (test_data.py) The code does not validate outliers detection and handling.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n\"" ] }, - "execution_count": 6, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "parser.evaluation_report['Function References'].iloc[5]" + "tmp = parser.as_markdown()\n", + "tmp" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "15722d59-0764-43dd-aeb6-5f01deaed9c5", + "execution_count": 19, + "id": "061d9d78-843e-4b70-9a45-12912792bb98", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'# Test Evaluation Report\\n\\n## Summary\\n\\n**Completeness Score**: 2.5/8\\n\\n**Completeness Score per Checklist Item**: \\n\\n| ID | Title | is_Satisfied | n_files_tested |\\n|-----:|:------------------------------------------------|---------------:|-----------------:|\\n| 2.1 | Test Data Fetching and File Reading | 0 | 2 |\\n| 3.1 | Validate Data Shape and Values | 0 | 2 |\\n| 3.2 | Check for Duplicate Records in Data | 0 | 2 |\\n| 4.1 | Verify Data Split Proportion | 1 | 3 |\\n| 5.1 | Test Model Output Shape | 0.5 | 2 |\\n| 6.1 | Verify Evaluation Metrics Implementation | 0.5 | 2 |\\n| 6.2 | Evaluate Model\\'s Performance Against Thresholds | 0.5 | 2 |\\n| 8.1 | Validate Outliers Detection and Handling | 0 | 2 |\\n\\n## Details\\n\\n### 2.1 Test Data Fetching and File Reading\\n\\n**Requirement**: Verify that the data fetching API or data file reading functionality works correctly. Ensure that proper error handling is in place for scenarios such as missing files, incorrect file formats, and network errors.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly address data fetching or file reading. It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not involve data fetching or file reading operations.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 3.1 Validate Data Shape and Values\\n\\n**Requirement**: Check that the data has the expected shape and that all values meet domain-specific constraints, such as non-negative distances.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not validate data shape or values. 
It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not validate data shape or values.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 3.2 Check for Duplicate Records in Data\\n\\n**Requirement**: Check for duplicate records in the dataset and ensure that there are none.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not check for duplicate records in the dataset.\\n - (test_data.py) The code does not check for duplicate records in the data.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 4.1 Verify Data Split Proportion\\n\\n**Requirement**: Check that the data is split into training and testing sets in the expected proportion.\\n\\n**Observations:**\\n\\n - (test_cross_validation.py) The code includes a test function test_random_train_test_split that verifies the data split proportion.\\n - (test_evaluation.py) The code generates synthetic data for testing but does not explicitly verify the data split proportion.\\n - (test_data.py) The code does not verify data split proportion.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_cross_validation.py\\', \\'Functions\\': [\\'test_random_train_test_split\\'], \\'Line Numbers\\': [\"[17](https://github.com/lyst/lightfm/blob/master/tests/test_cross_validation.py#L17)\"]}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 5.1 Test Model Output Shape\\n\\n**Requirement**: Validate that the model\\'s output has the expected shape.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly test the model\\'s output shape.\\n - (test_data.py) The code validates the model\\'s output shape.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [\\'test_fitting\\'], \\'Line Numbers\\': [7]}\\n\\n### 6.1 Verify Evaluation Metrics Implementation\\n\\n**Requirement**: Verify that the evaluation metrics are correctly implemented and appropriate for the model\\'s task.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes alternative test implementations for precision, recall, and AUC metrics.\\n - (test_data.py) The code does not verify evaluation metrics implementation.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [\\'_precision_at_k\\', \\'_recall_at_k\\', \\'_auc\\'], \\'Line Numbers\\': [34, 78, 122]}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 6.2 Evaluate Model\\'s Performance Against 
Thresholds\\n\\n**Requirement**: Compute evaluation metrics for both the training and testing datasets and ensure that these metrics exceed predefined threshold values, indicating acceptable model performance.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes tests for precision, recall, and AUC metrics but does not explicitly compare them against predefined threshold values.\\n - (test_data.py) The code does not evaluate model performance against thresholds.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 8.1 Validate Outliers Detection and Handling\\n\\n**Requirement**: Detect outliers in the dataset. Ensure that the outlier detection mechanism is sensitive enough to flag true outliers while ignoring minor anomalies.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not address outliers detection and handling.\\n - (test_data.py) The code does not validate outliers detection and handling.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "parser.export_evaluation_report(\"report.html\", \"html\", exist_ok=True)" + "tmp = tmp.replace('17', '\"[17](https://github.com/lyst/lightfm/blob/master/tests/test_cross_validation.py#L17)\"')\n", + "tmp" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "5a980ec3-b4bf-41ce-ad27-acf04eb076c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pypandoc\n", + "pypandoc.convert_text(tmp.replace(\"'\", \"\\\\'\"), 'html', format='md', outputfile='test.html')" ] }, { "cell_type": "code", "execution_count": null, - "id": "07875448-9c58-4ec0-94b8-de9be8870011", + "id": "457726d8-b0e7-4017-9676-6dba16b18eca", "metadata": {}, "outputs": [], "source": [] From 60a6dec901dfa4ca55dc614072f29a9ec33095a6 Mon Sep 17 00:00:00 2001 From: John Shiu Date: Tue, 28 May 2024 11:18:17 -0700 Subject: [PATCH 06/23] cleaned up demo --- src/test_creation/demo.ipynb | 586 ----------------------------------- 1 file changed, 586 deletions(-) diff --git a/src/test_creation/demo.ipynb b/src/test_creation/demo.ipynb index ad92f50..ed86bd9 100644 --- a/src/test_creation/demo.ipynb +++ b/src/test_creation/demo.ipynb @@ -77,592 +77,6 @@ "parser.export_evaluation_report('report.html', 'html', exist_ok=True)" ] }, - { - "cell_type": "code", - "execution_count": 3, - "id": "d717ba5d-dc9d-477d-a9db-ccb993f48f09", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Report:\n", - " is_Satisfied \\\n", - "ID Title \n", - "1.1 Write Descriptive Test Names 1.0 \n", - "1.2 Keep Tests Focused 1.0 \n", - "2.1 Ensure Data File Loads as Expected 0.5 \n", - "5.1 Validate Model Input and Output Compatibility 1.0 \n", - "\n", - " n_files_tested \\\n", - "ID Title \n", - "1.1 Write Descriptive Test Names 3 \n", - "1.2 Keep Tests Focused 3 \n", - "2.1 
Ensure Data File Loads as Expected 3 \n", - "5.1 Validate Model Input and Output Compatibility 3 \n", - "\n", - " functions \n", - "ID Title \n", - "1.1 Write Descriptive Test Names [test_random_train_test_split, test_precision_... \n", - "1.2 Keep Tests Focused [test_random_train_test_split, test_precision_... \n", - "2.1 Ensure Data File Loads as Expected [test_random_train_test_split] \n", - "5.1 Validate Model Input and Output Compatibility [test_random_train_test_split, test_precision_... \n", - "\n", - "Score: 3.5/4\n", - "\n" - ] - } - ], - "source": [ - "score = evaluator.get_completeness_score()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "273db18c-13c4-4c86-a4c8-f42e0b0e37c5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
   | ID  | Title                                          | file                           | Evaluation | Score
3  | 5.1 | Validate Model Input and Output Compatibility  | tests/test_cross_validation.py | Satisfied  | 1.0
7  | 5.1 | Validate Model Input and Output Compatibility  | tests/test_evaluation.py       | Satisfied  | 1.0
11 | 5.1 | Validate Model Input and Output Compatibility  | tests/test_data.py             | Satisfied  | 1.0
\n", - "
" - ], - "text/plain": [ - " ID Title \\\n", - "3 5.1 Validate Model Input and Output Compatibility \n", - "7 5.1 Validate Model Input and Output Compatibility \n", - "11 5.1 Validate Model Input and Output Compatibility \n", - "\n", - " file Evaluation Score \n", - "3 tests/test_cross_validation.py Satisfied 1.0 \n", - "7 tests/test_evaluation.py Satisfied 1.0 \n", - "11 tests/test_data.py Satisfied 1.0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reports = pd.DataFrame(evaluator.evaluation_result)['report'].explode('report').apply(pd.Series)\n", - "reports['file'] = reports['file'].str[35:]\n", - "reports.query('ID == \"5.1\"')[['ID', 'Title', 'file', 'Evaluation', 'Score']]#.to_dict('records')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5a682a42-8807-48c6-9de4-0558838e3ccd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDTitlefileEvaluationScore
22.1Ensure Data File Loads as Expectedtests/test_cross_validation.pyPartially Satisfied0.5
62.1Ensure Data File Loads as Expectedtests/test_evaluation.pyNot Satisfied0.0
102.1Ensure Data File Loads as Expectedtests/test_data.pyNot Satisfied0.0
\n", - "
" - ], - "text/plain": [ - " ID Title file \\\n", - "2 2.1 Ensure Data File Loads as Expected tests/test_cross_validation.py \n", - "6 2.1 Ensure Data File Loads as Expected tests/test_evaluation.py \n", - "10 2.1 Ensure Data File Loads as Expected tests/test_data.py \n", - "\n", - " Evaluation Score \n", - "2 Partially Satisfied 0.5 \n", - "6 Not Satisfied 0.0 \n", - "10 Not Satisfied 0.0 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reports.query('ID == \"2.1\"')[['ID', 'Title', 'file', 'Evaluation', 'Score']]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "889fd144-c4c1-4365-81f5-317f3cf6c4a9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDTitleRequirementObservationFunctionsEvaluationScorefile
01.1Write Descriptive Test NamesEach test function should have a clear, descri...The test function 'test_random_train_test_spli...[test_random_train_test_split]Satisfied1.0tests/test_cross_validation.py
11.2Keep Tests FocusedEach test should focus on a single scenario, u...The test function 'test_random_train_test_spli...[test_random_train_test_split]Satisfied1.0tests/test_cross_validation.py
22.1Ensure Data File Loads as ExpectedEnsure that data-loading functions correctly l...The test function 'test_random_train_test_spli...[test_random_train_test_split]Partially Satisfied0.5tests/test_cross_validation.py
35.1Validate Model Input and Output CompatibilityConfirm that the model accepts inputs of the c...The test function 'test_random_train_test_spli...[test_random_train_test_split]Satisfied1.0tests/test_cross_validation.py
41.1Write Descriptive Test NamesEach test function should have a clear, descri...The test functions have clear and descriptive ...[test_precision_at_k, test_precision_at_k_with...Satisfied1.0tests/test_evaluation.py
51.2Keep Tests FocusedEach test should focus on a single scenario, u...Each test focuses on a single scenario, using ...[test_precision_at_k, test_precision_at_k_with...Satisfied1.0tests/test_evaluation.py
62.1Ensure Data File Loads as ExpectedEnsure that data-loading functions correctly l...The provided test functions do not involve loa...[]Not Satisfied0.0tests/test_evaluation.py
75.1Validate Model Input and Output CompatibilityConfirm that the model accepts inputs of the c...The test functions validate the model's input ...[test_precision_at_k, test_precision_at_k_with...Satisfied1.0tests/test_evaluation.py
81.1Write Descriptive Test NamesEach test function should have a clear, descri...The test functions have clear and descriptive ...[test_fitting, test_fitting_no_identity, test_...Satisfied1.0tests/test_data.py
91.2Keep Tests FocusedEach test should focus on a single scenario, u...The test functions focus on specific scenarios...[test_fitting, test_fitting_no_identity, test_...Satisfied1.0tests/test_data.py
102.1Ensure Data File Loads as ExpectedEnsure that data-loading functions correctly l...The test functions do not directly involve dat...[]Not Satisfied0.0tests/test_data.py
115.1Validate Model Input and Output CompatibilityConfirm that the model accepts inputs of the c...The test functions validate the shapes and typ...[test_fitting, test_fitting_no_identity, test_...Satisfied1.0tests/test_data.py
\n", - "
" - ], - "text/plain": [ - " ID Title \\\n", - "0 1.1 Write Descriptive Test Names \n", - "1 1.2 Keep Tests Focused \n", - "2 2.1 Ensure Data File Loads as Expected \n", - "3 5.1 Validate Model Input and Output Compatibility \n", - "4 1.1 Write Descriptive Test Names \n", - "5 1.2 Keep Tests Focused \n", - "6 2.1 Ensure Data File Loads as Expected \n", - "7 5.1 Validate Model Input and Output Compatibility \n", - "8 1.1 Write Descriptive Test Names \n", - "9 1.2 Keep Tests Focused \n", - "10 2.1 Ensure Data File Loads as Expected \n", - "11 5.1 Validate Model Input and Output Compatibility \n", - "\n", - " Requirement \\\n", - "0 Each test function should have a clear, descri... \n", - "1 Each test should focus on a single scenario, u... \n", - "2 Ensure that data-loading functions correctly l... \n", - "3 Confirm that the model accepts inputs of the c... \n", - "4 Each test function should have a clear, descri... \n", - "5 Each test should focus on a single scenario, u... \n", - "6 Ensure that data-loading functions correctly l... \n", - "7 Confirm that the model accepts inputs of the c... \n", - "8 Each test function should have a clear, descri... \n", - "9 Each test should focus on a single scenario, u... \n", - "10 Ensure that data-loading functions correctly l... \n", - "11 Confirm that the model accepts inputs of the c... \n", - "\n", - " Observation \\\n", - "0 The test function 'test_random_train_test_spli... \n", - "1 The test function 'test_random_train_test_spli... \n", - "2 The test function 'test_random_train_test_spli... \n", - "3 The test function 'test_random_train_test_spli... \n", - "4 The test functions have clear and descriptive ... \n", - "5 Each test focuses on a single scenario, using ... \n", - "6 The provided test functions do not involve loa... \n", - "7 The test functions validate the model's input ... \n", - "8 The test functions have clear and descriptive ... \n", - "9 The test functions focus on specific scenarios... \n", - "10 The test functions do not directly involve dat... \n", - "11 The test functions validate the shapes and typ... \n", - "\n", - " Functions Evaluation \\\n", - "0 [test_random_train_test_split] Satisfied \n", - "1 [test_random_train_test_split] Satisfied \n", - "2 [test_random_train_test_split] Partially Satisfied \n", - "3 [test_random_train_test_split] Satisfied \n", - "4 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", - "5 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", - "6 [] Not Satisfied \n", - "7 [test_precision_at_k, test_precision_at_k_with... Satisfied \n", - "8 [test_fitting, test_fitting_no_identity, test_... Satisfied \n", - "9 [test_fitting, test_fitting_no_identity, test_... Satisfied \n", - "10 [] Not Satisfied \n", - "11 [test_fitting, test_fitting_no_identity, test_... 
Satisfied \n", - "\n", - " Score file \n", - "0 1.0 tests/test_cross_validation.py \n", - "1 1.0 tests/test_cross_validation.py \n", - "2 0.5 tests/test_cross_validation.py \n", - "3 1.0 tests/test_cross_validation.py \n", - "4 1.0 tests/test_evaluation.py \n", - "5 1.0 tests/test_evaluation.py \n", - "6 0.0 tests/test_evaluation.py \n", - "7 1.0 tests/test_evaluation.py \n", - "8 1.0 tests/test_data.py \n", - "9 1.0 tests/test_data.py \n", - "10 0.0 tests/test_data.py \n", - "11 1.0 tests/test_data.py " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reports" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "07875448-9c58-4ec0-94b8-de9be8870011", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Report:\n", - " is_Satisfied \\\n", - "ID Title \n", - "2.1 Test Data Fetching and File Reading 0.0 \n", - "3.1 Validate Data Shape and Values 0.0 \n", - "3.2 Check for Duplicate Records in Data 0.0 \n", - "4.1 Verify Data Split Proportion 1.0 \n", - "5.1 Test Model Output Shape 0.5 \n", - "6.1 Verify Evaluation Metrics Implementation 0.5 \n", - "6.2 Evaluate Model's Performance Against Thresholds 0.5 \n", - "8.1 Validate Outliers Detection and Handling 0.0 \n", - "\n", - " n_files_tested \n", - "ID Title \n", - "2.1 Test Data Fetching and File Reading 2 \n", - "3.1 Validate Data Shape and Values 2 \n", - "3.2 Check for Duplicate Records in Data 2 \n", - "4.1 Verify Data Split Proportion 3 \n", - "5.1 Test Model Output Shape 2 \n", - "6.1 Verify Evaluation Metrics Implementation 2 \n", - "6.2 Evaluate Model's Performance Against Thresholds 2 \n", - "8.1 Validate Outliers Detection and Handling 2 \n", - "\n", - "Score: 2.5/8\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "'2.5/8'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "parser = ResponseParser(response)\n", - "parser.get_completeness_score(verbose=True)\n", - "#parser.export_evaluation_report(report_output_path, report_output_format, exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "60b2e5a4-76fb-449c-8745-d5538302d20c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"# Test Evaluation Report\\n\\n## Summary\\n\\n**Completeness Score**: 2.5/8\\n\\n**Completeness Score per Checklist Item**: \\n\\n| ID | Title | is_Satisfied | n_files_tested |\\n|-----:|:------------------------------------------------|---------------:|-----------------:|\\n| 2.1 | Test Data Fetching and File Reading | 0 | 2 |\\n| 3.1 | Validate Data Shape and Values | 0 | 2 |\\n| 3.2 | Check for Duplicate Records in Data | 0 | 2 |\\n| 4.1 | Verify Data Split Proportion | 1 | 3 |\\n| 5.1 | Test Model Output Shape | 0.5 | 2 |\\n| 6.1 | Verify Evaluation Metrics Implementation | 0.5 | 2 |\\n| 6.2 | Evaluate Model's Performance Against Thresholds | 0.5 | 2 |\\n| 8.1 | Validate Outliers Detection and Handling | 0 | 2 |\\n\\n## Details\\n\\n### 2.1 Test Data Fetching and File Reading\\n\\n**Requirement**: Verify that the data fetching API or data file reading functionality works correctly. Ensure that proper error handling is in place for scenarios such as missing files, incorrect file formats, and network errors.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly address data fetching or file reading. 
It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not involve data fetching or file reading operations.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 3.1 Validate Data Shape and Values\\n\\n**Requirement**: Check that the data has the expected shape and that all values meet domain-specific constraints, such as non-negative distances.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not validate data shape or values. It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not validate data shape or values.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 3.2 Check for Duplicate Records in Data\\n\\n**Requirement**: Check for duplicate records in the dataset and ensure that there are none.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not check for duplicate records in the dataset.\\n - (test_data.py) The code does not check for duplicate records in the data.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 4.1 Verify Data Split Proportion\\n\\n**Requirement**: Check that the data is split into training and testing sets in the expected proportion.\\n\\n**Observations:**\\n\\n - (test_cross_validation.py) The code includes a test function test_random_train_test_split that verifies the data split proportion.\\n - (test_evaluation.py) The code generates synthetic data for testing but does not explicitly verify the data split proportion.\\n - (test_data.py) The code does not verify data split proportion.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_cross_validation.py', 'Functions': ['test_random_train_test_split'], 'Line Numbers': [17]}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 5.1 Test Model Output Shape\\n\\n**Requirement**: Validate that the model's output has the expected shape.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly test the model's output shape.\\n - (test_data.py) The code validates the model's output shape.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': ['test_fitting'], 'Line Numbers': [7]}\\n\\n### 6.1 Verify Evaluation Metrics Implementation\\n\\n**Requirement**: Verify that the evaluation metrics are correctly implemented and appropriate for the model's task.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes alternative test implementations for precision, recall, and AUC metrics.\\n - (test_data.py) The code does not 
verify evaluation metrics implementation.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': ['_precision_at_k', '_recall_at_k', '_auc'], 'Line Numbers': [34, 78, 122]}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 6.2 Evaluate Model's Performance Against Thresholds\\n\\n**Requirement**: Compute evaluation metrics for both the training and testing datasets and ensure that these metrics exceed predefined threshold values, indicating acceptable model performance.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes tests for precision, recall, and AUC metrics but does not explicitly compare them against predefined threshold values.\\n - (test_data.py) The code does not evaluate model performance against thresholds.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n### 8.1 Validate Outliers Detection and Handling\\n\\n**Requirement**: Detect outliers in the dataset. Ensure that the outlier detection mechanism is sensitive enough to flag true outliers while ignoring minor anomalies.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not address outliers detection and handling.\\n - (test_data.py) The code does not validate outliers detection and handling.\\n\\n**Function References:**\\n\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_evaluation.py', 'Functions': [], 'Line Numbers': []}\\n - {'File Path': '../../data/raw/openja/lightfm_demo/tests/test_data.py', 'Functions': [], 'Line Numbers': []}\\n\\n\"" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tmp = parser.as_markdown()\n", - "tmp" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "061d9d78-843e-4b70-9a45-12912792bb98", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'# Test Evaluation Report\\n\\n## Summary\\n\\n**Completeness Score**: 2.5/8\\n\\n**Completeness Score per Checklist Item**: \\n\\n| ID | Title | is_Satisfied | n_files_tested |\\n|-----:|:------------------------------------------------|---------------:|-----------------:|\\n| 2.1 | Test Data Fetching and File Reading | 0 | 2 |\\n| 3.1 | Validate Data Shape and Values | 0 | 2 |\\n| 3.2 | Check for Duplicate Records in Data | 0 | 2 |\\n| 4.1 | Verify Data Split Proportion | 1 | 3 |\\n| 5.1 | Test Model Output Shape | 0.5 | 2 |\\n| 6.1 | Verify Evaluation Metrics Implementation | 0.5 | 2 |\\n| 6.2 | Evaluate Model\\'s Performance Against Thresholds | 0.5 | 2 |\\n| 8.1 | Validate Outliers Detection and Handling | 0 | 2 |\\n\\n## Details\\n\\n### 2.1 Test Data Fetching and File Reading\\n\\n**Requirement**: Verify that the data fetching API or data file reading functionality works correctly. Ensure that proper error handling is in place for scenarios such as missing files, incorrect file formats, and network errors.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly address data fetching or file reading. 
It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not involve data fetching or file reading operations.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 3.1 Validate Data Shape and Values\\n\\n**Requirement**: Check that the data has the expected shape and that all values meet domain-specific constraints, such as non-negative distances.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not validate data shape or values. It focuses on generating synthetic data for testing.\\n - (test_data.py) The code does not validate data shape or values.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 3.2 Check for Duplicate Records in Data\\n\\n**Requirement**: Check for duplicate records in the dataset and ensure that there are none.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not check for duplicate records in the dataset.\\n - (test_data.py) The code does not check for duplicate records in the data.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 4.1 Verify Data Split Proportion\\n\\n**Requirement**: Check that the data is split into training and testing sets in the expected proportion.\\n\\n**Observations:**\\n\\n - (test_cross_validation.py) The code includes a test function test_random_train_test_split that verifies the data split proportion.\\n - (test_evaluation.py) The code generates synthetic data for testing but does not explicitly verify the data split proportion.\\n - (test_data.py) The code does not verify data split proportion.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_cross_validation.py\\', \\'Functions\\': [\\'test_random_train_test_split\\'], \\'Line Numbers\\': [\"[17](https://github.com/lyst/lightfm/blob/master/tests/test_cross_validation.py#L17)\"]}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 5.1 Test Model Output Shape\\n\\n**Requirement**: Validate that the model\\'s output has the expected shape.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not directly test the model\\'s output shape.\\n - (test_data.py) The code validates the model\\'s output shape.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [\\'test_fitting\\'], \\'Line Numbers\\': [7]}\\n\\n### 6.1 Verify Evaluation Metrics Implementation\\n\\n**Requirement**: 
Verify that the evaluation metrics are correctly implemented and appropriate for the model\\'s task.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes alternative test implementations for precision, recall, and AUC metrics.\\n - (test_data.py) The code does not verify evaluation metrics implementation.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [\\'_precision_at_k\\', \\'_recall_at_k\\', \\'_auc\\'], \\'Line Numbers\\': [34, 78, 122]}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 6.2 Evaluate Model\\'s Performance Against Thresholds\\n\\n**Requirement**: Compute evaluation metrics for both the training and testing datasets and ensure that these metrics exceed predefined threshold values, indicating acceptable model performance.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code includes tests for precision, recall, and AUC metrics but does not explicitly compare them against predefined threshold values.\\n - (test_data.py) The code does not evaluate model performance against thresholds.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n### 8.1 Validate Outliers Detection and Handling\\n\\n**Requirement**: Detect outliers in the dataset. Ensure that the outlier detection mechanism is sensitive enough to flag true outliers while ignoring minor anomalies.\\n\\n**Observations:**\\n\\n - (test_evaluation.py) The code does not address outliers detection and handling.\\n - (test_data.py) The code does not validate outliers detection and handling.\\n\\n**Function References:**\\n\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_evaluation.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n - {\\'File Path\\': \\'../../data/raw/openja/lightfm_demo/tests/test_data.py\\', \\'Functions\\': [], \\'Line Numbers\\': []}\\n\\n'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tmp = tmp.replace('17', '\"[17](https://github.com/lyst/lightfm/blob/master/tests/test_cross_validation.py#L17)\"')\n", - "tmp" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "5a980ec3-b4bf-41ce-ad27-acf04eb076c7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "''" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pypandoc\n", - "pypandoc.convert_text(tmp.replace(\"'\", \"\\\\'\"), 'html', format='md', outputfile='test.html')" - ] - }, { "cell_type": "code", "execution_count": null, From 5e8a6e6096f82002189e671a3c3a520922f03f5e Mon Sep 17 00:00:00 2001 From: John Shiu Date: Wed, 29 May 2024 19:04:17 -0700 Subject: [PATCH 07/23] resolved bugs due to merge conflict --- src/test_creation/analyze.py | 99 --------------------- src/test_creation/modules/workflow/parse.py | 2 + 2 files changed, 2 insertions(+), 99 deletions(-) diff --git a/src/test_creation/analyze.py b/src/test_creation/analyze.py index acc54ef..3fb5a28 100644 --- a/src/test_creation/analyze.py +++ b/src/test_creation/analyze.py @@ -12,105 +12,6 @@ load_dotenv() -<<<<<<< HEAD -class TestEvaluator: - def 
__init__(self, llm: LanguageModelLike, extractor: RepoFileExtractor, checklist: Checklist, retries: int = 3): - self.llm = llm - self.checklist = checklist - self.file_extractor = extractor - - self.retries = retries - - self.files = self.file_extractor.extract() - if not self.files: - print("File loader returned no files!") - - self.test_items = self._load_tests_from_checklist() - if not self.test_items: - print("Loaded checklist successfully, but it contains no test items!") - - class TestItemEvaluation(BaseModel): - ID: str = Field(description="The corresponding `ID` of the checklist item provided") - Title: str = Field(description="The corresponding `Title` of the checklist item provided") - Requirement: str = Field(description="The corresponding `Requirement` of the checklist item provided") - Observation: str = Field(description="Your detailed observation of the code in accordance to the given checklist item") - Functions: List[str] = Field(description="Test functions that satisfy the given requirement (if any)") - Evaluation: str = Field(description="The summarized evaluation. Must be one of Satisfied/Partially Satisfied/Not Satisfied.") - Score: int = Field(description="The score obtained from the given evaluation (1 for Satisfied / 0.5 for Partially Satisfied / 0 for Not Satisfied)") - - class EvalResult(BaseModel): - results: List[TestItemEvaluation] - - self.parser = JsonOutputParser(pydantic_object=EvalResult) - - self.prompt = PromptTemplate( - template="You are an expert Machine Learning Engineer.\n" - "Please help to evaluate the following code using the given checklist.\n" - "{format_instructions}\n" - "For a test item to be considered as `Satisfied` or `Partially Satisfied`, " - "the corresponding function(s) satisfying the item's requirement must be " - "provided in the `Functions` attribute.\n" - "Here is the checklist as a list of JSON objects:\n```{checklist}```\n" - "Here is the code to be analyzed:\n{context}", - description="Code Review for Machine Learning Project", - input_variables=["checklist", "context"], - partial_variables={"format_instructions": self.parser.get_format_instructions()}, - ) - - self.chain = self.prompt | self.llm | self.parser - - @staticmethod - def _load_test_file_into_splits(file_path: str) -> List[Document]: - loader = PythonLoader(file_path) - py = loader.load() - py_splits = RecursiveCharacterTextSplitter.from_language(language=Language.PYTHON, chunk_size=1000, - chunk_overlap=0).split_documents(py) - return py_splits - - def _load_tests_from_checklist(self) -> str: - checklist = self.checklist.get_all_tests(['ID', 'Title', 'Requirement']) - return json.dumps(checklist) - - def evaluate(self, verbose: bool = False) -> List[dict]: - result = [] - for fp in tqdm(self.files): - if verbose: - print(fp) - splits = self._load_test_file_into_splits(fp) - if verbose: - print(f"# splits: {len(self.files)}") - # FIXME: it sometimes tests only part of the checklist items - - response = None - retry_count = 0 - while not response and retry_count < self.retries: - try: - response = self.chain.invoke({"context": splits, "checklist": self.test_items}) - except ValidationError as e: - retry_count += 1 - continue - - if not response: - raise RuntimeError(f"Unable to obtain valid response from LLM within {self.retries} attempts") - - report = response['results'] - repo = self.file_extractor._repo - for item in report: - item['file'] = fp - item['lineno'] = [repo.ffl_map[fp][func] for func in item['Functions']] - item['lineno_href'] = [ - 
f"[{lineno}]({repo._get_git_direct_link(repo._get_relative_path(fp), lineno)})" - for lineno in item['lineno'] - ] - result += [{ - 'file': fp, - 'report': report, - }] - return result - - -======= ->>>>>>> main if __name__ == '__main__': def main(checklist_path, repo_path, report_output_path, report_output_format='html'): """ diff --git a/src/test_creation/modules/workflow/parse.py b/src/test_creation/modules/workflow/parse.py index 4a40858..e8d7b2d 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -7,6 +7,8 @@ from .response import EvaluationResponse from ..mixins import ExportableMixin +from modules.code_analyzer.repo import Repository + class ResponseParser(ExportableMixin): def __init__(self, response: EvaluationResponse, respository: Repository = None): From a364786987c1a52909f4bda6bf65429410dcfb65 Mon Sep 17 00:00:00 2001 From: John Shiu Date: Wed, 29 May 2024 19:04:29 -0700 Subject: [PATCH 08/23] saved demo --- src/test_creation/demo.ipynb | 96 +++++++++++------------------------- 1 file changed, 29 insertions(+), 67 deletions(-) diff --git a/src/test_creation/demo.ipynb b/src/test_creation/demo.ipynb index fe97609..e70f971 100644 --- a/src/test_creation/demo.ipynb +++ b/src/test_creation/demo.ipynb @@ -33,7 +33,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:32<00:00, 10.71s/it]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:33<00:00, 11.16s/it]\n" ] } ], @@ -55,16 +55,6 @@ "output_type": "stream", "text": [ "Report:\n", - " Requirement \\\n", - "ID Title \n", - "2.1 Ensure Data File Loads as Expected Ensure that data-loading functions correctly l... \n", - "3.2 Data in the Expected Format Verify that the data to be ingested matches th... \n", - "3.5 Check for Duplicate Records in Data Check for duplicate records in the dataset and... \n", - "4.2 Verify Data Split Proportion Check that the data is split into training and... \n", - "5.3 Ensure Model Output Shape Aligns with Expectation Ensure the shape of the model's output aligns ... \n", - "6.1 Verify Evaluation Metrics Implementation Verify that the evaluation metrics are correct... \n", - "6.2 Evaluate Model's Performance Against Thresholds Compute evaluation metrics for both the traini... 
\n", - "\n", " is_Satisfied \\\n", "ID Title \n", "2.1 Ensure Data File Loads as Expected 0.0 \n", @@ -72,47 +62,27 @@ "3.5 Check for Duplicate Records in Data 0.0 \n", "4.2 Verify Data Split Proportion 0.5 \n", "5.3 Ensure Model Output Shape Aligns with Expectation 0.0 \n", - "6.1 Verify Evaluation Metrics Implementation 1.0 \n", + "6.1 Verify Evaluation Metrics Implementation 0.5 \n", "6.2 Evaluate Model's Performance Against Thresholds 0.5 \n", "\n", - " n_files_tested \\\n", - "ID Title \n", - "2.1 Ensure Data File Loads as Expected 3 \n", - "3.2 Data in the Expected Format 3 \n", - "3.5 Check for Duplicate Records in Data 3 \n", - "4.2 Verify Data Split Proportion 3 \n", - "5.3 Ensure Model Output Shape Aligns with Expectation 3 \n", - "6.1 Verify Evaluation Metrics Implementation 3 \n", - "6.2 Evaluate Model's Performance Against Thresholds 3 \n", - "\n", - " Observations \\\n", - "ID Title \n", - "2.1 Ensure Data File Loads as Expected [(test_cross_validation.py) The code does not ... \n", - "3.2 Data in the Expected Format [(test_cross_validation.py) The code does not ... \n", - "3.5 Check for Duplicate Records in Data [(test_cross_validation.py) The code does not ... \n", - "4.2 Verify Data Split Proportion [(test_cross_validation.py) The code tests the... \n", - "5.3 Ensure Model Output Shape Aligns with Expectation [(test_cross_validation.py) The code does not ... \n", - "6.1 Verify Evaluation Metrics Implementation [(test_cross_validation.py) The code does not ... \n", - "6.2 Evaluate Model's Performance Against Thresholds [(test_cross_validation.py) The code does not ... \n", + " n_files_tested \n", + "ID Title \n", + "2.1 Ensure Data File Loads as Expected 3 \n", + "3.2 Data in the Expected Format 3 \n", + "3.5 Check for Duplicate Records in Data 3 \n", + "4.2 Verify Data Split Proportion 3 \n", + "5.3 Ensure Model Output Shape Aligns with Expectation 3 \n", + "6.1 Verify Evaluation Metrics Implementation 3 \n", + "6.2 Evaluate Model's Performance Against Thresholds 3 \n", "\n", - " Function References \n", - "ID Title \n", - "2.1 Ensure Data File Loads as Expected [{'File Path': '../../data/raw/openja/lightfm_... \n", - "3.2 Data in the Expected Format [{'File Path': '../../data/raw/openja/lightfm_... \n", - "3.5 Check for Duplicate Records in Data [{'File Path': '../../data/raw/openja/lightfm_... \n", - "4.2 Verify Data Split Proportion [{'File Path': '../../data/raw/openja/lightfm_... \n", - "5.3 Ensure Model Output Shape Aligns with Expectation [{'File Path': '../../data/raw/openja/lightfm_... \n", - "6.1 Verify Evaluation Metrics Implementation [{'File Path': '../../data/raw/openja/lightfm_... \n", - "6.2 Evaluate Model's Performance Against Thresholds [{'File Path': '../../data/raw/openja/lightfm_... 
\n", - "\n", - "Score: 2.0/7\n", + "Score: 1.5/7\n", "\n" ] }, { "data": { "text/plain": [ - "'2.0/7'" + "'1.5/7'" ] }, "execution_count": 4, @@ -121,7 +91,7 @@ } ], "source": [ - "parser = ResponseParser(response)\n", + "parser = ResponseParser(response, repo)\n", "parser.get_completeness_score(verbose=True)" ] }, @@ -199,7 +169,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:35<00:00, 11.94s/it]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.40s/it]\n" ] }, { @@ -213,7 +183,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:32<00:00, 10.70s/it]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.36s/it]\n" ] }, { @@ -228,7 +198,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.83s/it]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:39<00:00, 13.16s/it]\n" ] }, { @@ -242,7 +212,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:40<00:00, 13.34s/it]\n" + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:42<00:00, 14.24s/it]\n" ] } ], @@ -306,14 +276,14 @@ " \n", " \n", " 1\n", - " 0.142857\n", + " 0.214286\n", " ID ...\n", " gpt-3.5-turbo\n", " 2\n", " \n", " \n", " 2\n", - " 0.714286\n", + " 0.642857\n", " ID ...\n", " gpt-4o\n", " 1\n", @@ -332,8 +302,8 @@ "text/plain": [ " score report model_name \\\n", "0 0.214286 ID ... gpt-3.5-turbo \n", - "1 0.142857 ID ... gpt-3.5-turbo \n", - "2 0.714286 ID ... gpt-4o \n", + "1 0.214286 ID ... gpt-3.5-turbo \n", + "2 0.642857 ID ... gpt-4o \n", "3 0.714286 ID ... gpt-4o \n", "\n", " test_no \n", @@ -428,7 +398,7 @@ " Check that the data is split into training and...\n", " 0.5\n", " 3\n", - " [(test_cross_validation.py) The code does spli...\n", + " [(test_cross_validation.py) The code includes ...\n", " [{'File Path': '../../data/raw/openja/lightfm_...\n", " \n", " \n", @@ -488,7 +458,7 @@ "0 3 [(test_cross_validation.py) The code does not ... \n", "1 3 [(test_cross_validation.py) The code does not ... 
\n", "2 3 [(test_cross_validation.py) The code does not ... \n", - "3 3 [(test_cross_validation.py) The code does spli... \n", + "3 3 [(test_cross_validation.py) The code includes ... \n", "4 3 [(test_cross_validation.py) The code does not ... \n", "5 3 [(test_cross_validation.py) The code does not ... \n", "6 3 [(test_cross_validation.py) The code does not ... \n", @@ -559,12 +529,12 @@ " \n", " \n", " gpt-3.5-turbo\n", - " 0.002551\n", + " 0.000000\n", " 2\n", " \n", " \n", " gpt-4o\n", - " 0.000000\n", + " 0.002551\n", " 2\n", " \n", " \n", @@ -575,8 +545,8 @@ " score \n", " var count\n", "model_name \n", - "gpt-3.5-turbo 0.002551 2\n", - "gpt-4o 0.000000 2" + "gpt-3.5-turbo 0.000000 2\n", + "gpt-4o 0.002551 2" ] }, "execution_count": 11, @@ -597,19 +567,11 @@ "id": "5b1f94c8-1883-4435-84c7-b0687a6e6387", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/vd/r3dvzdx10pxf47gvdqf81r9h0000gn/T/ipykernel_42405/1426530661.py:5: RuntimeWarning: divide by zero encountered in scalar divide\n", - " f_score = score_var[('score', 'var')]['gpt-3.5-turbo'] / score_var[('score', 'var')]['gpt-4o'] # var(prev) / var(curr)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "p-value: 0.0\n", + "p-value: 1.0\n", "\n", "2-tail test:\n", " Successfully reject the null hypothesis: Var(Completeness_Score(Current Version)) == Var(Completeness_Score(Last Week Version))\n" From 08b7d1350e55897dc5d225ca1a3b7a16784734c0 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Wed, 29 May 2024 20:48:00 -0700 Subject: [PATCH 09/23] fix misleading variable name --- src/test_creation/modules/code_analyzer/repo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test_creation/modules/code_analyzer/repo.py b/src/test_creation/modules/code_analyzer/repo.py index f824886..981a5a1 100644 --- a/src/test_creation/modules/code_analyzer/repo.py +++ b/src/test_creation/modules/code_analyzer/repo.py @@ -99,13 +99,13 @@ def _get_all_files(self, include_git_dir: bool = False): return file_paths def _get_language_file_map(self): - file_language_map = defaultdict(list) + language_file_map = defaultdict(list) files = self._get_all_files() for file in files: for k, v in self.fileext_language_map.items(): if file.endswith(k): - file_language_map[v].append(file) - return file_language_map # FIXME: why is it called file_language_map instead of language_file_map? 
+ language_file_map[v].append(file) + return language_file_map def _get_file_function_lineno_map(self): file_function_lineno_map = defaultdict(lambda: defaultdict(int)) From 7f02ed5d156bd50c905616c3b85aec8b885fbac0 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:15:10 -0700 Subject: [PATCH 10/23] break down `Repository` and separate git-related methods to `GitContext` --- .../modules/code_analyzer/git.py | 64 +++++++++++ .../modules/code_analyzer/repo.py | 103 ++++++------------ 2 files changed, 97 insertions(+), 70 deletions(-) create mode 100644 src/test_creation/modules/code_analyzer/git.py diff --git a/src/test_creation/modules/code_analyzer/git.py b/src/test_creation/modules/code_analyzer/git.py new file mode 100644 index 0000000..85a9fcc --- /dev/null +++ b/src/test_creation/modules/code_analyzer/git.py @@ -0,0 +1,64 @@ +import re +from typing import Union, Optional +from pathlib import Path +from copy import copy + +from git import Repo + + +class GitContext: + def __init__(self, git_dir: Union[str, Path]): + self.git_dir = Path(git_dir) + self.git_repo = Repo(self.git_dir) + + self.branch = self._get_current_branch() + self.host, self.org, self.repo_name = self._get_remote_info() + + self.remote_link_format_map = { + "github": "{host}/{org}/{repo}/blob/{branch}/{path}#L{line_num}", + "gitlab": "{host}/{org}/{repo}/blob/{branch}/{path}#L{line_num}", + "bitbucket": "{host}/{org}/{repo}/src/{branch}/{path}#lines-{" + "line_num}", + "gitee": "{host}/{org}/{repo}/blob/{branch}{path}#L{line_num}" + } + self.remote_protocol = "https" + + def _get_current_branch(self): + if self.git_repo.head.is_detached: + return self.git_repo.head.commit.hexsha + else: + return self.git_repo.active_branch.name + + def _get_remote_info(self) -> tuple[Optional[str], Optional[str], str]: + if self.git_repo.remotes: + if 'origin' in [r.name for r in self.git_repo.remotes]: + remote = self.git_repo.remote() + else: + remote = self.git_repo.remotes[0] + remote_url = remote.url + # git urls: + # https://git-scm.com/docs/git-clone#URLS + pattern = r"(?:\w+:\/\/)?(?:\w+@)?(.+)[\/:](.+)\/([^\.]+)(?:\.git)?" + host, org, repo_name = re.search(pattern, remote_url).groups() + return host, org, repo_name + else: + print("This git repository has no remote") + return None, None, "." + + def construct_remote_link_to_file(self, file_path: Union[str, Path], + line_num: Optional[int] = 0) -> str: + rel_path = Path(file_path).relative_to(self.git_dir) + hits = [keyword for keyword in + self.remote_link_format_map.keys() if keyword in self.host] + if hits: + hit = hits[0] + f_str = copy(self.remote_link_format_map[hit]) + if line_num is None: + f_str = f_str.split("#")[0] + return f"{self.remote_protocol}://" + \ + f_str.format(host=self.host, org=self.org, repo=self.repo_name, + branch=self.branch, path=rel_path, + line_num=line_num) + else: + print("No matching service. 
Using local link instead...") + return f"file://{str(self.git_dir)}/{rel_path}" diff --git a/src/test_creation/modules/code_analyzer/repo.py b/src/test_creation/modules/code_analyzer/repo.py index bba4110..24b4c7c 100644 --- a/src/test_creation/modules/code_analyzer/repo.py +++ b/src/test_creation/modules/code_analyzer/repo.py @@ -1,27 +1,30 @@ import os -import re import logging +from functools import wraps from pathlib import Path from collections import defaultdict -from configparser import ConfigParser -from typing import Dict, List +from typing import Optional from .analyzers.python import PythonNaiveCodeAnalyzer, PythonASTCodeAnalyzer +from .git import GitContext logger = logging.getLogger("test-creation.repo") +def requires_git_context(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + """wrapper function to check if we have git context.""" + if self.git_context is None: + raise RuntimeError("This repository has no git context.") + func(self, *args, **kwargs) + + return wrapper + + class Repository: def __init__(self, path: str): - # git metadata - self.url = '' - self.mode = '' - self.service = '' - self.user = '' - self.name = '' - self.main_branch = '' - if not os.path.exists(path): raise FileNotFoundError(f"Repository {path} does not exist.") elif os.path.isfile(path): @@ -30,6 +33,9 @@ def __init__(self, path: str): if not os.path.exists(self.path / ".git"): # TODO: to be converted to use logger print("Warning: The repository is not a git repository.") + self.git_context = None + else: + self.git_context = GitContext(self.path) self.files = [] self.fileext_language_map = { @@ -46,55 +52,11 @@ def __init__(self, path: str): } self.lf_map = self._get_language_file_map() self.ffl_map = self._get_file_function_lineno_map() - try: - self._get_git_metadata() - except Exception as e: - logger.info(e) - - def _get_git_metadata(self): - config = ConfigParser() - if os.path.exists(self.path / '.git/config'): - config.read(self.path / '.git/config') - else: - raise FileNotFoundError('/.git/config does not exist') - - self.url = config['remote "origin"']['url'] - - if 'git@' in self.url: - self.mode = 'ssh' - pattern = 'git@(.*?):(.*?)/(.*?).git' - elif 'https' in self.url: - self.mode = 'https' - pattern = 'https://(.*?)/(.*?)/(.*?).git' - - self.service, self.user, self.name = re.search(pattern, self.url).group(1,2,3) - - if 'branch "master"' in list(config): - self.main_branch = 'master' - elif 'branch "main"' in list(config): - self.main_branch = 'main' - - return { - 'mode': self.mode, - 'service': self.service, - 'user': self.user, - 'name': self.name, - 'main_branch': self.main_branch - } - def _get_git_direct_link(self, file: str, lineno: int = None): - link = f'https://{self.service}/{self.user}/{self.name}/blob/{self.main_branch}/{file}' - if lineno: - link += f'#L{lineno}' - return link + @requires_git_context + def _get_git_direct_link(self, file: str, lineno: Optional[int] = None): + return self.git_context.construct_remote_link_to_file(file, line_num=lineno) - def _get_relative_path(self, file: str): - path = file.replace(self.path, '', 1) - if path[0] == '/': - return path - else: - return '/' + path - def _get_all_files(self, include_git_dir: bool = False): file_paths = [] results = list(os.walk(self.path)) @@ -107,7 +69,7 @@ def _get_all_files(self, include_git_dir: bool = False): file_paths.append(f'{root}/{file}') return file_paths - def _get_language_file_map(self): + def _get_language_file_map(self) -> dict[str, list[str]]: language_file_map = 
defaultdict(list) files = self._get_all_files() for file in files: @@ -116,16 +78,18 @@ def _get_language_file_map(self): language_file_map[v].append(file) return language_file_map - def _get_file_function_lineno_map(self): + def _get_file_function_lineno_map(self) -> dict[str, dict[str, list[str]]]: file_function_lineno_map = defaultdict(lambda: defaultdict(int)) - files = self.lf_map.get("Python", []) - ast = PythonASTCodeAnalyzer() # FIXME: only support Python ATS, what's the implication? - for file in files: - try: - ast.read(file) - file_function_lineno_map[file] = ast._get_function_lineno_map() - except Exception as e: - logger.info("Exception occurred when parsing using ast (Python 2 code?) Using naive parser...") + for lang, files in self.lf_map.items(): + # TODO: only Python is supported now + if lang == "Python": + ast = PythonASTCodeAnalyzer() + for file in files: + try: + ast.read(file) + file_function_lineno_map[lang][file] = ast._get_function_lineno_map() + except Exception as e: + logger.info("Exception occurred when parsing using ast (Python 2 code?) Using naive parser...") return file_function_lineno_map def list_languages(self): @@ -143,7 +107,7 @@ def list_packages(self): packages = list(set(packages)) return packages - def list_test_files(self) -> Dict[str, List[str]]: + def list_test_files(self) -> dict[str, list[str]]: testfiles = defaultdict(list) # for now only Python is supported files = self.lf_map.get("Python", []) @@ -160,4 +124,3 @@ def list_test_files(self) -> Dict[str, List[str]]: if naive.contains_test(): testfiles["Python"].append(file) return testfiles - From 613f73e3e3285d55a0589dca2c6b8b089039f6ed Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:15:52 -0700 Subject: [PATCH 11/23] move abstract `CodeAnalyzer` --- .../code_analyzer/analyzers/__init__.py | 7 ---- .../modules/code_analyzer/analyzers/python.py | 33 +++++++++++++++---- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/test_creation/modules/code_analyzer/analyzers/__init__.py b/src/test_creation/modules/code_analyzer/analyzers/__init__.py index 7f54637..e69de29 100644 --- a/src/test_creation/modules/code_analyzer/analyzers/__init__.py +++ b/src/test_creation/modules/code_analyzer/analyzers/__init__.py @@ -1,7 +0,0 @@ -from abc import ABC - - -class CodeAnalyzer(ABC): - def __init__(self): - pass - diff --git a/src/test_creation/modules/code_analyzer/analyzers/python.py b/src/test_creation/modules/code_analyzer/analyzers/python.py index eec85ae..aa84e56 100644 --- a/src/test_creation/modules/code_analyzer/analyzers/python.py +++ b/src/test_creation/modules/code_analyzer/analyzers/python.py @@ -1,19 +1,40 @@ +from abc import ABC, abstractmethod import ast +from typing import Union +from pathlib import Path from functools import wraps from collections import defaultdict -from . 
import CodeAnalyzer - def assert_have_read_content(f): @wraps(f) - def decorator(*args, **kwargs): - if args[0].content is None: + def decorator(self, *args, **kwargs): + if self.content is None: raise RuntimeError("No content has been read yet.") - return f(*args, **kwargs) + return f(self, *args, **kwargs) + return decorator +class CodeAnalyzer(ABC): + + @abstractmethod + def read(self, file_path: Union[str, Path]) -> None: + pass + + @abstractmethod + def list_imported_packages(self): + pass + + @abstractmethod + def list_all_functions(self): + pass + + @abstractmethod + def contains_test(self): + pass + + class PythonASTCodeAnalyzer(CodeAnalyzer): def __init__(self): super().__init__() @@ -26,7 +47,7 @@ def read(self, file_path: str): self._tree = ast.parse(self.content) @assert_have_read_content - def _get_function_lineno_map(self): # FIXME: when to use _xxx? when to use xxx? + def _get_function_lineno_map(self): function_lineno_map = defaultdict(int) for node in ast.walk(self._tree): if isinstance(node, ast.FunctionDef): From 93160a76d1c747a193accfb5fa22100ff28d33cc Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:16:43 -0700 Subject: [PATCH 12/23] include basic tests for gitcontext; improve tests by parametrizing --- tests/test_repo.py | 85 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 36 +++++++------------ 2 files changed, 98 insertions(+), 23 deletions(-) create mode 100644 tests/test_repo.py diff --git a/tests/test_repo.py b/tests/test_repo.py new file mode 100644 index 0000000..8fbccbf --- /dev/null +++ b/tests/test_repo.py @@ -0,0 +1,85 @@ +from pathlib import Path + +import pytest +from test_creation.modules.code_analyzer import repo as r +from test_creation.modules.code_analyzer.git import GitContext + + +@pytest.fixture() +def test_git_repo(git_repo): + # The fixture derives from `workspace` in `pytest-shutil`, so they contain + # a handle to the path `path` object (see https://path.readthedocs.io/) + path = git_repo.workspace + txt_file = path / 'hello.txt' + txt_file.write_text('hello world!') + + py_file = Path(path / 'src/python/main.py') + py_file.parent.mkdir(parents=True, exist_ok=True) + py_file.write_text('print("hello world!")') + + # We can run commands relative to the working directory + git_repo.run('git add .') + + # It's better to use the GitPython api directly - the 'api' attribute is + # a handle to the repository object. 
+ git_repo.api.index.commit("Initial commit") + + # The fixture has a URI property you can use in downstream systems + assert git_repo.uri.startswith('file://') + + return git_repo + + +################################################################################ +# Repository # +################################################################################ +def test_repository_should_be_able_to_read_git_repo(test_git_repo): + path = test_git_repo.workspace + repo = r.Repository(path) + assert any(['src/python/main.py' in file for file in repo._get_all_files()]) + + +################################################################################ +# GitContext # +################################################################################ +@pytest.mark.parametrize( + "fixture_name, remote_name, remote_url, expected", + [ + ( + "test_git_repo", + "origin", + "git@github.internal.com:UBC-MDS/testing-repo_1234.git", + ("github.internal.com", "UBC-MDS", "testing-repo_1234") + ), + ( + "test_git_repo", + "export", + "ssh://git@github.internal.com:UBC-MDS/testing-repo_1234.git", + ("github.internal.com", "UBC-MDS", "testing-repo_1234") + ), + ( + "test_git_repo", + "internal", + "https://github.com:8080/UBC-MDS/test-creation.git", + ("github.com:8080", "UBC-MDS", "test-creation") + ), + ( + "test_git_repo", + "origin", + "http://gitlab.example.com:8080/UBC-MDS/test-creation.git", + ("gitlab.example.com:8080", "UBC-MDS", "test-creation") + ), + ( + "test_git_repo", + "a", + "ftp://github.com/SoloSynth1/Solosynth1", + ("github.com", "SoloSynth1", "Solosynth1") + ), + ] +) +def test_git_context_can_extract_remote_git_urls(fixture_name, remote_name, + remote_url, expected, request): + repo = request.getfixturevalue(fixture_name) + repo.api.create_remote(remote_name, remote_url) + gc = GitContext(repo.workspace) + assert (gc.host, gc.org, gc.repo_name) == expected diff --git a/tests/test_utils.py b/tests/test_utils.py index a978032..cdda766 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,31 +1,21 @@ from pathlib import Path +import pytest from test_creation.modules.utils import get_extension -def test_extension_from_string_can_be_extracted_correctly(): - correct_path = "checklist/checklist.csv" - assert get_extension(correct_path) == "csv" - - -def test_extension_from_path_can_be_extracted_correctly(): - correct_path = Path("checklist/checklist.csv") - assert get_extension(correct_path) == "csv" - - -def test_extension_extracted_is_all_lower_cased(): - path = "ALL/CAPITAL/PATH/TEST.ZIP" - assert get_extension(path) == "zip" - - -def test_only_last_extension_will_be_extracted(): - path = "test/multi_ext.tar.gz" - assert get_extension(path) == "gz" - - -def test_file_with_no_extension_will_produce_empty_string(): - path = "test/README" - assert get_extension(path) == "" +@pytest.mark.parametrize( + "path, expected", + [ + ("checklist/checklist.csv", "csv"), + (Path("checklist/checklist.csv"), "csv"), + ("ALL/CAPITAL/PATH/TEST.ZIP", "zip"), + ("test/multi_ext.tar.gz", "gz"), + (Path("test/README"), "") + ] +) +def test_extension_from_string_can_be_extracted_correctly(path, expected): + assert get_extension(path) == expected def test_extracted_extension_does_not_start_with_dot(): From 01ba5b0a898db1546a27efbe16fe88e426f332f9 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:17:08 -0700 Subject: [PATCH 13/23] change prompt to improve response quality from LLM --- src/test_creation/modules/workflow/prompt_format.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/test_creation/modules/workflow/prompt_format.py b/src/test_creation/modules/workflow/prompt_format.py index 57ea354..205c161 100644 --- a/src/test_creation/modules/workflow/prompt_format.py +++ b/src/test_creation/modules/workflow/prompt_format.py @@ -25,7 +25,7 @@ class TestItemEvaluation(BaseModel): Title: str = Field(description="The corresponding `Title` of the checklist item provided") Requirement: str = Field(description="The corresponding `Requirement` of the checklist item provided") Observation: str = Field(description="Your detailed observation of the code in accordance to the given checklist item") - Functions: List[str] = Field(description="Test functions that satisfy the given requirement (if any)") + Functions: List[str] = Field(description="Test functions that satisfy the given requirement. If no function satisfies, an empty list i.e. [] should be returned.") Evaluation: str = Field(description="The summarized evaluation. Must be one of Satisfied/Partially Satisfied/Not Satisfied.") Score: float = Field(description="The score obtained from the given evaluation (1 for Satisfied / 0.5 for Partially Satisfied / 0 for Not Satisfied)") From 52dfea26f4243867a71114717423208064fbf3a2 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:17:55 -0700 Subject: [PATCH 14/23] add `GitPython` as dependency --- poetry.lock | 158 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 + 2 files changed, 159 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index d3f6288..8467de9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -573,6 +573,17 @@ traitlets = ">=4" [package.extras] test = ["pytest"] +[[package]] +name = "contextlib2" +version = "21.6.0" +description = "Backports and enhancements for the contextlib module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "contextlib2-21.6.0-py2.py3-none-any.whl", hash = "sha256:3fbdb64466afd23abaf6c977627b75b6139a5a3e8ce38405c5b413aed7a0471f"}, + {file = "contextlib2-21.6.0.tar.gz", hash = "sha256:ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869"}, +] + [[package]] name = "dataclasses-json" version = "0.6.6" @@ -663,6 +674,20 @@ files = [ {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, ] +[[package]] +name = "execnet" +version = "2.1.1" +description = "execnet: rapid multi-Python deployment" +optional = false +python-versions = ">=3.8" +files = [ + {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, + {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, +] + +[package.extras] +testing = ["hatch", "pre-commit", "pytest", "tox"] + [[package]] name = "executing" version = "2.0.1" @@ -802,6 +827,38 @@ files = [ {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, ] +[[package]] +name = "gitdb" +version = "4.0.11" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.43" +description = "GitPython is a Python 
library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.43-py3-none-any.whl", hash = "sha256:eec7ec56b92aad751f9912a73404bc02ba212a23adb2c7098ee668417051a1ff"}, + {file = "GitPython-3.1.43.tar.gz", hash = "sha256:35f314a9f878467f5453cc1fee295c3e18e52f1b99f10f6cf5b1682e968a9e7c"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] +test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] + [[package]] name = "greenlet" version = "3.0.3" @@ -1856,6 +1913,22 @@ files = [ {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, ] +[[package]] +name = "mock" +version = "5.1.0" +description = "Rolling backport of unittest.mock for all Pythons" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mock-5.1.0-py3-none-any.whl", hash = "sha256:18c694e5ae8a208cdb3d2c20a993ca1a7b0efa258c247a1e565150f477f83744"}, + {file = "mock-5.1.0.tar.gz", hash = "sha256:5e96aad5ccda4718e0a229ed94b2024df75cc2d55575ba5762d31f5767b8767d"}, +] + +[package.extras] +build = ["blurb", "twine", "wheel"] +docs = ["sphinx"] +test = ["pytest", "pytest-cov"] + [[package]] name = "multidict" version = "6.0.5" @@ -2391,6 +2464,39 @@ files = [ qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["docopt", "pytest"] +[[package]] +name = "path" +version = "16.14.0" +description = "A module wrapper for os.path" +optional = false +python-versions = ">=3.8" +files = [ + {file = "path-16.14.0-py3-none-any.whl", hash = "sha256:8ee37703cbdc7cc83835ed4ecc6b638226fb2b43b7b45f26b620589981a109a5"}, + {file = "path-16.14.0.tar.gz", hash = "sha256:dbaaa7efd4602fd6ba8d82890dc7823d69e5de740a6e842d9919b0faaf2b6a8e"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["appdirs", "more-itertools", "packaging", "pygments", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "pywin32"] + +[[package]] +name = "path-py" +version = "12.5.0" +description = "A module wrapper for os.path" +optional = false +python-versions = ">=3.5" +files = [ + {file = "path.py-12.5.0-py3-none-any.whl", hash = "sha256:a43e82eb2c344c3fd0b9d6352f6b856f40b8b7d3d65cc05978b42c3715668496"}, + {file = "path.py-12.5.0.tar.gz", hash = "sha256:8d885e8b2497aed005703d94e0fd97943401f035e42a136810308bff034529a8"}, +] + +[package.dependencies] +path = "*" + +[package.extras] +docs = ["jaraco.packaging (>=3.2)", "rst.linker (>=1.9)", "sphinx"] +testing = ["appdirs", "packaging", "pygments", "pytest (>=3.5,!=3.7.3)", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-flake8"] + [[package]] name = "pexpect" version = "4.9.0" @@ -2745,6 +2851,45 @@ pluggy = ">=1.5,<2.0" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-git" +version = "1.7.0" +description = 
"Git repository fixture for py.test" +optional = false +python-versions = "*" +files = [ + {file = "pytest-git-1.7.0.tar.gz", hash = "sha256:356fef84eb0d663d2a5eceafb3ff6b2c3043b2b964b1872b67e51979dbbb43f8"}, + {file = "pytest_git-1.7.0-py2.py3-none-any.whl", hash = "sha256:f0737e688bb6d53b4a501d9eba340885e63522ee57e17c24137525c7d9a17361"}, +] + +[package.dependencies] +gitpython = "*" +pytest = "*" +pytest-shutil = "*" + +[[package]] +name = "pytest-shutil" +version = "1.7.0" +description = "A goodie-bag of unix shell and environment tools for py.test" +optional = false +python-versions = "*" +files = [ + {file = "pytest-shutil-1.7.0.tar.gz", hash = "sha256:d8165261de76e7508505c341d94c02b113dc963f274543abca74dbfabd021261"}, + {file = "pytest_shutil-1.7.0-py2.py3-none-any.whl", hash = "sha256:b3568a675cb092c9b15c789ebd3046b79cfaca476868939748729d14557a98ff"}, +] + +[package.dependencies] +contextlib2 = "*" +execnet = "*" +mock = "*" +"path.py" = "*" +pytest = "*" +six = "*" +termcolor = "*" + +[package.extras] +tests = ["pytest"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3410,6 +3555,17 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -4356,4 +4512,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "3d29f42d5d54d85cf30292bf7fdbff588c68f9be3c873a24c82c48e13ee9d1db" +content-hash = "c29a8c64d644a7d809b5f4b5e910a4fe50dc0bd19859d60b94a727a270b929a7" diff --git a/pyproject.toml b/pyproject.toml index 6ca0046..a681543 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,12 +18,14 @@ langchain = "^0.2.1" langchain-openai = "^0.1.7" langchain-community = "^0.2.1" langchain-core = "^0.2.1" +gitpython = "^3.1.43" [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" jupyter-book = "^1.0.0" jupyterlab = "^4.2.1" pytest = "^8.2.1" +pytest-git = "^1.7.0" [tool.poetry.scripts] test-creation = "test_creation:cli_main" From c6b4a138c21870fe324afefb6c3416f762051473 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:18:47 -0700 Subject: [PATCH 15/23] fix validation logic to return invalid responses --- .../modules/workflow/evaluator.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/test_creation/modules/workflow/evaluator.py b/src/test_creation/modules/workflow/evaluator.py index 36f6e23..17cfa40 100644 --- a/src/test_creation/modules/workflow/evaluator.py +++ b/src/test_creation/modules/workflow/evaluator.py @@ -30,22 +30,24 @@ class TestEvaluator(PipelineRunner, ABC): """Abstract base class for test evaluators i.e. class object to run evaluation of test files from a given repository. 
""" - def __init__(self, llm: LanguageModelLike, prompt_format: PromptFormat, repository: Repository, - checklist: Checklist): + + def __init__(self, llm: LanguageModelLike, prompt_format: PromptFormat, + repository: Repository, checklist: Checklist): self.llm = llm self.checklist = checklist self.repository = repository self.prompt_format = prompt_format - self.test_items = None + self._test_items = None self.chain = self.prompt_format.prompt | self.llm | self.prompt_format.parser class PerFileTestEvaluator(TestEvaluator): """Concrete test evaluator that performs per-file evaluation.""" - def __init__(self, llm: LanguageModelLike, prompt_format: PromptFormat, repository: Repository, - checklist: Checklist, retries: int = 3): + + def __init__(self, llm: LanguageModelLike, prompt_format: PromptFormat, + repository: Repository, checklist: Checklist, retries: int = 3): super().__init__(llm, prompt_format, repository, checklist) self.retries = retries @@ -53,7 +55,8 @@ def __init__(self, llm: LanguageModelLike, prompt_format: PromptFormat, reposito if not self._files: print("File loader returned no files!") - self._test_items = self.checklist.get_all_tests(['ID', 'Title', 'Requirement']) + self._test_items = self.checklist.get_all_tests(['ID', 'Title', + 'Requirement']) if not self._test_items: print("Loaded checklist successfully, but it contains no test items!") @@ -67,9 +70,12 @@ def _load_test_file_into_splits(file_path: str) -> List[Document]: def _validate_response(self, raw_response: dict) -> None: """Validation logics that are not covered by pydantic or langchain.""" - # ensures the number of items in the response is the same as provided checklists - if len(raw_response['results']) != len(self.test_items): - raise ValidationError("Number of items returned from LLM does not match that in checklist.") + # ensures the number of items in the response is the same as provided + # checklists + if len(raw_response['results']) != len(self._test_items): + raise AssertionError("Number of items returned from LLM does not match that in checklist.") + if not all(['Functions' in item for item in raw_response['results']]): + raise AssertionError("Not all items returned contain the attribute `Functions`.") def run(self, verbose: bool = False) -> EvaluationResponse: eval_response = EvaluationResponse( @@ -106,8 +112,11 @@ def run(self, verbose: bool = False) -> EvaluationResponse: self._validate_response(response) except Exception as e: + if verbose: + print(f"error occurred: {e.__class__.__name__} - {str(e)}") errors.append({'name': e.__class__.__name__, 'description': str(e)}) retry_count += 1 + response = None continue if not response: From 7157a8b4f9b22d3d2b151a115ce817db054a839d Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 16:19:33 -0700 Subject: [PATCH 16/23] wip: change calling procedure for line number formatting --- src/test_creation/cli/repository.py | 2 +- src/test_creation/modules/workflow/parse.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/test_creation/cli/repository.py b/src/test_creation/cli/repository.py index ed1b5e1..63bc319 100644 --- a/src/test_creation/cli/repository.py +++ b/src/test_creation/cli/repository.py @@ -65,7 +65,7 @@ def evaluate(repo_path: str, report_output_path: str, repository=repo, checklist=checklist) response = evaluator.run() - parser = ResponseParser(response) + parser = ResponseParser(response, repository=repo) parser.get_completeness_score(verbose=verbose) 
parser.export_evaluation_report(report_output_path, exist_ok=overwrite) diff --git a/src/test_creation/modules/workflow/parse.py b/src/test_creation/modules/workflow/parse.py index 2bbd4fd..b914970 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -10,13 +10,13 @@ class ResponseParser(ExportableMixin): - def __init__(self, response: EvaluationResponse, respository: Repository = None): + def __init__(self, response: EvaluationResponse, repository: Repository = None): # FIXME: respository is required to extract the line numbers for functions # I added an optional argument "respository" here, can't think of any better way to handle it yet super().__init__() self.response = response self.evaluation_report = None - self.repository = respository + self.repository = repository self.items = [] def _parse_items(self): @@ -27,18 +27,17 @@ def _parse_items(self): fp = result.files_evaluated[0] # FIXME: it might fail if the evaluation is on multiple files item['File Path'] = fp if self.repository: - item['lineno'] = [self.repository.ffl_map[fp][func] for func in item['Functions']] + item['lineno'] = [self.repository.ffl_map['Python'][fp][func] for func in item['Functions']] else: item['lineno'] = [] item['Line Numbers'] = [ - f"[{lineno}]({self.repository._get_git_direct_link(self.repository._get_relative_path(fp), lineno)})" + f"[{lineno}]({self.repository._get_git_direct_link(fp, lineno)})" for lineno in item['lineno'] ] items.append(item) self.items = items return items - def get_completeness_score(self, score_format: str = 'fraction', verbose: bool = False) -> Optional[float]: """Compute Evaluation Report and Completeness Score.""" From d8f289006dbcbc0b968174aaf1ccecd2f69f4c20 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 17:08:19 -0700 Subject: [PATCH 17/23] change schema of `evaluationreseponse` to include repository and checklist class object --- src/test_creation/cli/repository.py | 2 +- .../modules/workflow/evaluator.py | 4 +- src/test_creation/modules/workflow/parse.py | 10 +++-- .../modules/workflow/response.py | 37 +++++++++++++++---- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/src/test_creation/cli/repository.py b/src/test_creation/cli/repository.py index 63bc319..ed1b5e1 100644 --- a/src/test_creation/cli/repository.py +++ b/src/test_creation/cli/repository.py @@ -65,7 +65,7 @@ def evaluate(repo_path: str, report_output_path: str, repository=repo, checklist=checklist) response = evaluator.run() - parser = ResponseParser(response, repository=repo) + parser = ResponseParser(response) parser.get_completeness_score(verbose=verbose) parser.export_evaluation_report(report_output_path, exist_ok=overwrite) diff --git a/src/test_creation/modules/workflow/evaluator.py b/src/test_creation/modules/workflow/evaluator.py index 17cfa40..2788125 100644 --- a/src/test_creation/modules/workflow/evaluator.py +++ b/src/test_creation/modules/workflow/evaluator.py @@ -80,8 +80,8 @@ def _validate_response(self, raw_response: dict) -> None: def run(self, verbose: bool = False) -> EvaluationResponse: eval_response = EvaluationResponse( model={'name': self.llm.model_name, 'temperature': self.llm.temperature}, - repository_path=self.repository.path, - checklist_path=self.checklist.path + repository={'path': self.repository.path, 'object': self.repository}, + checklist={'path': self.checklist.path, 'object': self.checklist} ) for fp in tqdm(self._files): diff --git a/src/test_creation/modules/workflow/parse.py 
b/src/test_creation/modules/workflow/parse.py index b914970..ffee6da 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -6,17 +6,16 @@ from .response import EvaluationResponse from ..mixins import ExportableMixin from ..utils import get_extension -from ..code_analyzer.repo import Repository class ResponseParser(ExportableMixin): - def __init__(self, response: EvaluationResponse, repository: Repository = None): + def __init__(self, response: EvaluationResponse): # FIXME: respository is required to extract the line numbers for functions # I added an optional argument "respository" here, can't think of any better way to handle it yet super().__init__() self.response = response self.evaluation_report = None - self.repository = repository + self.repository = self.response.repository.object self.items = [] def _parse_items(self): @@ -24,16 +23,19 @@ def _parse_items(self): for result in self.response.call_results: response = result.parsed_response['results'] for item in response: - fp = result.files_evaluated[0] # FIXME: it might fail if the evaluation is on multiple files + fp = result.files_evaluated[0] item['File Path'] = fp if self.repository: item['lineno'] = [self.repository.ffl_map['Python'][fp][func] for func in item['Functions']] else: item['lineno'] = [] + print(item) + print(item['lineno']) item['Line Numbers'] = [ f"[{lineno}]({self.repository._get_git_direct_link(fp, lineno)})" for lineno in item['lineno'] ] + print(item['Line Numbers']) items.append(item) self.items = items return items diff --git a/src/test_creation/modules/workflow/response.py b/src/test_creation/modules/workflow/response.py index 386bd95..07643bf 100644 --- a/src/test_creation/modules/workflow/response.py +++ b/src/test_creation/modules/workflow/response.py @@ -2,14 +2,31 @@ from pathlib import Path from typing import List, Dict, Any, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict +from ..code_analyzer.repo import Repository +from ..checklist.checklist import Checklist -class LLM(BaseModel): + +class LLMInfo(BaseModel): name: str = Field(description="Name of the LLM used") temperature: float = Field(description="Temperature of the LLM") +class RepositoryInfo(BaseModel): + path: Union[str, Path] = Field(description="Path of the repository") + object: Repository = Field(description="Repository object") + + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class ChecklistInfo(BaseModel): + path: Union[str, Path] = Field(description="Path of the checklist") + object: Checklist = Field(description="Checklist object") + + model_config = ConfigDict(arbitrary_types_allowed=True) + + class Error(BaseModel): name: str = Field(description="Class name of the error") description: str = Field(description="Description of the error") @@ -35,8 +52,14 @@ class EvaluationResponse(BaseModel): name temperature } - repository_path - checklist_path + repository { + path + object + } + checklist { + path + object + } call_results [{ start_time end_time @@ -52,7 +75,7 @@ class EvaluationResponse(BaseModel): }] } """ - model: LLM = Field(description="LLM-related information") - repository_path: Union[str, Path] = Field(description="Repository path") - checklist_path: Union[str, Path] = Field(description="Checklist path") + model: LLMInfo = Field(description="LLM-related information") + repository: RepositoryInfo = Field(description="Repository-related information") + checklist: ChecklistInfo = 
Field(description="Checklist-related information") call_results: List[CallResult] = Field(description="List of call results", default=[]) From d4014430d8fa0a5613c2da764c7adc1d876d77f1 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 17:09:05 -0700 Subject: [PATCH 18/23] reduce redundant computation on finding out which remote service to use --- .../modules/code_analyzer/git.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/test_creation/modules/code_analyzer/git.py b/src/test_creation/modules/code_analyzer/git.py index 85a9fcc..0d3bd83 100644 --- a/src/test_creation/modules/code_analyzer/git.py +++ b/src/test_creation/modules/code_analyzer/git.py @@ -22,6 +22,16 @@ def __init__(self, git_dir: Union[str, Path]): "gitee": "{host}/{org}/{repo}/blob/{branch}{path}#L{line_num}" } self.remote_protocol = "https" + self.remote_service_family = self.__get_remote_service_family() + + def __get_remote_service_family(self): + result = None + if self.host: + hits = [key for key in self.remote_link_format_map.keys() if + key in self.host] + if hits: + result = hits[0] + return result def _get_current_branch(self): if self.git_repo.head.is_detached: @@ -48,17 +58,16 @@ def _get_remote_info(self) -> tuple[Optional[str], Optional[str], str]: def construct_remote_link_to_file(self, file_path: Union[str, Path], line_num: Optional[int] = 0) -> str: rel_path = Path(file_path).relative_to(self.git_dir) - hits = [keyword for keyword in - self.remote_link_format_map.keys() if keyword in self.host] - if hits: - hit = hits[0] - f_str = copy(self.remote_link_format_map[hit]) + if self.remote_service_family: + f_str = copy(self.remote_link_format_map[self.remote_service_family]) if line_num is None: f_str = f_str.split("#")[0] - return f"{self.remote_protocol}://" + \ + injected_str = f"{self.remote_protocol}://" + \ f_str.format(host=self.host, org=self.org, repo=self.repo_name, branch=self.branch, path=rel_path, line_num=line_num) + print(injected_str) + return injected_str else: print("No matching service. Using local link instead...") return f"file://{str(self.git_dir)}/{rel_path}" From e74525bfa9ec120a7544359c0e3a8696d22bd413 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 18:10:23 -0700 Subject: [PATCH 19/23] change parser to attach link to github repo on function names --- src/test_creation/modules/code_analyzer/git.py | 3 +-- src/test_creation/modules/code_analyzer/repo.py | 8 +++++--- src/test_creation/modules/workflow/parse.py | 12 +++++------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/test_creation/modules/code_analyzer/git.py b/src/test_creation/modules/code_analyzer/git.py index 0d3bd83..c8041fc 100644 --- a/src/test_creation/modules/code_analyzer/git.py +++ b/src/test_creation/modules/code_analyzer/git.py @@ -62,11 +62,10 @@ def construct_remote_link_to_file(self, file_path: Union[str, Path], f_str = copy(self.remote_link_format_map[self.remote_service_family]) if line_num is None: f_str = f_str.split("#")[0] - injected_str = f"{self.remote_protocol}://" + \ + injected_str = f"{self.remote_protocol}://" + \ f_str.format(host=self.host, org=self.org, repo=self.repo_name, branch=self.branch, path=rel_path, line_num=line_num) - print(injected_str) return injected_str else: print("No matching service. 
Using local link instead...") diff --git a/src/test_creation/modules/code_analyzer/repo.py b/src/test_creation/modules/code_analyzer/repo.py index 24b4c7c..f67ee7a 100644 --- a/src/test_creation/modules/code_analyzer/repo.py +++ b/src/test_creation/modules/code_analyzer/repo.py @@ -17,7 +17,7 @@ def wrapper(self, *args, **kwargs): """wrapper function to check if we have git context.""" if self.git_context is None: raise RuntimeError("This repository has no git context.") - func(self, *args, **kwargs) + return func(self, *args, **kwargs) return wrapper @@ -54,8 +54,10 @@ def __init__(self, path: str): self.ffl_map = self._get_file_function_lineno_map() @requires_git_context - def _get_git_direct_link(self, file: str, lineno: Optional[int] = None): - return self.git_context.construct_remote_link_to_file(file, line_num=lineno) + def get_git_direct_link(self, file: str, + lineno: Optional[int] = None) -> str: + return self.git_context.construct_remote_link_to_file(file, + line_num=lineno) def _get_all_files(self, include_git_dir: bool = False): file_paths = [] diff --git a/src/test_creation/modules/workflow/parse.py b/src/test_creation/modules/workflow/parse.py index ffee6da..e8e9b1e 100644 --- a/src/test_creation/modules/workflow/parse.py +++ b/src/test_creation/modules/workflow/parse.py @@ -16,6 +16,7 @@ def __init__(self, response: EvaluationResponse): self.response = response self.evaluation_report = None self.repository = self.response.repository.object + self.git_context = self.repository.git_context self.items = [] def _parse_items(self): @@ -29,13 +30,10 @@ def _parse_items(self): item['lineno'] = [self.repository.ffl_map['Python'][fp][func] for func in item['Functions']] else: item['lineno'] = [] - print(item) - print(item['lineno']) - item['Line Numbers'] = [ - f"[{lineno}]({self.repository._get_git_direct_link(fp, lineno)})" - for lineno in item['lineno'] + item['Referenced Functions'] = [ + f"[{func}]({self.repository.get_git_direct_link(fp, lineno)})" + for func, lineno in zip(item['Functions'], item['lineno']) ] - print(item['Line Numbers']) items.append(item) self.items = items return items @@ -60,7 +58,7 @@ def get_completeness_score(self, score_format: str = 'fraction', verbose: bool = items = self._parse_items() report_df = pd.DataFrame(items) - report_df['Function References'] = report_df[['File Path', 'Functions', "Line Numbers"]].to_dict(orient='records') + report_df['Function References'] = report_df[['File Path', 'Referenced Functions']].to_dict(orient='records') report_df['Observation'] = '(' + report_df['File Path'].apply(lambda x: os.path.split(x)[-1]) + ') ' + \ report_df['Observation'] report_df = report_df.groupby(['ID', 'Title']).agg({ From 835334ac6f1a7e1ae662a5b972bfd4e4fe752885 Mon Sep 17 00:00:00 2001 From: SoloSynth1 Date: Tue, 4 Jun 2024 18:10:45 -0700 Subject: [PATCH 20/23] fix error raising lines being too long --- src/test_creation/modules/mixins.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/test_creation/modules/mixins.py b/src/test_creation/modules/mixins.py index 792cfbe..bd51df3 100644 --- a/src/test_creation/modules/mixins.py +++ b/src/test_creation/modules/mixins.py @@ -21,10 +21,16 @@ def _filedump_check(self, output_path: str, exist_ok: bool, expects_directory_if "provided a flag/argument for " "file overwriting?)") elif os.path.exists(normalized_path): - if expects_directory_if_exists and not os.path.isdir(normalized_path): - raise NotADirectoryError("An non-directory already exists in the path but the 
write operation is expecting to overwrite a directory.")
-            elif not expects_directory_if_exists and not os.path.isfile(normalized_path):
-                raise IsADirectoryError("An non-file object already exists in the path but the write operation is expecting to overwrite a file.")
+            if expects_directory_if_exists and not os.path.isdir(
+                    normalized_path):
+                raise NotADirectoryError("An non-directory already exists in "
+                                         "the path but the write operation is"
+                                         " expecting to overwrite a directory.")
+            elif not expects_directory_if_exists and not os.path.isfile(
+                    normalized_path):
+                raise IsADirectoryError("An non-file object already exists in "
+                                        "the path but the write operation is "
+                                        "expecting to overwrite a file.")
         if not os.access(normalized_path, os.W_OK):
             raise PermissionError(f"Write permission is not granted for the output path: {normalized_path}")

From 9edb286b5e73ada2ed18b527b6174477070b7c6a Mon Sep 17 00:00:00 2001
From: SoloSynth1
Date: Tue, 4 Jun 2024 18:17:56 -0700
Subject: [PATCH 21/23] implement naive parser for function line number as fallback; remove unused imports

---
 .../modules/code_analyzer/analyzers/python.py | 9 +++++++++
 src/test_creation/modules/code_analyzer/repo.py | 3 +++
 src/test_creation/modules/workflow/evaluator.py | 1 -
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/test_creation/modules/code_analyzer/analyzers/python.py b/src/test_creation/modules/code_analyzer/analyzers/python.py
index aa84e56..9db3c5d 100644
--- a/src/test_creation/modules/code_analyzer/analyzers/python.py
+++ b/src/test_creation/modules/code_analyzer/analyzers/python.py
@@ -90,6 +90,15 @@ def read(self, file_path: str):
         with open(file_path, 'r') as f:
             self.content = f.readlines()
 
+    @assert_have_read_content
+    def _get_function_lineno_map(self):
+        function_lineno_map = defaultdict(int)
+        for line_num, line in enumerate(self.content):
+            if line.lstrip().startswith('def '):
+                func_name = line.lstrip().split('(')[0].split(' ')[1]
+                function_lineno_map[func_name] = line_num + 1 # line starts with 1
+        return function_lineno_map
+
     @assert_have_read_content
     def list_imported_packages(self):
         packages = set()
diff --git a/src/test_creation/modules/code_analyzer/repo.py b/src/test_creation/modules/code_analyzer/repo.py
index f67ee7a..bf9c50e 100644
--- a/src/test_creation/modules/code_analyzer/repo.py
+++ b/src/test_creation/modules/code_analyzer/repo.py
@@ -86,12 +86,15 @@ def _get_file_function_lineno_map(self) -> dict[str, dict[str, list[str]]]:
             # TODO: only Python is supported now
             if lang == "Python":
                 ast = PythonASTCodeAnalyzer()
+                naive = PythonNaiveCodeAnalyzer()
                 for file in files:
                     try:
                         ast.read(file)
                         file_function_lineno_map[lang][file] = ast._get_function_lineno_map()
                     except Exception as e:
                         logger.info("Exception occurred when parsing using ast (Python 2 code?) Using naive parser...")
+                        naive.read(file)
+                        file_function_lineno_map[lang][file] = naive._get_function_lineno_map()
         return file_function_lineno_map
 
     def list_languages(self):
diff --git a/src/test_creation/modules/workflow/evaluator.py b/src/test_creation/modules/workflow/evaluator.py
index 2788125..88920d4 100644
--- a/src/test_creation/modules/workflow/evaluator.py
+++ b/src/test_creation/modules/workflow/evaluator.py
@@ -8,7 +8,6 @@
 from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
 from langchain_core.language_models import LanguageModelLike
 from langchain_core.documents import Document
-from pydantic import ValidationError
 
 from ..checklist.checklist import Checklist
 from ..code_analyzer.repo import Repository

From 2bf68a7a40384f17d5131482561774edb927d1c7 Mon Sep 17 00:00:00 2001
From: SoloSynth1
Date: Tue, 4 Jun 2024 18:31:35 -0700
Subject: [PATCH 22/23] add test for checking links when no remote is provided

---
 src/test_creation/modules/code_analyzer/git.py | 6 +++++-
 tests/test_repo.py | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/test_creation/modules/code_analyzer/git.py b/src/test_creation/modules/code_analyzer/git.py
index c8041fc..f7334b6 100644
--- a/src/test_creation/modules/code_analyzer/git.py
+++ b/src/test_creation/modules/code_analyzer/git.py
@@ -57,7 +57,11 @@ def _get_remote_info(self) -> tuple[Optional[str], Optional[str], str]:
     def construct_remote_link_to_file(self, file_path: Union[str, Path],
                                       line_num: Optional[int] = 0) -> str:
-        rel_path = Path(file_path).relative_to(self.git_dir)
+        path = Path(file_path)
+        if path.is_absolute():
+            rel_path = path.relative_to(self.git_dir)
+        else:
+            rel_path = path
         if self.remote_service_family:
             f_str = copy(self.remote_link_format_map[self.remote_service_family])
             if line_num is None:
                 f_str = f_str.split("#")[0]
diff --git a/tests/test_repo.py b/tests/test_repo.py
index 8fbccbf..fb29a73 100644
--- a/tests/test_repo.py
+++ b/tests/test_repo.py
@@ -83,3 +83,9 @@ def test_git_context_can_extract_remote_git_urls(fixture_name, remote_name,
     repo.api.create_remote(remote_name, remote_url)
     gc = GitContext(repo.workspace)
     assert (gc.host, gc.org, gc.repo_name) == expected
+
+
+def test_git_context_gives_out_local_link_when_no_remote(test_git_repo):
+    context = GitContext(test_git_repo.workspace)
+    link = context.construct_remote_link_to_file("src/python/main.py")
+    assert link == f"file://{test_git_repo.workspace}/src/python/main.py"
\ No newline at end of file

From 836e056b472cf8973b51349503d43ed7d8a2c935 Mon Sep 17 00:00:00 2001
From: SoloSynth1
Date: Tue, 4 Jun 2024 19:16:29 -0700
Subject: [PATCH 23/23] fix incorrectly set default argument

---
 src/test_creation/modules/code_analyzer/git.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test_creation/modules/code_analyzer/git.py b/src/test_creation/modules/code_analyzer/git.py
index f7334b6..8a43010 100644
--- a/src/test_creation/modules/code_analyzer/git.py
+++ b/src/test_creation/modules/code_analyzer/git.py
@@ -56,7 +56,7 @@ def _get_remote_info(self) -> tuple[Optional[str], Optional[str], str]:
         return None, None, "."
 
     def construct_remote_link_to_file(self, file_path: Union[str, Path],
-                                      line_num: Optional[int] = 0) -> str:
+                                      line_num: Optional[int] = None) -> str:
         path = Path(file_path)
         if path.is_absolute():
             rel_path = path.relative_to(self.git_dir)
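
For reference, a minimal standalone sketch of the naive "def"-scanning fallback introduced in PATCH 21. The helper name and the sample input below are illustrative only and do not appear in the patches; the real logic lives in PythonNaiveCodeAnalyzer._get_function_lineno_map, which reads self.content populated by read() and is used by Repository._get_file_function_lineno_map when ast.parse fails (e.g. on Python 2 sources).

from collections import defaultdict

def naive_function_lineno_map(lines):
    """Map each function name to its 1-based line number by scanning for 'def '."""
    function_lineno_map = defaultdict(int)
    for line_num, line in enumerate(lines):
        if line.lstrip().startswith('def '):
            # 'def area(r):' -> 'def area' -> 'area'
            func_name = line.lstrip().split('(')[0].split(' ')[1]
            function_lineno_map[func_name] = line_num + 1  # line numbers start at 1
    return function_lineno_map

sample = [
    "import math\n",
    "\n",
    "def area(r):\n",
    "    return math.pi * r ** 2\n",
]
print(dict(naive_function_lineno_map(sample)))  # {'area': 3}

Unlike the AST-based analyzer, a plain text scan of this kind will also match "def " occurrences inside strings or comments; that is the trade-off accepted for files the AST parser rejects.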