fix: Fix the user prompt, store the report when running the function (#78)
tonyshumlh authored May 21, 2024
1 parent 347f721 commit 37e8253
Showing 1 changed file with 21 additions and 14 deletions.
35 changes: 21 additions & 14 deletions src/test_creation/analyze.py
@@ -72,15 +72,15 @@ def __init__(self, repo_path=None):
         Your task is to answer each question in the checklist using only the provided test functions.
         If an answer to the question is provided, it must be annotated with a citation of the test function(s) in the Observation session.
         Output a JSON format:
-        {
+        [{
             "ID":
             "Title":
             "Requirement":
             "Observation":
             "Functions": [ ... ]
             "Evaluation": Satisfied/Partially Satisfied/Not Satisfied
             "Score": (1 for Satisfied / 0.5 for Partially Satisfied / 0 for Not Satisfied)
-        }
+        }]
         """

         self.evaluation_result = None
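For reference, the prompt change above switches the expected response from a single JSON object to a JSON array, so the model can return one entry per checklist item. A response in the new format might look like the following (all field values are illustrative only, not taken from the repository):

[{
    "ID": "2.1",
    "Title": "Data shape is validated",
    "Requirement": "Tests should verify that loaded data has the expected shape.",
    "Observation": "test_load_data asserts the DataFrame has the expected columns.",
    "Functions": ["test_load_data"],
    "Evaluation": "Partially Satisfied",
    "Score": 0.5
}]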
@@ -207,17 +207,17 @@ def extract_json(self, response, start='[', end=']'):
         string = response[start_idx:-end_idx]
         return json.loads(string)

-    def evaluate(self, on_file=True):
+    def evaluate(self, on_file=True, verbose=False):
         result = []
         if on_file:
             for fp in tqdm(self.test_fps):
-                print(fp)
+                if verbose:
+                    print(fp)
                 self.load_test_file(fp)
-                print(f"# splits: {len(self.test_fps)}")
+                if verbose:
+                    print(f"# splits: {len(self.test_fps)}")
                 response, history = self.get_evaluation_response() # FIXME: it sometimes tests only part of the checklist items
-                # print(response)
                 report = self.extract_json(response)
-                # print(report)
                 for item in report:
                     item['file'] = fp
                     result += [{
@@ -240,19 +240,26 @@ def evaluate(self, on_file=True):
         self.evaluation_result = result
         return

-    def get_completeness_score(self):
+    def get_completeness_score(self, score_format='fraction', verbose=False):
         report_df = pd.DataFrame(self.evaluation_result)['report'].explode('report').apply(pd.Series)
         report_df = report_df.groupby(['ID', 'Title']).agg({
             'Score': ['max', 'count'],
             'Functions': ['sum']
         })
         report_df.columns = ['is_Satisfied', 'n_files_tested', 'functions']
-        score = f"{report_df['is_Satisfied'].sum()}/{report_df['is_Satisfied'].count()}"
-        print("Report:")
-        print(report_df)
-        print()
-        print(f'Score: {score}')
-        print()
+        self.evaluation_report = report_df
+
+        if score_format == 'fraction':
+            score = f"{report_df['is_Satisfied'].sum()}/{report_df['is_Satisfied'].count()}"
+        elif score_format == 'number':
+            score = report_df['is_Satisfied'].sum()/report_df['is_Satisfied'].count()
+
+        if verbose:
+            print("Report:")
+            print(report_df)
+            print()
+            print(f'Score: {score}')
+            print()
         return score
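Taken together, both methods are now quiet unless verbose=True, get_completeness_score() can report either a fraction string or a numeric ratio, and the aggregated report is stored on the instance as evaluation_report. A minimal usage sketch under the new signatures (the TestEvaluator class name and the repository path are assumptions; the diff does not show the class name):

# Hypothetical usage sketch; the class name below is assumed, not shown in this diff.
evaluator = TestEvaluator(repo_path="path/to/repo")

# Quiet by default; verbose=True restores the per-file progress prints.
evaluator.evaluate(on_file=True, verbose=True)

# The score is now returned (and the report kept on evaluator.evaluation_report)
# rather than only printed.
print(evaluator.get_completeness_score(score_format='fraction'))  # e.g. "7/15"
print(evaluator.get_completeness_score(score_format='number'))    # e.g. 0.4667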
