From 4cc9ab5bc6abad961ec992e4036e063f76484cf9 Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Tue, 17 Oct 2023 11:32:37 +0530 Subject: [PATCH 1/8] bitbucket similar issue --- pr_agent/git_providers/bitbucket_provider.py | 98 +++++++++++++++++++- pr_agent/git_providers/github_provider.py | 56 ++++++++++- pr_agent/tools/pr_similar_issue.py | 62 ++++++------- 3 files changed, 179 insertions(+), 37 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 56b9f711c..fb8d24f98 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -14,8 +14,7 @@ class BitbucketProvider(GitProvider): def __init__( - self, pr_url: Optional[str] = None, incremental: Optional[bool] = False - ): + self, pr_url: Optional[str] = None, incremental: Optional[bool] = False): s = requests.Session() try: bearer = context.get("bitbucket_bearer_token", None) @@ -32,12 +31,15 @@ def __init__( self.repo = None self.pr_num = None self.pr = None + self.feature = None + self.issue_num = None + self.issue_name = None self.temp_comments = [] self.incremental = incremental - if pr_url: + if pr_url and 'pull' in pr_url: self.set_pr(pr_url) - self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] - self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] + self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] + self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] def get_repo_settings(self): try: @@ -228,6 +230,27 @@ def _parse_pr_url(pr_url: str) -> Tuple[str, int]: raise ValueError("Unable to convert PR number to integer") from e return workspace_slug, repo_slug, pr_number + + @staticmethod + def _parse_issue_url(issue_url: str) -> Tuple[str, int]: + parsed_url = urlparse(issue_url) + + if "bitbucket.org" not in parsed_url.netloc: + raise ValueError("The provided URL is not a valid Bitbucket URL") + + path_parts = parsed_url.path.strip('/').split('/') + if len(path_parts) < 5 or path_parts[2] != "issues": + raise ValueError("The provided URL does not appear to be a Bitbucket issue URL") + + workspace_slug = path_parts[0] + repo_slug = path_parts[1] + try: + issue_number = int(path_parts[3]) + except ValueError as e: + raise ValueError("Unable to convert issue number to integer") from e + + return workspace_slug, repo_slug, issue_number + def _get_repo(self): if self.repo is None: @@ -263,3 +286,68 @@ def publish_labels(self, pr_types: list): # bitbucket does not support labels def get_labels(self): pass + + def get_issue(self, workspace_slug, repo_name, original_issue_number): + issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(original_issue_number) + return issue + + def get_issue_url(self, issue): + return issue._BitbucketBase__data['links']['html']['href'] + + def get_issue_body(self, issue): + return issue.content['raw'] + + def get_issue_number(self, issue): + return issue.id + + def get_issue_comment_body(self, comment): + return comment['content']['raw'] + + def get_issue_comment_user(self, comment): + return comment['user']['display_name'] + + def get_issue_created_at(self, issue): + return str(issue.created_on) + + def get_username(self, issue, workspace_slug): + return workspace_slug + + + def get_repo_issues(self, repo_obj): + return repo_obj._Repository__issues.each() + + + def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + import requests + + url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" + + payload = {} + headers = {} + + response = requests.request("GET", url, headers=headers, data=payload) + return response.json()['values'] + + def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number): + url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" + payload = json.dumps({ + "content": { + "raw": similar_issues_str + } + }) + headers = { + 'Authorization': f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}', + 'Content-Type': 'application/json' + } + + response = requests.request("POST", url, headers=headers, data=payload) + + def get_repo_obj(self, workspace_slug, repo_name): + return self.bitbucket_client.repositories.get(workspace_slug, repo_name) + + def get_repo_name_for_indexing(self, repo_obj): + return repo_obj._BitbucketBase__data['full_name'].lower().replace('/', '-').replace('_/', '-') + + def check_if_issue_pull_request(self, issue): + return False + diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index e5f62eb3c..4a7ca48b3 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -336,8 +336,9 @@ def _parse_issue_url(issue_url: str) -> Tuple[str, int]: issue_number = int(path_parts[3]) except ValueError as e: raise ValueError("Unable to convert issue number to integer") from e + workspace_slug = None - return repo_name, issue_number + return workspace_slug, repo_name, issue_number def _get_github_client(self): deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") @@ -454,3 +455,56 @@ def get_pr_id(self): return pr_id except: return "" + + def get_repo_issues(self, repo_obj): + return list(repo_obj.get_issues(state='all')) + + def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + return self.repo_obj.get_issue(original_issue_number) + + def get_issue_url(self, issue): + return issue.html_url + + def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number): + try: + issue = self.repo_obj.get_issue(original_issue_number) + issue.create_comment(similar_issues_str) + except Exception as e: + logging.exception(f"Failed to create issue comment, error: {e}") + + def get_issue_body(self, issue): + return issue.body + + def get_issue_number(self, issue): + return issue.number + + def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + issue = self.repo_obj.get_issue(original_issue_number) + return list(issue.get_comments()) + + def get_issue_body(self, issue): + return issue.body + + def get_username(self, issue, workspace_slug): + return issue.user.login + + def get_issue_created_at(self, issue): + return str(issue.created_at) + + def get_issue_comment_body(self, comment): + return comment.body + + def get_issue(self, workspace_slug, repo_name, original_issue_number): + return self.repo_obj.get_issue(original_issue_number) + + def get_repo_obj(self, workspace_slug, repo_name): + return self.github_client.get_repo(repo_name) + + def get_repo_name_for_indexing(self, repo_obj): + return repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + + def check_if_issue_pull_request(self, issue): + if issue.pull_request: + return True + return False + diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index d7b6a7994..9987c08a9 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -19,29 +19,27 @@ class PRSimilarIssue: def __init__(self, issue_url: str, args: list = None): - if get_settings().config.git_provider != "github": - raise Exception("Only github is supported for similar issue tool") self.cli_mode = get_settings().CONFIG.CLI_MODE self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan self.issue_url = issue_url self.git_provider = get_git_provider()() - repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) - self.git_provider.repo = repo_name - self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) + self.workspace_slug, self.repo_name, self.issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) + self.git_provider.repo = self.repo_name + self.git_provider.repo_obj = self.git_provider.get_repo_obj(self.workspace_slug, self.repo_name) self.token_handler = TokenHandler() repo_obj = self.git_provider.repo_obj - repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj) index_name = self.index_name = "codium-ai-pr-agent-issues" # assuming pinecone api key and environment are set in secrets file try: - api_key = get_settings().pinecone.api_key - environment = get_settings().pinecone.environment + api_key = get_settings().github.api_key + environment = get_settings().github.environment except Exception: if not self.cli_mode: - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number) issue_main.create_comment("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file") @@ -65,19 +63,21 @@ def __init__(self, issue_url: str, args: list = None): logging.info('Indexing the entire repo...') logging.info('Getting issues...') - issues = list(repo_obj.get_issues(state='all')) + issues = self.git_provider.get_repo_issues(repo_obj) logging.info('Done') self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) else: # update index if needed pinecone_index = pinecone.Index(index_name=index_name) issues_to_update = [] - issues_paginated_list = repo_obj.get_issues(state='all') + issues_paginated_list = [] + issues_paginated_list = self.git_provider.get_repo_issues(repo_obj) counter = 1 for issue in issues_paginated_list: - if issue.pull_request: + issue_pull_request = self.git_provider.check_if_issue_pull_request(issue) + if issue_pull_request: continue issue_str, comments, number = self._process_issue(issue) - issue_key = f"issue_{number}" + issue_key = f"issue_{number}" id = issue_key + "." + "issue" res = pinecone_index.fetch([id]).to_dict() is_new_issue = True @@ -99,8 +99,8 @@ def __init__(self, issue_url: str, args: list = None): async def run(self): logging.info('Getting issue...') - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number) issue_str, comments, number = self._process_issue(issue_main) openai.api_key = get_settings().openai.key logging.info('Done') @@ -132,25 +132,23 @@ async def run(self): logging.info('Publishing response...') similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): - issue = self.git_provider.repo_obj.get_issue(issue_number_similar) + issue = self.git_provider.get_issue(workspace_slug, repo_name, issue_number_similar) title = issue.title - url = issue.html_url - if relevant_comment_number_list[i] != -1: - url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url + url = self.git_provider.get_issue_url(issue) similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" if get_settings().config.publish_output: - response = issue_main.create_comment(similar_issues_str) + response = self.git_provider.create_issue_comment(similar_issues_str, workspace_slug, repo_name, original_issue_number) logging.info(similar_issues_str) logging.info('Done') def _process_issue(self, issue): header = issue.title - body = issue.body - number = issue.number + body = self.git_provider.get_issue_body(issue) + number = self.git_provider.get_issue_number(issue) if get_settings().pr_similar_issue.skip_comments: comments = [] else: - comments = list(issue.get_comments()) + comments = self.git_provider.get_issues_comments(self.workspace_slug, self.repo_name, self.issue_number) issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" return issue_str, comments, number @@ -158,7 +156,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal logging.info('Processing issues...') corpus = Corpus() example_issue_record = Record( - id=f"example_issue_{repo_name_for_index}", + id=str([issue.number for issue in issues_list]), text="example_issue", metadata=Metadata(repo=repo_name_for_index) ) @@ -166,7 +164,9 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal counter = 0 for issue in issues_list: - if issue.pull_request: + + issue_pull_request = self.git_provider.check_if_issue_pull_request(issue) + if issue_pull_request: continue counter += 1 @@ -178,8 +178,8 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" - username = issue.user.login - created_at = str(issue.created_at) + username = self.git_provider.get_username(issue, self.workspace_slug) + created_at = self.git_provider.get_issue_created_at(issue) if len(issue_str) < 8000 or \ self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first issue_record = Record( @@ -193,7 +193,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal corpus.append(issue_record) if comments: for j, comment in enumerate(comments): - comment_body = comment.body + comment_body = self.git_provider.get_issue_comment_body(comment) num_words_comment = len(comment_body.split()) if num_words_comment < 10 or not isinstance(comment_body, str): continue @@ -233,8 +233,8 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal ds = Dataset.from_pandas(df, meta) logging.info('Done') - api_key = get_settings().pinecone.api_key - environment = get_settings().pinecone.environment + api_key = get_settings().github.api_key + environment = get_settings().github.environment if not upsert: logging.info('Creating index from scratch...') ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) From 733ed907de66915efa6d2134e48232bc8bc65557 Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Thu, 19 Oct 2023 10:41:50 +0530 Subject: [PATCH 2/8] similar_issue feature is working for github and bitbucket --- pr_agent/git_providers/bitbucket_provider.py | 14 ++++++++++++++ pr_agent/git_providers/github_provider.py | 8 +++++++- pr_agent/tools/pr_similar_issue.py | 7 ++++--- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index a85d90998..04b5422bf 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -10,6 +10,7 @@ from ..config_loader import get_settings from ..log import get_logger from .git_provider import FilePatchInfo, GitProvider +import ast class BitbucketProvider(GitProvider): @@ -350,4 +351,17 @@ def get_repo_name_for_indexing(self, repo_obj): def check_if_issue_pull_request(self, issue): return False + + def get_issue_numbers(self, issue): + list_of_issue_numbers = [] + for issue in issue: + list_of_issue_numbers.append(issue.id) + return str(list_of_issue_numbers) + + def get_issue_numbers_from_list(self, issues): + # convert str to list' + int_list = ast.literal_eval(issues) + int_list = [int(x) for x in int_list] + for issue_number in int_list: + return issue_number diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index e3bda3ffc..1955cc182 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -494,7 +494,7 @@ def get_issue_comment_body(self, comment): return comment.body def get_issue(self, workspace_slug, repo_name, original_issue_number): - return self.repo_obj.get_issue(original_issue_number) + return self.repo_obj.get_issue(int(original_issue_number)) def get_repo_obj(self, workspace_slug, repo_name): return self.github_client.get_repo(repo_name) @@ -506,4 +506,10 @@ def check_if_issue_pull_request(self, issue): if issue.pull_request: return True return False + + def get_issue_numbers(self, issues_list): + return str([issue.number for issue in issues_list]) + + def get_issue_numbers_from_list(self, r): + return int(r.split('.')[0].split('_')[-1]) diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 025c16fe4..a177e791b 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -129,11 +129,11 @@ async def run(self): continue try: - issue_number = int(r["id"].split('.')[0].split('_')[-1]) + issue_id= r['id'] + issue_number = self.git_provider.get_issue_numbers_from_list(issue_id) except: get_logger().debug(f"Failed to parse issue number from {r['id']}") continue - if original_issue_number == issue_number: continue if issue_number not in relevant_issues_number_list: @@ -171,8 +171,9 @@ def _process_issue(self, issue): def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False): get_logger().info('Processing issues...') corpus = Corpus() + issues = self.git_provider.get_issue_numbers(issues_list) example_issue_record = Record( - id=str([issue.number for issue in issues_list]), + id=str(issues), text="example_issue", metadata=Metadata(repo=repo_name_for_index) ) From ec77d5ff077e6d28f41bbba794896c9d93d453fa Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Thu, 19 Oct 2023 10:50:19 +0530 Subject: [PATCH 3/8] similar_issue feature is working for github and bitbucket --- pr_agent/tools/pr_similar_issue.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index a177e791b..f2269fc0e 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -34,8 +34,8 @@ def __init__(self, issue_url: str, args: list = None): # assuming pinecone api key and environment are set in secrets file try: - api_key = get_settings().github.api_key - environment = get_settings().github.environment + api_key = get_settings().pinecone.api_key + environment = get_settings().pinecone.environment except Exception: if not self.cli_mode: workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) @@ -250,8 +250,8 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal ds = Dataset.from_pandas(df, meta) get_logger().info('Done') - api_key = get_settings().github.api_key - environment = get_settings().github.environment + api_key = get_settings().pinecone.api_key + environment = get_settings().pinecone.environment if not upsert: get_logger().info('Creating index from scratch...') ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) From 7d9885f2b7940580c3101a4aa3083bd5c8b8a3ef Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Thu, 19 Oct 2023 11:09:14 +0530 Subject: [PATCH 4/8] update github provider for similar issue --- pr_agent/git_providers/github_provider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index b6d08dcea..253e99622 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -241,7 +241,7 @@ def get_user_id(self): self.github_user_id = self.github_client.get_user().raw_data['login'] except Exception as e: self.github_user_id = "" - # logging.exception(f"Failed to get user id, error: {e}") + # get_logger().exception(f"Failed to get user id, error: {e}") return self.github_user_id def get_notifications(self, since: datetime): @@ -469,7 +469,7 @@ def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, or issue = self.repo_obj.get_issue(original_issue_number) issue.create_comment(similar_issues_str) except Exception as e: - logging.exception(f"Failed to create issue comment, error: {e}") + get_logger().exception(f"Failed to create issue comment, error: {e}") def get_issue_body(self, issue): return issue.body From c6495642c33ec8600c1161eb8b6228c01b131b82 Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Thu, 19 Oct 2023 12:58:45 +0530 Subject: [PATCH 5/8] update github provider for similar issue --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f5bc474e3..c03f8148d 100644 --- a/README.md +++ b/README.md @@ -113,13 +113,13 @@ See the [Release notes](./RELEASE_NOTES.md) for updates on the latest changes. | | ⮑ Extended | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | Reflect and Review | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | :white_check_mark: | | | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | -| | Find similar issue | :white_check_mark: | | | | | | +| | Find similar issue | :white_check_mark: | | :white_check_mark: | | | | | | Add Documentation | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | | | | | | | | USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | App / webhook | :white_check_mark: | :white_check_mark: | | | | | | Tagging bot | :white_check_mark: | | | | | -| | Actions | :white_check_mark: | | | | | +| | Actions | :white_check_mark: | | :white_check_mark: | | | | | Web server | | | | | | :white_check_mark: | | | | | | | | | | CORE | PR compression | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | From 3010c1045c032559aafcdc0cc59baa2abc985db5 Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Fri, 27 Oct 2023 12:26:32 +0530 Subject: [PATCH 6/8] remove workslug from common file --- pr_agent/git_providers/bitbucket_provider.py | 32 ++++++++++------- pr_agent/git_providers/github_provider.py | 37 +++++++++++++------- pr_agent/tools/pr_similar_issue.py | 29 +++++++-------- 3 files changed, 56 insertions(+), 42 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 04b5422bf..30e6f5ba0 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -288,9 +288,10 @@ def publish_labels(self, pr_types: list): def get_labels(self): pass - def get_issue(self, workspace_slug, repo_name, original_issue_number): + def get_issue(self, issue_url): + workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(original_issue_number) - return issue + return issue, original_issue_number def get_issue_url(self, issue): return issue._BitbucketBase__data['links']['html']['href'] @@ -310,7 +311,8 @@ def get_issue_comment_user(self, comment): def get_issue_created_at(self, issue): return str(issue.created_on) - def get_username(self, issue, workspace_slug): + def get_username(self, issue, issue_url): + workspace_slug, repo_name, original_issue_numbers = self._parse_issue_url(issue_url) return workspace_slug @@ -318,7 +320,8 @@ def get_repo_issues(self, repo_obj): return repo_obj._Repository__issues.each() - def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + def get_issues_comments(self, issue): + workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue) import requests url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" @@ -329,7 +332,8 @@ def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): response = requests.request("GET", url, headers=headers, data=payload) return response.json()['values'] - def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number): + def create_issue_comment(self, similar_issues_str, issue_url, original_issue_number): + workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" payload = json.dumps({ "content": { @@ -343,7 +347,8 @@ def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, or response = requests.request("POST", url, headers=headers, data=payload) - def get_repo_obj(self, workspace_slug, repo_name): + def get_repo_obj_parse_issue_url(self, issue_url): + workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) return self.bitbucket_client.repositories.get(workspace_slug, repo_name) def get_repo_name_for_indexing(self, repo_obj): @@ -352,16 +357,19 @@ def get_repo_name_for_indexing(self, repo_obj): def check_if_issue_pull_request(self, issue): return False - def get_issue_numbers(self, issue): - list_of_issue_numbers = [] - for issue in issue: - list_of_issue_numbers.append(issue.id) - return str(list_of_issue_numbers) - def get_issue_numbers_from_list(self, issues): # convert str to list' int_list = ast.literal_eval(issues) int_list = [int(x) for x in int_list] for issue_number in int_list: return issue_number + + def get_similar_issues(self, issue_url, issue_number_similar): + workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) + issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(issue_number_similar) + return issue + def get_main_issue(self, issue_url): + workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) + issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(original_issue_number) + return issue diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 253e99622..9b7910756 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -335,9 +335,8 @@ def _parse_issue_url(issue_url: str) -> Tuple[str, int]: issue_number = int(path_parts[3]) except ValueError as e: raise ValueError("Unable to convert issue number to integer") from e - workspace_slug = None - - return workspace_slug, repo_name, issue_number + + return repo_name, issue_number def _get_github_client(self): deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") @@ -458,13 +457,15 @@ def get_pr_id(self): def get_repo_issues(self, repo_obj): return list(repo_obj.get_issues(state='all')) - def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + def get_issues_comments(self, issue): + repo_name, original_issue_number = self._parse_issue_url(issue) return self.repo_obj.get_issue(original_issue_number) def get_issue_url(self, issue): return issue.html_url - def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number): + def create_issue_comment(self, similar_issues_str, issue_url, original_issue_number): + repo_name, original_issue_number = self._parse_issue_url(issue_url) try: issue = self.repo_obj.get_issue(original_issue_number) issue.create_comment(similar_issues_str) @@ -477,14 +478,15 @@ def get_issue_body(self, issue): def get_issue_number(self, issue): return issue.number - def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + def get_issues_comments(self, issue): + repo_name, original_issue_number = self._parse_issue_url(issue) issue = self.repo_obj.get_issue(original_issue_number) return list(issue.get_comments()) def get_issue_body(self, issue): return issue.body - def get_username(self, issue, workspace_slug): + def get_username(self, issue, issue_url): return issue.user.login def get_issue_created_at(self, issue): @@ -493,10 +495,13 @@ def get_issue_created_at(self, issue): def get_issue_comment_body(self, comment): return comment.body - def get_issue(self, workspace_slug, repo_name, original_issue_number): - return self.repo_obj.get_issue(int(original_issue_number)) + def get_issue(self, issue_url): + repo_name, original_issue_number = self._parse_issue_url(issue_url) + issue = self.repo_obj.get_issue(original_issue_number) + return issue, original_issue_number - def get_repo_obj(self, workspace_slug, repo_name): + def get_repo_obj_parse_issue_url(self, issue_url): + repo_name, original_issue_number = self._parse_issue_url(issue_url) return self.github_client.get_repo(repo_name) def get_repo_name_for_indexing(self, repo_obj): @@ -507,9 +512,15 @@ def check_if_issue_pull_request(self, issue): return True return False - def get_issue_numbers(self, issues_list): - return str([issue.number for issue in issues_list]) - def get_issue_numbers_from_list(self, r): return int(r.split('.')[0].split('_')[-1]) + + def get_similar_issues(self, issue_url, issue_number_similar): + repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.github_client.get_repo(repo_name).get_issue(issue_number_similar) + return issue + + def get_main_issue(self, issue_url): + repo_name, original_issue_number = self._parse_issue_url(issue_url) + issue = self.github_client.get_repo(repo_name).get_issue(original_issue_number) + return issue diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 1b0972ac9..c984de93a 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -24,9 +24,7 @@ def __init__(self, issue_url: str, args: list = None): self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan self.issue_url = issue_url self.git_provider = get_git_provider()() - self.workspace_slug, self.repo_name, self.issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) - self.git_provider.repo = self.repo_name - self.git_provider.repo_obj = self.git_provider.get_repo_obj(self.workspace_slug, self.repo_name) + self.git_provider.repo_obj = self.git_provider.get_repo_obj_parse_issue_url(issue_url.split('=')[-1]) self.token_handler = TokenHandler() repo_obj = self.git_provider.repo_obj repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj) @@ -38,17 +36,16 @@ def __init__(self, issue_url: str, args: list = None): environment = get_settings().pinecone.environment except Exception: if not self.cli_mode: - workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number) + issue_main = self.git_provider.get_main_issue(self.issue_url.split('=')[-1]) issue_main.create_comment("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file") # check if index exists, and if repo is already indexed - run_from_scratch = False + run_from_scratch = True if run_from_scratch: # for debugging - if not index_name in pinecone.list_indexes(): + pinecone.init(api_key=api_key, environment=environment) + if index_name in pinecone.list_indexes(): get_logger().info('Removing index...') - pinecone.init(api_key=api_key, environment=environment) pinecone.delete_index(index_name) get_logger().info('Done') @@ -106,8 +103,7 @@ def __init__(self, issue_url: str, args: list = None): async def run(self): get_logger().info('Getting issue...') - workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number) + issue_main, original_issue_number = self.git_provider.get_issue(self.issue_url.split('=')[-1]) issue_str, comments, number = self._process_issue(issue_main) openai.api_key = get_settings().openai.key get_logger().info('Done') @@ -130,7 +126,7 @@ async def run(self): try: issue_id= r['id'] - issue_number = self.git_provider.get_issue_numbers_from_list(issue_id) + issue_number = int(issue_id.split('.')[0].split('_')[-1]) except: get_logger().debug(f"Failed to parse issue number from {r['id']}") continue @@ -148,12 +144,12 @@ async def run(self): get_logger().info('Publishing response...') similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): - issue = self.git_provider.get_issue(workspace_slug, repo_name, issue_number_similar) + issue = self.git_provider.get_similar_issues(self.issue_url.split('=')[-1], issue_number_similar) title = issue.title url = self.git_provider.get_issue_url(issue) similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" if get_settings().config.publish_output: - response = self.git_provider.create_issue_comment(similar_issues_str, workspace_slug, repo_name, original_issue_number) + response = self.git_provider.create_issue_comment(similar_issues_str, self.issue_url.split('=')[-1], original_issue_number) get_logger().info(similar_issues_str) get_logger().info('Done') @@ -164,16 +160,15 @@ def _process_issue(self, issue): if get_settings().pr_similar_issue.skip_comments: comments = [] else: - comments = self.git_provider.get_issues_comments(self.workspace_slug, self.repo_name, self.issue_number) + comments = self.git_provider.get_issues_comments(self.issue_url.split('=')[-1]) issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" return issue_str, comments, number def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False): get_logger().info('Processing issues...') corpus = Corpus() - issues = self.git_provider.get_issue_numbers(issues_list) example_issue_record = Record( - id=str(issues), + id=f"example_issue_{repo_name_for_index}", text="example_issue", metadata=Metadata(repo=repo_name_for_index) ) @@ -195,7 +190,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" - username = self.git_provider.get_username(issue, self.workspace_slug) + username = self.git_provider.get_username(issue, self.issue_url.split('=')[-1]) created_at = self.git_provider.get_issue_created_at(issue) if len(issue_str) < 8000 or \ self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first From cb57bdf6ddca2f5783aad1b036fdaa39cb5e199a Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Mon, 30 Oct 2023 15:34:49 +0530 Subject: [PATCH 7/8] fix issues --- pr_agent/git_providers/github_provider.py | 3 ++- pr_agent/tools/pr_similar_issue.py | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 9b7910756..4f697224a 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -516,7 +516,8 @@ def get_issue_numbers_from_list(self, r): return int(r.split('.')[0].split('_')[-1]) def get_similar_issues(self, issue_url, issue_number_similar): - repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.github_client.get_repo(repo_name).get_issue(issue_number_similar) + repo_name, original_issue_number = self._parse_issue_url(issue_url) + issue = self.github_client.get_repo(repo_name).get_issue(issue_number_similar) return issue def get_main_issue(self, issue_url): diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index c984de93a..e2c8be4b9 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -41,7 +41,7 @@ def __init__(self, issue_url: str, args: list = None): raise Exception("Please set pinecone api key and environment in secrets file") # check if index exists, and if repo is already indexed - run_from_scratch = True + run_from_scratch = False if run_from_scratch: # for debugging pinecone.init(api_key=api_key, environment=environment) if index_name in pinecone.list_indexes(): @@ -73,7 +73,6 @@ def __init__(self, issue_url: str, args: list = None): else: # update index if needed pinecone_index = pinecone.Index(index_name=index_name) issues_to_update = [] - issues_paginated_list = [] issues_paginated_list = self.git_provider.get_repo_issues(repo_obj) counter = 1 for issue in issues_paginated_list: @@ -81,7 +80,7 @@ def __init__(self, issue_url: str, args: list = None): if issue_pull_request: continue issue_str, comments, number = self._process_issue(issue) - issue_key = f"issue_{number}" + issue_key = f"issue_{number}" id = issue_key + "." + "issue" res = pinecone_index.fetch([id]).to_dict() is_new_issue = True @@ -125,8 +124,7 @@ async def run(self): continue try: - issue_id= r['id'] - issue_number = int(issue_id.split('.')[0].split('_')[-1]) + issue_number = int(r["id"].split('.')[0].split('_')[-1]) except: get_logger().debug(f"Failed to parse issue number from {r['id']}") continue From f995b3e81e41e98c2efb6e287642219f0e9fd414 Mon Sep 17 00:00:00 2001 From: sarbjitgrewal Date: Mon, 30 Oct 2023 17:01:37 +0530 Subject: [PATCH 8/8] update function names accordingly --- pr_agent/git_providers/bitbucket_provider.py | 13 ++++++------- pr_agent/git_providers/github_provider.py | 14 +++++--------- pr_agent/tools/pr_similar_issue.py | 12 +++++++----- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 30e6f5ba0..e99417c97 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -248,8 +248,7 @@ def _parse_issue_url(issue_url: str) -> Tuple[str, int]: try: issue_number = int(path_parts[3]) except ValueError as e: - raise ValueError("Unable to convert issue number to integer") from e - + raise ValueError("Unable to convert issue number to integer") from e return workspace_slug, repo_slug, issue_number @@ -320,7 +319,7 @@ def get_repo_issues(self, repo_obj): return repo_obj._Repository__issues.each() - def get_issues_comments(self, issue): + def parse_issue_url_and_get_comments(self, issue): workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue) import requests @@ -332,7 +331,7 @@ def get_issues_comments(self, issue): response = requests.request("GET", url, headers=headers, data=payload) return response.json()['values'] - def create_issue_comment(self, similar_issues_str, issue_url, original_issue_number): + def parse_issue_url_and_create_comment(self, similar_issues_str, issue_url, original_issue_number): workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" payload = json.dumps({ @@ -347,7 +346,7 @@ def create_issue_comment(self, similar_issues_str, issue_url, original_issue_num response = requests.request("POST", url, headers=headers, data=payload) - def get_repo_obj_parse_issue_url(self, issue_url): + def parse_issue_url_and_get_repo_obj(self, issue_url): workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) return self.bitbucket_client.repositories.get(workspace_slug, repo_name) @@ -364,12 +363,12 @@ def get_issue_numbers_from_list(self, issues): for issue_number in int_list: return issue_number - def get_similar_issues(self, issue_url, issue_number_similar): + def parse_issue_url_and_get_similar_issues(self, issue_url, issue_number_similar): workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(issue_number_similar) return issue - def get_main_issue(self, issue_url): + def parse_issue_url_and_get_main_issue(self, issue_url): workspace_slug, repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(original_issue_number) return issue diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 4f697224a..1af01bb0c 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -457,14 +457,10 @@ def get_pr_id(self): def get_repo_issues(self, repo_obj): return list(repo_obj.get_issues(state='all')) - def get_issues_comments(self, issue): - repo_name, original_issue_number = self._parse_issue_url(issue) - return self.repo_obj.get_issue(original_issue_number) - def get_issue_url(self, issue): return issue.html_url - def create_issue_comment(self, similar_issues_str, issue_url, original_issue_number): + def parse_issue_url_and_create_comment(self, similar_issues_str, issue_url, original_issue_number): repo_name, original_issue_number = self._parse_issue_url(issue_url) try: issue = self.repo_obj.get_issue(original_issue_number) @@ -478,7 +474,7 @@ def get_issue_body(self, issue): def get_issue_number(self, issue): return issue.number - def get_issues_comments(self, issue): + def parse_issue_url_and_get_comments(self, issue): repo_name, original_issue_number = self._parse_issue_url(issue) issue = self.repo_obj.get_issue(original_issue_number) return list(issue.get_comments()) @@ -500,7 +496,7 @@ def get_issue(self, issue_url): issue = self.repo_obj.get_issue(original_issue_number) return issue, original_issue_number - def get_repo_obj_parse_issue_url(self, issue_url): + def parse_issue_url_and_get_repo_obj(self, issue_url): repo_name, original_issue_number = self._parse_issue_url(issue_url) return self.github_client.get_repo(repo_name) @@ -515,12 +511,12 @@ def check_if_issue_pull_request(self, issue): def get_issue_numbers_from_list(self, r): return int(r.split('.')[0].split('_')[-1]) - def get_similar_issues(self, issue_url, issue_number_similar): + def parse_issue_url_and_get_similar_issues(self, issue_url, issue_number_similar): repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.github_client.get_repo(repo_name).get_issue(issue_number_similar) return issue - def get_main_issue(self, issue_url): + def parse_issue_url_and_get_main_issue(self, issue_url): repo_name, original_issue_number = self._parse_issue_url(issue_url) issue = self.github_client.get_repo(repo_name).get_issue(original_issue_number) return issue diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index e2c8be4b9..e260f3d0e 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -24,7 +24,7 @@ def __init__(self, issue_url: str, args: list = None): self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan self.issue_url = issue_url self.git_provider = get_git_provider()() - self.git_provider.repo_obj = self.git_provider.get_repo_obj_parse_issue_url(issue_url.split('=')[-1]) + self.git_provider.repo_obj = self.git_provider.parse_issue_url_and_get_repo_obj(issue_url.split('=')[-1]) self.token_handler = TokenHandler() repo_obj = self.git_provider.repo_obj repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj) @@ -36,7 +36,7 @@ def __init__(self, issue_url: str, args: list = None): environment = get_settings().pinecone.environment except Exception: if not self.cli_mode: - issue_main = self.git_provider.get_main_issue(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.parse_issue_url_and_get_issue(self.issue_url.split('=')[-1]) issue_main.create_comment("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file") @@ -128,6 +128,7 @@ async def run(self): except: get_logger().debug(f"Failed to parse issue number from {r['id']}") continue + if original_issue_number == issue_number: continue if issue_number not in relevant_issues_number_list: @@ -142,12 +143,12 @@ async def run(self): get_logger().info('Publishing response...') similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): - issue = self.git_provider.get_similar_issues(self.issue_url.split('=')[-1], issue_number_similar) + issue = self.git_provider.parse_issue_url_and_get_similar_issues(self.issue_url.split('=')[-1], issue_number_similar) title = issue.title url = self.git_provider.get_issue_url(issue) similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" if get_settings().config.publish_output: - response = self.git_provider.create_issue_comment(similar_issues_str, self.issue_url.split('=')[-1], original_issue_number) + response = self.git_provider.parse_issue_url_and_create_comment(similar_issues_str, self.issue_url.split('=')[-1], original_issue_number) get_logger().info(similar_issues_str) get_logger().info('Done') @@ -158,7 +159,8 @@ def _process_issue(self, issue): if get_settings().pr_similar_issue.skip_comments: comments = [] else: - comments = self.git_provider.get_issues_comments(self.issue_url.split('=')[-1]) + comments = self.git_provider.parse_issue_url_and_get_comments(self.issue_url.split('=')[-1]) + print('comments: ', comments) issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" return issue_str, comments, number