diff --git a/README.md b/README.md index 26cb77d10..7ac2b3f18 100644 --- a/README.md +++ b/README.md @@ -113,13 +113,13 @@ See the [Tools Guide](./docs/TOOLS_GUIDE.md) for detailed description of the dif | | ⮑ Extended | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | Reflect and Review | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | :white_check_mark: | | | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | -| | Find similar issue | :white_check_mark: | | | | | | +| | Find similar issue | :white_check_mark: | | :white_check_mark: | | | | | | Add Documentation | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | | | | | | | | USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | App / webhook | :white_check_mark: | :white_check_mark: | | | | | | Tagging bot | :white_check_mark: | | | | | -| | Actions | :white_check_mark: | | | | | +| | Actions | :white_check_mark: | | :white_check_mark: | | | | | Web server | | | | | | :white_check_mark: | | | | | | | | | | CORE | PR compression | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index fef51794b..e99417c97 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -10,12 +10,12 @@ from ..config_loader import get_settings from ..log import get_logger from .git_provider import FilePatchInfo, GitProvider +import ast class BitbucketProvider(GitProvider): def __init__( - self, pr_url: Optional[str] = None, incremental: Optional[bool] = False - ): + self, pr_url: Optional[str] = None, incremental: 
@staticmethod
def _parse_issue_url(issue_url: str) -> Tuple[str, str, int]:
    """Split a Bitbucket issue URL into workspace, repository and issue id.

    Both ``https://bitbucket.org/<workspace>/<repo>/issues/<id>`` and the
    same URL followed by a title slug are accepted.

    Returns:
        A ``(workspace_slug, repo_slug, issue_number)`` tuple.

    Raises:
        ValueError: if the host is not a Bitbucket host, the path is not an
            issue path, or the issue id is not an integer.
    """
    parsed_url = urlparse(issue_url)

    if "bitbucket.org" not in parsed_url.netloc:
        raise ValueError("The provided URL is not a valid Bitbucket URL")

    path_parts = parsed_url.path.strip('/').split('/')
    # Only the first four path segments are read below, so the trailing
    # title slug is optional.
    if len(path_parts) < 4 or path_parts[2] != "issues":
        raise ValueError("The provided URL does not appear to be a Bitbucket issue URL")

    workspace_slug, repo_slug = path_parts[0], path_parts[1]
    try:
        issue_number = int(path_parts[3])
    except ValueError as e:
        raise ValueError("Unable to convert issue number to integer") from e
    return workspace_slug, repo_slug, issue_number

def get_issue(self, issue_url):
    """Return ``(issue, issue_number)`` for the issue addressed by *issue_url*."""
    workspace_slug, repo_name, issue_number = self._parse_issue_url(issue_url)
    repo = self.bitbucket_client.repositories.get(workspace_slug, repo_name)
    return repo.issues.get(issue_number), issue_number

def get_issue_url(self, issue):
    """Return the HTML link stored in the issue's raw payload."""
    return issue._BitbucketBase__data['links']['html']['href']

def get_issue_body(self, issue):
    """Return the raw text of the issue content."""
    return issue.content['raw']

def get_issue_number(self, issue):
    """Return the issue id."""
    return issue.id

def get_issue_comment_body(self, comment):
    """Return the raw text of a comment dict."""
    return comment['content']['raw']

def get_issue_comment_user(self, comment):
    """Return the display name of the author of a comment dict."""
    return comment['user']['display_name']

def get_issue_created_at(self, issue):
    """Return the issue creation time as a string."""
    return str(issue.created_on)

def get_username(self, issue, issue_url):
    """Return the workspace slug parsed from *issue_url*.

    NOTE(review): this is the workspace, not the issue reporter; *issue* is
    not consulted. Confirm this is the intended "username" for indexing.
    """
    workspace_slug, _, _ = self._parse_issue_url(issue_url)
    return workspace_slug

def get_repo_issues(self, repo_obj):
    """Return an iterable over the issues of *repo_obj*."""
    return repo_obj._Repository__issues.each()

def parse_issue_url_and_get_comments(self, issue):
    """Fetch all comment dicts of the issue addressed by the URL *issue*.

    Follows the ``next`` link of each response page so that comments beyond
    the first page are returned as well. The configured bearer token is sent
    when one is set.

    Raises:
        ValueError: if *issue* is not a Bitbucket issue URL.
        requests.HTTPError: if the API answers with an error status.
    """
    workspace_slug, repo_name, issue_number = self._parse_issue_url(issue)
    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{issue_number}/comments"

    headers = {}
    token = get_settings().get("BITBUCKET.BEARER_TOKEN", None)
    if token:
        headers['Authorization'] = f'Bearer {token}'

    comments = []
    while url:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        page = response.json()
        comments.extend(page.get('values', []))
        url = page.get('next')
    return comments

def parse_issue_url_and_create_comment(self, similar_issues_str, issue_url, original_issue_number):
    """Post *similar_issues_str* as a comment on the issue at *issue_url*.

    The issue number is taken from *issue_url*; *original_issue_number* is
    accepted for interface parity with the other providers but not used.
    Request failures are logged, not raised.
    """
    workspace_slug, repo_name, issue_number = self._parse_issue_url(issue_url)
    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{issue_number}/comments"
    headers = {
        'Authorization': f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}',
        'Content-Type': 'application/json'
    }
    try:
        response = requests.post(url, headers=headers, json={"content": {"raw": similar_issues_str}})
        response.raise_for_status()
    except requests.RequestException as e:
        get_logger().exception(f"Failed to create issue comment, error: {e}")

def parse_issue_url_and_get_repo_obj(self, issue_url):
    """Return the repository object that owns the issue at *issue_url*."""
    workspace_slug, repo_name, _ = self._parse_issue_url(issue_url)
    return self.bitbucket_client.repositories.get(workspace_slug, repo_name)

def get_repo_name_for_indexing(self, repo_obj):
    """Return the lower-cased full repository name with ``/`` replaced by ``-``."""
    return repo_obj._BitbucketBase__data['full_name'].lower().replace('/', '-')

def check_if_issue_pull_request(self, issue):
    """Always return False; *issue* is not inspected."""
    return False

def get_issue_numbers_from_list(self, issues):
    """Return the first issue number from a string holding a list literal.

    Every element is converted to ``int`` before the first one is returned.
    Returns None when the list is empty.
    """
    numbers = [int(x) for x in ast.literal_eval(issues)]
    return numbers[0] if numbers else None

def parse_issue_url_and_get_similar_issues(self, issue_url, issue_number_similar):
    """Return issue *issue_number_similar* from the repository of *issue_url*."""
    workspace_slug, repo_name, _ = self._parse_issue_url(issue_url)
    repo = self.bitbucket_client.repositories.get(workspace_slug, repo_name)
    return repo.issues.get(issue_number_similar)

def parse_issue_url_and_get_main_issue(self, issue_url):
    """Return the issue addressed by *issue_url*."""
    workspace_slug, repo_name, issue_number = self._parse_issue_url(issue_url)
    repo = self.bitbucket_client.repositories.get(workspace_slug, repo_name)
    return repo.issues.get(issue_number)

# The similar-issue tool looks this method up under this name as well.
parse_issue_url_and_get_issue = parse_issue_url_and_get_main_issue
def get_repo_issues(self, repo_obj):
    """Return every issue of *repo_obj*, in any state, as a list."""
    return list(repo_obj.get_issues(state='all'))

def get_issue_url(self, issue):
    """Return the HTML URL of *issue*."""
    return issue.html_url

def parse_issue_url_and_create_comment(self, similar_issues_str, issue_url, original_issue_number):
    """Post *similar_issues_str* as a comment on the issue at *issue_url*.

    The issue number is taken from *issue_url*; *original_issue_number* is
    accepted for interface parity with the other providers but not used.
    Failures are logged, not raised.
    """
    _, issue_number = self._parse_issue_url(issue_url)
    try:
        self.repo_obj.get_issue(issue_number).create_comment(similar_issues_str)
    except Exception as e:
        get_logger().exception(f"Failed to create issue comment, error: {e}")

def get_issue_body(self, issue):
    """Return the body text of *issue*."""
    return issue.body

def get_issue_number(self, issue):
    """Return the number of *issue*."""
    return issue.number

def parse_issue_url_and_get_comments(self, issue):
    """Return the comments of the issue addressed by the URL *issue* as a list."""
    _, issue_number = self._parse_issue_url(issue)
    return list(self.repo_obj.get_issue(issue_number).get_comments())

def get_username(self, issue, issue_url):
    """Return the login of the user attached to *issue*; *issue_url* is unused."""
    return issue.user.login

def get_issue_created_at(self, issue):
    """Return the issue creation time as a string."""
    return str(issue.created_at)

def get_issue_comment_body(self, comment):
    """Return the body text of *comment*."""
    return comment.body

def get_issue_comment_user(self, comment):
    """Return the login of the user attached to *comment*."""
    return comment.user.login

def get_issue(self, issue_url):
    """Return ``(issue, issue_number)`` for the issue addressed by *issue_url*."""
    _, issue_number = self._parse_issue_url(issue_url)
    return self.repo_obj.get_issue(issue_number), issue_number

def parse_issue_url_and_get_repo_obj(self, issue_url):
    """Return the repository object that owns the issue at *issue_url*."""
    repo_name, _ = self._parse_issue_url(issue_url)
    return self.github_client.get_repo(repo_name)

def get_repo_name_for_indexing(self, repo_obj):
    """Return the lower-cased full repository name with ``/`` replaced by ``-``."""
    return repo_obj.full_name.lower().replace('/', '-')

def check_if_issue_pull_request(self, issue):
    """Return True when *issue* has a truthy ``pull_request`` attribute."""
    return bool(issue.pull_request)

def get_issue_numbers_from_list(self, r):
    """Extract the issue number from an id string such as ``issue_42.comment_1``.

    The text before the first ``.`` is split on ``_`` and its last piece is
    converted to ``int``.
    """
    return int(r.split('.')[0].split('_')[-1])

def parse_issue_url_and_get_similar_issues(self, issue_url, issue_number_similar):
    """Return issue *issue_number_similar* from the repository of *issue_url*."""
    repo_name, _ = self._parse_issue_url(issue_url)
    return self.github_client.get_repo(repo_name).get_issue(issue_number_similar)

def parse_issue_url_and_get_main_issue(self, issue_url):
    """Return the issue addressed by *issue_url*."""
    repo_name, issue_number = self._parse_issue_url(issue_url)
    return self.github_client.get_repo(repo_name).get_issue(issue_number)

# The similar-issue tool looks this method up under this name as well.
parse_issue_url_and_get_issue = parse_issue_url_and_get_main_issue
self.git_provider.repo_obj.get_issue(original_issue_number) + issue_main = self.git_provider.parse_issue_url_and_get_issue(self.issue_url.split('=')[-1]) issue_main.create_comment("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file") @@ -72,16 +67,17 @@ def __init__(self, issue_url: str, args: list = None): get_logger().info('Indexing the entire repo...') get_logger().info('Getting issues...') - issues = list(repo_obj.get_issues(state='all')) + issues = self.git_provider.get_repo_issues(repo_obj) get_logger().info('Done') self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) else: # update index if needed pinecone_index = pinecone.Index(index_name=index_name) issues_to_update = [] - issues_paginated_list = repo_obj.get_issues(state='all') + issues_paginated_list = self.git_provider.get_repo_issues(repo_obj) counter = 1 for issue in issues_paginated_list: - if issue.pull_request: + issue_pull_request = self.git_provider.check_if_issue_pull_request(issue) + if issue_pull_request: continue issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" @@ -106,8 +102,7 @@ def __init__(self, issue_url: str, args: list = None): async def run(self): get_logger().info('Getting issue...') - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + issue_main, original_issue_number = self.git_provider.get_issue(self.issue_url.split('=')[-1]) issue_str, comments, number = self._process_issue(issue_main) openai.api_key = get_settings().openai.key get_logger().info('Done') @@ -148,25 +143,24 @@ async def run(self): get_logger().info('Publishing response...') similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): - issue = 
def _process_issue(self, issue):
    """Build the text and metadata used to index or query a single issue.

    Args:
        issue: provider issue object; its ``title`` attribute is read
            directly, everything else goes through ``self.git_provider``.

    Returns:
        ``(issue_str, comments, number)`` where ``issue_str`` combines the
        title and body, ``comments`` is the provider's comment collection
        (an empty list when ``pr_similar_issue.skip_comments`` is set) and
        ``number`` is the provider's issue number.
    """
    header = issue.title
    body = self.git_provider.get_issue_body(issue)
    number = self.git_provider.get_issue_number(issue)
    if get_settings().pr_similar_issue.skip_comments:
        comments = []
    else:
        # Fetch the comments of the issue being processed. Using
        # self.issue_url here would attach the main issue's comments to
        # every issue that gets indexed.
        issue_url = self.git_provider.get_issue_url(issue)
        comments = self.git_provider.parse_issue_url_and_get_comments(issue_url)
        get_logger().debug(f"comments: {comments}")
    issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}"
    return issue_str, comments, number
self.git_provider.get_issue_created_at(issue) if len(issue_str) < 8000 or \ self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first issue_record = Record( @@ -209,7 +205,7 @@ def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=Fal corpus.append(issue_record) if comments: for j, comment in enumerate(comments): - comment_body = comment.body + comment_body = self.git_provider.get_issue_comment_body(comment) num_words_comment = len(comment_body.split()) if num_words_comment < 10 or not isinstance(comment_body, str): continue