From ef742e0204228c05f1408580bdf6dda4908811b1 Mon Sep 17 00:00:00 2001 From: Samuel Aktar Laskar Date: Sat, 30 Mar 2024 00:51:17 +0530 Subject: [PATCH 1/4] Scaling points for PRs based on size of PR (#219) --- lib/api.ts | 63 ++++++++++++++++++++++++++++--------------- lib/types.ts | 1 + scraper/src/github.py | 11 ++++++++ 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/lib/api.ts b/lib/api.ts index 7f501f2e..2d4fc904 100644 --- a/lib/api.ts +++ b/lib/api.ts @@ -15,6 +15,7 @@ const points = { issue_opened: 4, eod_update: 2, pr_opened: 1, + no_of_linked_issues: 1, pr_merged: 7, pr_collaborated: 2, issue_closed: 0, @@ -24,6 +25,7 @@ const points = { // Reviewing a PR would get 4 points // Finding a bug would add up to 4 points // Opening a PR would give a single point and merging it would give you the other 7 points, making 8 per PR +// For each linked issue in pr body, points would increase by 1. // Updating the EOD would get 2 points per day and additional 20 for regular daily updates plus 10 for just missing one function formatSlug(slug: string) { @@ -115,9 +117,17 @@ export async function getContributorBySlug(file: string, detail = false) { return { activity: [ ...acc.activity, - { ...activity, points: points[activity.type] || 0 }, + { + ...activity, + points: + (points[activity.type] || 0) + + (activity.no_of_linked_issues || 0), + }, ], - points: acc.points + (points[activity.type] || 0), + points: + acc.points + + (points[activity.type] || 0) + + (activity.no_of_linked_issues || 0), comment_created: acc.comment_created + (activity.type === "comment_created" ? 1 : 0), eod_update: acc.eod_update + (activity.type === "eod_update" ? 
1 : 0), @@ -218,6 +228,16 @@ function getCalendarData(activity: Activity[]) { } else { acc[date][activity.type] = 1; } + if (!acc[date]["points"]) { + acc[date]["points"] = points[activity.type]; + } else { + acc[date]["points"] += points[activity.type]; + if (activity.type === "pr_opened") { + acc[date]["points"] += + (activity.no_of_linked_issues ?? 0) * points["no_of_linked_issues"]; + } + } + if (!acc[date].types.includes(activity.type)) { acc[date].types.push(activity.type); // console.log(activity.type); @@ -248,29 +268,30 @@ function getCalendarData(activity: Activity[]) { }); } -const HIGHLIGHT_KEYS = [ - "eod_update", - "comment_created", - "pr_opened", - "pr_reviewed", - "pr_merged", - "pr_collaborated", - "issue_assigned", - "issue_opened", -] as const; +// const HIGHLIGHT_KEYS = [ +// "eod_update", +// "comment_created", +// "pr_opened", +// "pr_reviewed", +// "pr_merged", +// "pr_collaborated", +// "issue_assigned", +// "issue_opened", +// ] as const; -const computePoints = ( - calendarDataEntry: Highlights, - initialPoints: number, -) => { - return HIGHLIGHT_KEYS.map( - (key) => points[key] * (calendarDataEntry[key] ?? 0), - ).reduce((a, b) => a + b, initialPoints); -}; +// const computePoints = ( +// calendarDataEntry: Highlights, +// initialPoints: number, +// ) => { +// return HIGHLIGHT_KEYS.map( +// (key) => points[key] * (calendarDataEntry[key] ?? 0), +// ).reduce((a, b) => a + b, initialPoints); +// }; const HighlightsReducer = (acc: Highlights, day: Highlights) => { return { - points: computePoints(day, acc.points), + // points: computePoints(day, acc.points), + points: acc.points + (day.points ?? 0), eod_update: acc.eod_update + (day.eod_update ?? 0), comment_created: acc.comment_created + (day.comment_created ?? 0), pr_opened: acc.pr_opened + (day.pr_opened ?? 
0), diff --git a/lib/types.ts b/lib/types.ts index d5cabe34..0566d36d 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -76,6 +76,7 @@ export interface Activity { text: string; collaborated_with?: string[]; turnaround_time?: number; + no_of_linked_issues?: number; } export interface OpenPr { diff --git a/scraper/src/github.py b/scraper/src/github.py index 797a5899..20857837 100755 --- a/scraper/src/github.py +++ b/scraper/src/github.py @@ -71,6 +71,14 @@ def append(self, user, event): "authored_issue_and_pr": [], } + def parse_linked_issues(self, pr_body): + if isinstance(pr_body, str): + pattern = r"#(\d+)" + matches = re.findall(pattern, pr_body) + return len(set(matches)) + else: + return 0 + def parse_event(self, event, event_time): user = event["actor"]["login"] try: @@ -116,6 +124,8 @@ def parse_event(self, event, event_time): ) elif event["type"] == "PullRequestEvent": + pr_body = event["payload"]["pull_request"]["body"] + no_of_linked_issues = self.parse_linked_issues(pr_body) if event["payload"]["action"] == "opened": self.append( user, @@ -125,6 +135,7 @@ def parse_event(self, event, event_time): "time": event_time, "link": event["payload"]["pull_request"]["html_url"], "text": event["payload"]["pull_request"]["title"], + "no_of_linked_issues" : no_of_linked_issues }, ) From 162cb5eafcb4d82d28f8ef301ddea3434bae5cc1 Mon Sep 17 00:00:00 2001 From: Samuel Aktar Laskar Date: Mon, 1 Apr 2024 14:57:27 +0530 Subject: [PATCH 2/4] Updated the scraper to find linked issue to PR --- scraper/src/github.py | 17 +++-- scraper/src/linked_issue_parser.py | 113 +++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 scraper/src/linked_issue_parser.py diff --git a/scraper/src/github.py b/scraper/src/github.py index 20857837..94f433d0 100755 --- a/scraper/src/github.py +++ b/scraper/src/github.py @@ -9,7 +9,7 @@ from pathlib import Path from urllib.parse import parse_qsl, urlparse from zoneinfo import ZoneInfo - +from 
linked_issue_parser import LinkedIssueParser import requests logging.basicConfig( @@ -124,8 +124,6 @@ def parse_event(self, event, event_time): ) elif event["type"] == "PullRequestEvent": - pr_body = event["payload"]["pull_request"]["body"] - no_of_linked_issues = self.parse_linked_issues(pr_body) if event["payload"]["action"] == "opened": self.append( user, @@ -134,8 +132,7 @@ def parse_event(self, event, event_time): "title": f'{event["repo"]["name"]}#{event["payload"]["pull_request"]["number"]}', "time": event_time, "link": event["payload"]["pull_request"]["html_url"], - "text": event["payload"]["pull_request"]["title"], - "no_of_linked_issues" : no_of_linked_issues + "text": event["payload"]["pull_request"]["title"] }, ) @@ -144,6 +141,15 @@ def parse_event(self, event, event_time): and event["payload"]["pull_request"]["merged"] ): turnaround_time = self.caclculate_turnaround_time(event) + pr_body = event["payload"]["pull_request"]["body"] + repo = event["repo"]["name"] + parts = repo.split('/') + org_name = parts[0] + repo_name = parts[1] + pr_no = event['payload']['pull_request']['number'] + linked_issue_parser = LinkedIssueParser(org=org_name,repo=repo_name,pr_no=pr_no,pr_body=pr_body) + linked_issues = linked_issue_parser.parse_linked_issues() + self.log.debug(f'linked_issues for pr {pr_no} are {linked_issues}') self.append( event["payload"]["pull_request"]["user"]["login"], { @@ -153,6 +159,7 @@ def parse_event(self, event, event_time): "link": event["payload"]["pull_request"]["html_url"], "text": event["payload"]["pull_request"]["title"], "turnaround_time": turnaround_time, + "linked_issues" : linked_issues }, ) diff --git a/scraper/src/linked_issue_parser.py b/scraper/src/linked_issue_parser.py new file mode 100644 index 00000000..56b34036 --- /dev/null +++ b/scraper/src/linked_issue_parser.py @@ -0,0 +1,113 @@ +import re +from os import getenv +import requests + +class LinkedIssueParser: + def __init__(self,org:str, repo:str, pr_no:int, pr_body: str): + 
self.repo = repo + self.pr_no = pr_no + self.pr_body = pr_body + self.org = org + + # The logic here is as follows: + # Get a list of all events on a Pull Request of the type CONNECTED_EVENT and DISCONNECTED_EVENT + # Create a map, keyed by Issue number and keep a count of how many times the issue is CONNECTED and DISCONNECTED + # From that map, look for keys that have an odd-numbered count, as these are the issues that have been CONNECTED that don't have a corresponding DISCONNECTED event. + + def parse_ui_linked_issues(self): + query = """ + {{ + resource(url: "https://github.com/{org}/{repo}/pull/{pr_no}") {{ + ... on PullRequest {{ + timelineItems(itemTypes: [CONNECTED_EVENT, DISCONNECTED_EVENT], first: 100) {{ + nodes {{ + ... on ConnectedEvent {{ + id + subject {{ + ... on Issue {{ + number + }} + }} + }} + ... on DisconnectedEvent {{ + id + subject {{ + ... on Issue {{ + number + }} + }} + }} + }} + }} + }} + }} + }} + """.format(org = self.org, repo = self.repo, pr_no = self.pr_no) + gh_url = 'https://api.github.com/graphql' + token = getenv('GITHUB_TOKEN') + headers = { + 'Authorization': f'Bearer {token}', + 'Content-Type':'application/json' + } + response = requests.post(gh_url,headers=headers,json={'query':query}) + if response.status_code != 200: + return [] + data = response.json() + issues = {} + for node in data['data']['resource']['timelineItems']['nodes']: + issue_number = node['subject']['number'] + if issue_number in issues: + issues[issue_number] +=1 + else: + issues[issue_number] = 1 + + linked_issues = [] + for issue, count in issues.items(): + if count % 2 != 0: + linked_issues.append(f'https://github.com/{self.org}/{self.repo}/issues/{issue}') + return linked_issues + + def get_concat_commit_messages(self): + commit_url = f'https://api.github.com/repos/{self.org}/{self.repo}/pulls/{self.pr_no}/commits' + resposne = requests.get(commit_url) + if resposne.status_code != 200: + return "" + json_data = resposne.json() + result = "" + for commit
in json_data: + message = commit['commit']['message'] + result = f'{result} , {message}' + return result + + def parse_desc_linked_issues(self): + pattern_same_repo = r'\b(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+#(\d+)' + pattern_other_repo = r'\b(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+(\S+\/\S+)#(\d+)' + commit_messages = self.get_concat_commit_messages() + text = f'{self.pr_body} {commit_messages}' + same_repo_linked_issues = re.findall(pattern_same_repo,text,re.IGNORECASE) + other_repo_linked_issues = re.findall(pattern_other_repo,text,re.IGNORECASE) + linked_issues = set([]) + for issue in same_repo_linked_issues: + linked_issues.add(issue) + for issue in other_repo_linked_issues: + linked_issues.add(issue) + linked_issues_url = [] + for issue in linked_issues: + if isinstance(issue, str): + linked_issues_url.append(f'https://github.com/{self.org}/{self.repo}/issues/{issue}') + elif isinstance(issue, tuple): + linked_issues_url.append(f'https://github.com/{issue[0]}/issues/{issue[1]}') + continue + return linked_issues_url + + + def parse_linked_issues(self): + linked_issues = [] + ui_linked_issues = self.parse_ui_linked_issues() + desc_linked_issues = self.parse_desc_linked_issues() + for issue in ui_linked_issues: + linked_issues.append(issue) + for issue in desc_linked_issues: + linked_issues.append(issue) + return linked_issues + \ No newline at end of file From a19c8beea934eee6ebe9a70036fc5d57640657d2 Mon Sep 17 00:00:00 2001 From: Samuel Aktar Laskar Date: Mon, 1 Apr 2024 16:40:06 +0530 Subject: [PATCH 3/4] Sorting logic updated --- app/api/leaderboard/functions.ts | 1 - lib/api.ts | 69 +++++++++++++++++--------------- lib/types.ts | 5 ++- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/app/api/leaderboard/functions.ts b/app/api/leaderboard/functions.ts index bd67f363..c5ae692c 100644 --- a/app/api/leaderboard/functions.ts +++ b/app/api/leaderboard/functions.ts @@ -15,7 +15,6 @@ export const getLeaderboardData = async ( roles: 
("core" | "intern" | "operations" | "contributor")[] = [], ) => { const contributors = await getContributors(); - const data = contributors .filter((a) => a.highlights.points) .map((contributor) => ({ diff --git a/lib/api.ts b/lib/api.ts index 642ffbe5..851cc0a8 100644 --- a/lib/api.ts +++ b/lib/api.ts @@ -15,7 +15,6 @@ const points = { issue_opened: 4, eod_update: 2, pr_opened: 1, - no_of_linked_issues: 1, pr_merged: 7, pr_collaborated: 2, issue_closed: 0, @@ -121,13 +120,13 @@ export async function getContributorBySlug(file: string, detail = false) { ...activity, points: (points[activity.type] || 0) + - (activity.no_of_linked_issues || 0), + (activity.linked_issues?.length || 0), }, ], points: acc.points + (points[activity.type] || 0) + - (activity.no_of_linked_issues || 0), + (activity.linked_issues?.length || 0), comment_created: acc.comment_created + (activity.type === "comment_created" ? 1 : 0), eod_update: acc.eod_update + (activity.type === "eod_update" ? 1 : 0), @@ -141,6 +140,8 @@ export async function getContributorBySlug(file: string, detail = false) { acc.issue_assigned + (activity.type === "issue_assigned" ? 1 : 0), issue_opened: acc.issue_opened + (activity.type === "issue_opened" ? 1 : 0), + linked_issues: + acc.linked_issues + (activity.linked_issues?.length || 0), }; }, { @@ -154,6 +155,7 @@ export async function getContributorBySlug(file: string, detail = false) { pr_reviewed: 0, issue_assigned: 0, issue_opened: 0, + linked_issues: 0, } as Highlights & { activity: Activity[] }, ); @@ -191,12 +193,13 @@ export async function getContributorBySlug(file: string, detail = false) { pr_collaborated: weightedActivity.pr_collaborated, issue_assigned: weightedActivity.issue_assigned, issue_opened: weightedActivity.issue_opened, + linked_issues: weightedActivity.linked_issues, }, weekSummary: getLastWeekHighlights(calendarData), summarize, calendarData: detail ? 
calendarData : [], ...data, - } as Contributor & { summarize: typeof summarize }; + } as unknown as Contributor & { summarize: typeof summarize }; } let contributors: Awaited>[] | null = @@ -228,13 +231,12 @@ function getCalendarData(activity: Activity[]) { } else { acc[date][activity.type] = 1; } - if (!acc[date]["points"]) { - acc[date]["points"] = points[activity.type]; - } else { - acc[date]["points"] += points[activity.type]; - if (activity.type === "pr_opened") { - acc[date]["points"] += - (activity.no_of_linked_issues ?? 0) * points["no_of_linked_issues"]; + + if (activity.type === "pr_merged") { + if (acc[date]["linked_issues"]) { + acc[date]["linked_issues"] += activity.linked_issues?.length || 0; + } else { + acc[date]["linked_issues"] = activity.linked_issues?.length || 0; } } @@ -268,30 +270,31 @@ function getCalendarData(activity: Activity[]) { }); } -// const HIGHLIGHT_KEYS = [ -// "eod_update", -// "comment_created", -// "pr_opened", -// "pr_reviewed", -// "pr_merged", -// "pr_collaborated", -// "issue_assigned", -// "issue_opened", -// ] as const; - -// const computePoints = ( -// calendarDataEntry: Highlights, -// initialPoints: number, -// ) => { -// return HIGHLIGHT_KEYS.map( -// (key) => points[key] * (calendarDataEntry[key] ?? 0), -// ).reduce((a, b) => a + b, initialPoints); -// }; +const HIGHLIGHT_KEYS = [ + "eod_update", + "comment_created", + "pr_opened", + "pr_reviewed", + "pr_merged", + "pr_collaborated", + "issue_assigned", + "issue_opened", +] as const; + +const computePoints = ( + calendarDataEntry: Highlights, + initialPoints: number, +) => { + let totalPoints = HIGHLIGHT_KEYS.map( + (key) => points[key] * (calendarDataEntry[key] ?? 0), + ).reduce((a, b) => a + b, initialPoints); + totalPoints += calendarDataEntry.linked_issues || 0; + return totalPoints; +}; const HighlightsReducer = (acc: Highlights, day: Highlights) => { return { - // points: computePoints(day, acc.points), - points: acc.points + (day.points ?? 
0), + points: computePoints(day, acc.points), eod_update: acc.eod_update + (day.eod_update ?? 0), comment_created: acc.comment_created + (day.comment_created ?? 0), pr_opened: acc.pr_opened + (day.pr_opened ?? 0), @@ -300,6 +303,7 @@ const HighlightsReducer = (acc: Highlights, day: Highlights) => { pr_collaborated: acc.pr_collaborated + (day.pr_collaborated ?? 0), issue_assigned: acc.issue_assigned + (day.issue_assigned ?? 0), issue_opened: acc.issue_opened + (day.issue_opened ?? 0), + linked_issues: acc.linked_issues + (day.linked_issues ?? 0), }; }; @@ -313,6 +317,7 @@ const HighlightsInitialValue = { pr_collaborated: 0, issue_assigned: 0, issue_opened: 0, + linked_issues: 0, } as Highlights; const getLastWeekHighlights = (calendarData: Highlights[]) => { diff --git a/lib/types.ts b/lib/types.ts index 0566d36d..0640d5d4 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -21,7 +21,7 @@ export interface Contributor { slack: string; joining_date: string; role: "core" | "intern" | "operations" | "contributor"; - courses_completed: string[]; + courses_completed?: string[]; } export interface ActivityData { @@ -42,6 +42,7 @@ export interface Highlights { pr_collaborated: number; issue_assigned: number; issue_opened: number; + linked_issues: number; } export interface WeekSummary { @@ -76,7 +77,7 @@ export interface Activity { text: string; collaborated_with?: string[]; turnaround_time?: number; - no_of_linked_issues?: number; + linked_issues?: string[]; //issues url, which are linked to merged prs } export interface OpenPr { From 9c4a0cc46dcbcfda743806fb5117f69faa3318d9 Mon Sep 17 00:00:00 2001 From: Samuel Aktar Laskar Date: Mon, 1 Apr 2024 16:45:09 +0530 Subject: [PATCH 4/4] Minor changes --- lib/api.ts | 1 - lib/types.ts | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/api.ts b/lib/api.ts index 851cc0a8..f98ae5cc 100644 --- a/lib/api.ts +++ b/lib/api.ts @@ -24,7 +24,6 @@ const points = { // Reviewing a PR would get 4 points // Finding a bug would 
add up to 4 points // Opening a PR would give a single point and merging it would give you the other 7 points, making 8 per PR -// For each linked issue in pr body, points would increase by 1. // Updating the EOD would get 2 points per day and additional 20 for regular daily updates plus 10 for just missing one function formatSlug(slug: string) { diff --git a/lib/types.ts b/lib/types.ts index 0640d5d4..29b9130e 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -21,7 +21,7 @@ export interface Contributor { slack: string; joining_date: string; role: "core" | "intern" | "operations" | "contributor"; - courses_completed?: string[]; + courses_completed: string[]; } export interface ActivityData {