-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmulti_users_fetch.py
93 lines (67 loc) · 3.82 KB
/
multi_users_fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
""" Fetch contributions for multiple users """
import json
import pandas as pd
from typing import Dict
from leaderboard.queries.query import query
from leaderboard.fetch_data import execute_query
from leaderboard.utils.formatter import convert_to_intermediate_table
def fetch_contributions_for_multi_users(
user_list: list, query_variables: Dict
) -> pd.DataFrame:
""" Returns combined intermediate table dataframe for all users
Args:
user_list: list of users whoss contribution data is to be fetched
query_variables: arguments required for the OSS graphql query
"""
# stores list of intermediate dataframes
intermediate_list = []
for userName in user_list:
query_variables["username"] = userName
# variables that sore the pagination info for each of the 5 contribution types
page_info_T1 = {"hasNextPage": True, "endCursor": None}
page_info_T2 = {"hasNextPage": True, "endCursor": None}
page_info_T3 = {"hasNextPage": True, "endCursor": None}
# in case of issue comment(T4), 'hasPreviousPage' and 'startCursor' is used instead as it involves a different approach to get issue comment contributions after a specific time
page_info_T4 = {"hasPreviousPage": True, "startCursor": None}
page_info_T5 = {"hasNextPage": True, "endCursor": None}
while True:
# extra params required for pagination
query_variables["pullReqCursor"] = page_info_T1["endCursor"]
query_variables["pullreqreviewcursor"] = page_info_T2["endCursor"]
query_variables["issueCursor"] = page_info_T3["endCursor"]
query_variables["issueCommentsCursor"] = page_info_T4["startCursor"]
query_variables["repoCursor"] = page_info_T5["endCursor"]
# in case of issue comment(T4), if 'hasPreviousPage' is false, we set the data count to zero so that no more issue comment data is fetched
if page_info_T4["hasPreviousPage"]:
query_variables["issueCommentDataCount"] = query_variables["dataCount"]
else:
query_variables["issueCommentDataCount"] = 0
result = execute_query(query, query_variables)
flat_data = convert_to_intermediate_table(
json.dumps(result.json(), indent=4), query_variables["timedelta"]
)
intermediate_list.append(flat_data["intermediate_table"])
# we only set the page info values that we get from api response when the hasNextPage/hasPreviousPage is true
# once we get a false value for hasNextPage/hasPreviousPage from api, we set the page info once and ignore the page info value in subsequent responses
if page_info_T1["hasNextPage"]:
page_info_T1 = flat_data["page_info"]["page_info_T1"]
if page_info_T2["hasNextPage"]:
page_info_T2 = flat_data["page_info"]["page_info_T2"]
if page_info_T3["hasNextPage"]:
page_info_T3 = flat_data["page_info"]["page_info_T3"]
if page_info_T4["hasPreviousPage"]:
page_info_T4 = flat_data["page_info"]["page_info_T4"]
if page_info_T5["hasNextPage"]:
page_info_T5 = flat_data["page_info"]["page_info_T5"]
# if none of the contribution data types have any more data to be fetched for a user, break out of the whie loop and move on to next user
if (
not page_info_T1["hasNextPage"]
and not page_info_T2["hasNextPage"]
and not page_info_T3["hasNextPage"]
and not page_info_T4["hasPreviousPage"]
and not page_info_T5["hasNextPage"]
):
break
# combines all the intermediate dataframes into one
combined = pd.concat(intermediate_list)
return combined