forked from eyra/port
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.py
427 lines (334 loc) · 19.7 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
import logging
import json
import io
from typing import Optional, Literal
import pandas as pd
import port.api.props as props
import port.validate as validate
import port.youtube as youtube
import port.tiktok as tiktok
from port.api.commands import (CommandSystemDonate, CommandUIRender, CommandSystemExit)
# In-memory buffer receiving the formatted log records; donate_logs() reads it back.
LOG_STREAM = io.StringIO()

_LOG_FORMAT = "%(asctime)s --- %(name)s --- %(levelname)s --- %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S%z"

# Route root-logger output (DEBUG and above) into the in-memory buffer.
logging.basicConfig(
    stream=LOG_STREAM,
    level=logging.DEBUG,
    format=_LOG_FORMAT,
    datefmt=_LOG_DATE_FORMAT,
)

# Logger used throughout this script.
LOGGER = logging.getLogger("script")
def process(session_id):
    """
    Drive the donation flow for every configured platform.

    This is a generator: each ``yield`` hands a command object (donate,
    render or exit) to the caller, and the value sent back into the
    generator is treated as the payload answering that command.

    For each platform the flow is:
      1. prompt for a file until it validates or the participant skips;
      2. show the extracted tables in a consent form;
      3. after a donation, show the questionnaire.

    The collected logs are donated after each logged step.
    """
    LOGGER.info("Starting the donation flow")
    yield donate_logs(f"{session_id}-tracking")

    # (display name, extraction function, validation function) per platform.
    # Reduce this list to a single entry to run the flow for one platform only.
    platforms = [
        ("YouTube", extract_youtube, youtube.validate),
        ("TikTok", extract_tiktok, tiktok.validate),
    ]

    # Progress is expressed in percent; every platform contributes three steps.
    steps_per_platform = 3
    step_percentage = (100 / len(platforms)) / steps_per_platform
    progress = 0

    for platform_name, extraction_fun, validation_fun in platforms:
        tracking_key = f"{session_id}-{platform_name}-tracking"
        table_list = None
        progress += step_percentage

        # Step 1: keep prompting for a file until one validates or the user skips.
        while True:
            LOGGER.info("Prompt for file for %s", platform_name)
            yield donate_logs(tracking_key)

            file_prompt = prompt_file("application/zip, text/plain, application/json", platform_name)
            file_result = yield render_donation_page(platform_name, file_prompt, progress)

            # Anything other than a file path payload counts as a skip.
            if file_result.__type__ != "PayloadString":
                LOGGER.info("Skipped %s", platform_name)
                yield donate_logs(tracking_key)
                break

            validation = validation_fun(file_result.value)

            # Status code zero: the file is recognized, so extract and move on.
            if validation.status_code.id == 0:
                LOGGER.info("Payload for %s", platform_name)
                yield donate_logs(tracking_key)
                table_list = extraction_fun(file_result.value, validation)
                break

            # Any other status code: the file is not recognized, offer a retry.
            LOGGER.info("Not a valid %s zip; No payload; prompt retry_confirmation", platform_name)
            yield donate_logs(tracking_key)
            retry_result = yield render_donation_page(platform_name, retry_confirmation(platform_name), progress)

            if retry_result.__type__ != "PayloadTrue":
                LOGGER.info("Skipped during retry %s", platform_name)
                yield donate_logs(tracking_key)
                break
            # PayloadTrue: loop around and prompt for a file again.

        progress += step_percentage

        # Step 2: nothing was extracted for this platform, go to the next one.
        if table_list is None:
            continue

        LOGGER.info("Prompt consent; %s", platform_name)
        yield donate_logs(tracking_key)

        # Extraction succeeded but produced no tables: report it and show a placeholder.
        if not table_list:
            yield donate_status(f"{session_id}-{platform_name}-NO-DATA-FOUND", "NO_DATA_FOUND")
            table_list.append(create_empty_table(platform_name))

        consent_form = assemble_tables_into_form(table_list)
        consent_result = yield render_donation_page(platform_name, consent_form, progress)

        if consent_result.__type__ != "PayloadJSON":
            LOGGER.info("Skipped ater reviewing consent: %s", platform_name)
            yield donate_logs(tracking_key)
            yield donate_status(f"{session_id}-{platform_name}-SKIP-REVIEW-CONSENT", "SKIP_REVIEW_CONSENT")
            continue

        LOGGER.info("Data donated; %s", platform_name)
        yield donate(platform_name, consent_result.value)
        yield donate_logs(tracking_key)
        yield donate_status(f"{session_id}-{platform_name}-DONATED", "DONATED")

        # Step 3: the questionnaire is only shown after a donation.
        progress += step_percentage
        questionnaire_results = yield render_questionnaire(progress, platform_name)

        if questionnaire_results.__type__ == "PayloadJSON":
            yield donate(f"{session_id}-{platform_name}-questionnaire-donation", questionnaire_results.value)
        else:
            LOGGER.info("Skipped questionnaire: %s", platform_name)
            yield donate_logs(tracking_key)

    yield exit(0, "Success")
    yield render_end_page()
##################################################################
def assemble_tables_into_form(table_list: list[props.PropsUIPromptConsentFormTable]) -> props.PropsUIPromptConsentForm:
    """
    Wrap the given tables in a consent form prompt.

    The tables are passed as the first argument of PropsUIPromptConsentForm;
    the second argument is always an empty list.
    """
    consent_form = props.PropsUIPromptConsentForm(table_list, [])
    return consent_form
def donate_logs(key):
    """
    Build a donate command carrying everything logged so far.

    The content of LOG_STREAM is split on newlines and serialized as a JSON
    list; when nothing has been logged the list is ["no logs"].
    """
    captured = LOG_STREAM.getvalue()
    log_lines = captured.split("\n") if captured else ["no logs"]
    return donate(key, json.dumps(log_lines))
def create_empty_table(platform_name: str) -> props.PropsUIPromptConsentFormTable:
    """
    Build a placeholder table to show when no data could be extracted.

    Args:
        platform_name: used as prefix of the table id
            (``"<platform_name>_no_data_found"``).

    Returns:
        A consent form table holding a single "No data found" cell.
    """
    title = props.Translatable({
        # The English entry used to contain the Dutch sentence.
        "en": "Nothing went wrong, but we could not find any data in your file",
        "nl": "Er ging niks mis, maar we konden geen gegevens in jouw data vinden",
    })
    df = pd.DataFrame(["No data found"], columns=["No data found"])
    return props.PropsUIPromptConsentFormTable(f"{platform_name}_no_data_found", title, df)
##################################################################
# Visualization helpers
def create_chart(type: Literal["bar", "line", "area"],
                 nl_title: str, en_title: str,
                 x: str, y: Optional[str] = None,
                 x_label: Optional[str] = None, y_label: Optional[str] = None,
                 date_format: Optional[str] = None, aggregate: str = "count", addZeroes: bool = True):
    """
    Build a chart visualization description for a consent form table.

    Args:
        type: chart type ("bar", "line" or "area").
        nl_title: Dutch chart title.
        en_title: English chart title.
        x: column used for grouping.
        y: column used for the values; defaults to ``x`` when omitted.
        x_label: label passed along with the grouping column.
        y_label: label passed along with the value column.
        date_format: passed as ``dateFormat`` of the chart group.
        aggregate: aggregation applied to the value column.
        addZeroes: passed through to the chart value.

    Returns:
        A ``props.PropsUIChartVisualization`` with one group and one value.

    Raises:
        ValueError: if ``y`` is omitted and ``aggregate`` is not "count".
    """
    if y is None:
        # Without a separate value column only counting rows per group is allowed.
        if aggregate != "count":
            raise ValueError("aggregate must be 'count' when y is not specified")
        y = x

    return props.PropsUIChartVisualization(
        title=props.Translatable({"en": en_title, "nl": nl_title}),
        type=type,
        group=props.PropsUIChartGroup(column=x, label=x_label, dateFormat=date_format),
        values=[props.PropsUIChartValue(column=y, label=y_label, aggregate=aggregate, addZeroes=addZeroes)],
    )
def create_wordcloud(nl_title: str, en_title: str, column: str,
                     tokenize: bool = False,
                     value_column: Optional[str] = None):
    """
    Build a word cloud visualization description for a consent form table.

    ``column`` is passed as the text column; ``value_column`` and ``tokenize``
    are handed over unchanged.
    """
    cloud_title = props.Translatable({"en": en_title, "nl": nl_title})
    return props.PropsUITextVisualization(
        title=cloud_title,
        type='wordcloud',
        text_column=column,
        value_column=value_column,
        tokenize=tokenize,
    )
##################################################################
# Extraction functions
def extract_youtube(youtube_zip: str, validation: validate.ValidateInput) -> list[props.PropsUIPromptConsentFormTable]:
    """
    Collect all YouTube tables that are shown in the consent form.

    Each extraction yields a DataFrame; a table is only added when that
    DataFrame is not empty. The tables are returned in the order:
    watch history, search history, comments.

    Currently disabled extractions: youtube.watch_later_to_df,
    youtube.subscriptions_to_df and youtube.my_live_chat_messages_to_df.
    """
    tables_to_render = []

    # Watch history, with an area chart over time and a channel word cloud.
    watch_history = youtube.watch_history_to_df(youtube_zip, validation)
    if not watch_history.empty:
        tables_to_render.append(
            props.PropsUIPromptConsentFormTable(
                "youtube_watch_history",
                props.Translatable({"en": "YouTube watch history", "nl": "Kijkgeschiedenis van YouTube video’s"}),
                watch_history,
                visualizations=[
                    create_chart("area", "Het aantal YouTube video’s dat u heeft bekeken over tijd", "YouTube videos watched", "Date standard format", y_label="Aantal videos", date_format="auto"),
                    create_wordcloud("Uw meest bekeken YouTube kanalen", 'Channels Watched', "Channel"),
                ],
            )
        )

    # Search history, with a word cloud of the search terms.
    search_history = youtube.search_history_to_df(youtube_zip, validation)
    if not search_history.empty:
        tables_to_render.append(
            props.PropsUIPromptConsentFormTable(
                "youtube_searches",
                props.Translatable({"en": "YouTube search history", "nl": "Uw zoekgeschiedenis op YouTube"}),
                search_history,
                visualizations=[
                    create_wordcloud("Uw meest gebruikte zoektermen op YouTube", 'Search Terms', "Search Terms"),
                ],
            )
        )

    # Comments, shown as a plain table.
    comments = youtube.my_comments_to_df(youtube_zip, validation)
    if not comments.empty:
        tables_to_render.append(
            props.PropsUIPromptConsentFormTable(
                "youtube_comments",
                props.Translatable({"en": "YouTube comments", "nl": "YouTube reacties"}),
                comments,
            )
        )

    return tables_to_render
def extract_tiktok(tiktok_file: str, validation: validate.ValidateInput) -> list[props.PropsUIPromptConsentFormTable]:
    """
    Collect all TikTok tables that are shown in the consent form.

    Each extraction yields a DataFrame; a table is only added when that
    DataFrame is not empty. The tables are returned in the order:
    video browsing history, search history, favorite videos, likes,
    share history.

    Currently disabled extractions: tiktok.following_to_df,
    tiktok.comment_to_df and tiktok.watch_live_history_to_df.
    """
    tables_to_render = []

    # Video browsing history, with an area chart over time.
    browsing_history = tiktok.video_browsing_history_to_df(tiktok_file, validation)
    if not browsing_history.empty:
        tables_to_render.append(
            props.PropsUIPromptConsentFormTable(
                "tiktok_video_browsing_history",
                props.Translatable({"en": "Tiktok video browsing history", "nl": "Kijkgeschiedenis van TikTok video’s"}),
                browsing_history,
                visualizations=[
                    create_chart("area", "Het aantal TikTok video’s dat u heeft bekeken over tijd", "TikTok videos watched", "Date", y_label="Aantal videos", date_format="auto"),
                ],
            )
        )

    # Search history, with a word cloud of the search terms.
    search_history = tiktok.search_history_to_df(tiktok_file, validation)
    if not search_history.empty:
        tables_to_render.append(
            props.PropsUIPromptConsentFormTable(
                "tiktok_search_history",
                props.Translatable({"en": "Tiktok search history", "nl": "TikTok zoekgeschiedenis"}),
                search_history,
                visualizations=[
                    create_wordcloud("Uw meest gebruikte zoektermen op TikTok", 'Search Term', "Search Term"),
                ],
            )
        )

    # Remaining tables have no visualizations: (extractor, table id, title translations).
    plain_tables = [
        (tiktok.favorite_videos_to_df, "tiktok_favorite_videos",
         {"en": "Tiktok favorite videos", "nl": "Je favoriete video’s op TikTok"}),
        (tiktok.like_to_df, "tiktok_like",
         {"en": "Tiktok likes", "nl": "De video’s die u heeft geliked op TikTok"}),
        (tiktok.share_history_to_df, "tiktok_share_history",
         {"en": "Tiktok share history", "nl": "De video’s die u heeft gedeeld op TikTok"}),
    ]
    for to_df, table_id, translations in plain_tables:
        extracted = to_df(tiktok_file, validation)
        if not extracted.empty:
            tables_to_render.append(
                props.PropsUIPromptConsentFormTable(table_id, props.Translatable(translations), extracted)
            )

    return tables_to_render
##########################################
def render_end_page():
    """Return a render command wrapping a PropsUIPageEnd page."""
    return CommandUIRender(props.PropsUIPageEnd())
def render_donation_page(platform, body, progress):
    """
    Return a render command for a donation page.

    The page header shows ``platform`` for both languages, ``body`` is the
    prompt placed on the page and ``progress`` is passed to the footer.
    """
    page_title = props.Translatable({"en": platform, "nl": platform})
    donation_page = props.PropsUIPageDonation(
        platform,
        props.PropsUIHeader(page_title),
        body,
        props.PropsUIFooter(progress),
    )
    return CommandUIRender(donation_page)
def retry_confirmation(platform):
    """
    Build the confirm prompt shown when a file could not be processed.

    The "ok" button is labelled "Try again" and the "cancel" button
    "Continue".
    """
    message = props.Translatable(
        {
            "en": f"Unfortunately, we could not process your {platform} file. If you are sure that you selected the correct file, press Continue. To select a different file, press Try again.",
            "nl": f"Helaas, kunnen we uw {platform} bestand niet verwerken. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen."
        }
    )
    try_again_label = props.Translatable({"en": "Try again", "nl": "Probeer opnieuw"})
    continue_label = props.Translatable({"en": "Continue", "nl": "Verder"})
    return props.PropsUIPromptConfirm(message, try_again_label, continue_label)
def prompt_file(extensions, platform):
    """
    Build the file input prompt for ``platform``.

    ``extensions`` is handed to PropsUIPromptFileInput unchanged, next to a
    translated instruction text mentioning the platform.
    """
    instructions = props.Translatable(
        {
            "en": f"Please follow the download instructions and choose the file that you stored on your device. Click “Skip” at the right bottom, if you do not have a file from {platform}.",
            "nl": f"Volg de download instructies en kies het bestand dat u opgeslagen heeft op uw apparaat. Als u geen {platform} bestand heeft klik dan op “Overslaan” rechts onder."
        }
    )
    file_input = props.PropsUIPromptFileInput(instructions, extensions)
    return file_input
def donate(key, json_string):
    """Wrap ``key`` and ``json_string`` in a CommandSystemDonate command."""
    donate_command = CommandSystemDonate(key, json_string)
    return donate_command
def exit(code, info):
    """Wrap ``code`` and ``info`` in a CommandSystemExit command."""
    exit_command = CommandSystemExit(code, info)
    return exit_command
def donate_status(filename: str, message: str):
    """Build a donate command whose payload is the JSON object {"status": message}."""
    status_payload = json.dumps({"status": message})
    return donate(filename, status_payload)
###############################################################################################
# Questionnaire questions
def render_questionnaire(progress, platform_name):
    """
    Return a render command for the questionnaire page.

    The questionnaire holds five questions (ids 1-5): two open questions and
    three multiple choice questions. ``platform_name`` is inserted into the
    question texts that mention the platform, ``progress`` is passed to the
    page footer.
    """
    # Question 1 (open)
    understanding = props.Translatable({
        "en": "How would you describe the information you shared with the researchers at the University of Amsterdam?",
        "nl": "Hoe zou u de informatie omschrijven die u heeft gedeeld met de onderzoekers van de Universiteit van Amsterdam?"
    })

    # Question 2 (multiple choice)
    identify_consumption = props.Translatable({
        "en": f"If you have viewed the information, to what extent do you recognize your own viewing behavior on {platform_name}?",
        "nl": f"Als u de informatie heeft bekeken, in hoeverre herkent u dan uw eigen kijkgedrag op {platform_name}?"
    })
    identify_consumption_choices = [
        props.Translatable({
            "en": f"I recognized my viewing behavior on {platform_name}",
            "nl": f"Ik herkende mijn kijkgedrag op {platform_name}"
        }),
        props.Translatable({
            "en": f"I recognized my {platform_name} watching patterns and patters of those I share my account with",
            "nl": f"Ik herkende mijn eigen {platform_name} kijkgedrag en die van anderen met wie ik mijn account deel"
        }),
        props.Translatable({
            "en": "I recognized mostly the watching patterns of those I share my account with",
            "nl": "Ik herkende vooral het kijkgedrag van anderen met wie ik mijn account deel"
        }),
        props.Translatable({
            "en": "I did not look at my data ",
            "nl": "Ik heb niet naar mijn gegevens gekeken"
        }),
        props.Translatable({
            "en": "Other",
            "nl": "Anders"
        }),
    ]

    # Question 3 (multiple choice)
    enjoyment = props.Translatable({
        "en": "In case you looked at the data presented on this page, how interesting did you find looking at your data?",
        "nl": "Als u naar uw data hebt gekeken, hoe interessant vond u het om daar naar te kijken?"
    })
    enjoyment_choices = [
        props.Translatable({"en": "not at all interesting", "nl": "Helemaal niet interessant"}),
        props.Translatable({"en": "somewhat uninteresting", "nl": "Een beetje oninteressant"}),
        props.Translatable({"en": "neither interesting nor uninteresting", "nl": "Niet interessant, niet oninteressant"}),
        props.Translatable({"en": "somewhat interesting", "nl": "Een beetje interessant"}),
        props.Translatable({"en": "very interesting", "nl": "Erg interessant"}),
    ]

    # Question 4 (multiple choice)
    awareness = props.Translatable({
        "en": f"Did you know that {platform_name} collected this data about you?",
        "nl": f"Wist u dat {platform_name} deze gegevens over u verzamelde?"
    })
    awareness_choices = [
        props.Translatable({"en": "Yes", "nl": "Ja"}),
        props.Translatable({"en": "No", "nl": "Nee"}),
    ]

    # Question 5 (open)
    additional_comments = props.Translatable({
        "en": "Do you have any additional comments about the donation? Please add them here.",
        "nl": "Heeft u nog andere opmerkingen? Laat die hier achter."
    })

    questions = [
        props.PropsUIQuestionOpen(question=understanding, id=1),
        props.PropsUIQuestionMultipleChoice(question=identify_consumption, id=2, choices=identify_consumption_choices),
        props.PropsUIQuestionMultipleChoice(question=enjoyment, id=3, choices=enjoyment_choices),
        props.PropsUIQuestionMultipleChoice(question=awareness, id=4, choices=awareness_choices),
        props.PropsUIQuestionOpen(question=additional_comments, id=5),
    ]

    description = props.Translatable({
        "en": "Below you can find a couple of questions about the data donation process",
        "nl": "Hieronder vind u een paar vragen over het data donatie process"
    })
    header = props.PropsUIHeader(props.Translatable({"en": "Questionnaire", "nl": "Vragenlijst"}))
    body = props.PropsUIPromptQuestionnaire(questions=questions, description=description)
    footer = props.PropsUIFooter(progress)

    questionnaire_page = props.PropsUIPageDonation("page", header, body, footer)
    return CommandUIRender(questionnaire_page)