# read_data.py
import pandas as pd
import numpy as np
import csv
def get_time_dict():
    """Build a lookup from calendar day to its zero-based position.

    The range runs daily from 2013-10-27 through 2014-08-01 (both ends
    included). Two diagnostic lines are printed: the number of days in the
    range and the value stored for 2013/10/30.

    Returns:
        A pandas Series indexed by the dates, whose values are 0, 1, 2, ...
    """
    days = pd.date_range(start='2013-10-27', end='2014-08-01')
    n_days = len(days)
    print('number of dates:', n_days)

    day_to_index = pd.Series(np.arange(n_days), index=days)
    print(day_to_index['2013/10/30'])
    return day_to_index
class Enrollment():
    """In-memory index of an enrollment file.

    Each data line is expected to hold exactly three comma-separated fields:
    enrollment id, username and course id. All values are kept as the raw
    strings read from the file.

    Attributes:
        enrollment_ids: enrollment ids in the order they appear in the file.
        enrollment_info: enrollment id -> ``[username, course_id]``.
        user_info: username -> list of course ids, in file order.
        user_enrollment_id: username -> list of enrollment ids, in file order.
        course_info: course id -> list of usernames, in file order.
    """

    def __init__(self, filename):
        """Read ``filename`` and populate the lookup tables.

        Args:
            filename: path of the comma-separated enrollment file.

        Raises:
            ValueError: if a non-blank line does not split into exactly
                three comma-separated fields.
        """
        self.enrollment_ids = []
        self.enrollment_info = {}
        self.user_info = {}
        self.user_enrollment_id = {}
        self.course_info = {}

        # The context manager guarantees the handle is closed even when a
        # malformed line raises part-way through the file.
        with open(filename) as fin:
            for line in fin:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. at the end of the file) carry no data.
                    continue
                enrollment_id, username, course_id = line.split(',')
                if enrollment_id == 'enrollment_id':  # ignore the header row
                    continue

                self.enrollment_ids.append(enrollment_id)
                self.enrollment_info[enrollment_id] = [username, course_id]
                # setdefault keeps these plain dicts, so a lookup of an
                # unknown key still raises KeyError for callers.
                self.user_info.setdefault(username, []).append(course_id)
                self.user_enrollment_id.setdefault(username, []).append(enrollment_id)
                self.course_info.setdefault(course_id, []).append(username)
class Truth:
    """Contents of a two-column CSV file, indexed by the first column.

    Attributes:
        truth_ids: first-column values in file order.
        truth_info: first-column value -> ``[first column, second column]``.
    """

    def __init__(self, filename):
        """Parse ``filename`` with ``csv.reader``; every row is treated as data."""
        self.truth_ids = []
        self.truth_info = {}
        with open(filename, 'r') as handle:
            for row in csv.reader(handle):
                key = row[0]
                self.truth_ids.append(key)
                self.truth_info[key] = [key, row[1]]
class Log():
    """Per-enrollment index of a log file.

    Each data line is expected to hold exactly five comma-separated fields:
    enrollment id, time, source, event and object. All values are kept as
    the raw strings read from the file.

    Attributes:
        enrollment_info: enrollment id -> list whose element 0 is the
            enrollment id itself and whose remaining elements are the log
            records ``[enrollment_id, time, source, event, objects]`` in
            file order, e.g. ``enrollment_info["4"][1]`` is the first record
            of enrollment "4".
        dates: enrollment id -> distinct values of the first 10 characters
            of the time field, in order of first appearance.
        events: enrollment id -> event field of the log lines, in file order.
    """

    def __init__(self, filename):
        """Read ``filename`` and populate the lookup tables.

        Args:
            filename: path of the comma-separated log file.

        Raises:
            ValueError: if a non-blank line does not split into exactly
                five comma-separated fields.
        """
        self.enrollment_info = {}
        self.dates = {}
        self.events = {}

        # The context manager guarantees the handle is closed even when a
        # malformed line raises part-way through the file.
        with open(filename) as fin:
            for line in fin:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. at the end of the file) carry no data.
                    continue
                enrollment_id, timestamp, source, event, objects = line.split(',')
                if enrollment_id == 'enrollment_id':  # ignore the header row
                    continue

                record = [enrollment_id, timestamp, source, event, objects]
                day = timestamp[:10]

                if enrollment_id not in self.enrollment_info:
                    # Slot 0 holds the id; records start at index 1.
                    self.enrollment_info[enrollment_id] = [enrollment_id, record]
                    self.dates[enrollment_id] = [day]
                    self.events[enrollment_id] = [event]
                else:
                    self.enrollment_info[enrollment_id].append(record)
                    if day not in self.dates[enrollment_id]:
                        self.dates[enrollment_id].append(day)
                    # NOTE(review): assumes every event is recorded (only
                    # dates are de-duplicated) -- confirm against the
                    # intended nesting of this statement.
                    self.events[enrollment_id].append(event)
class Date():
    """Rows of a three-column CSV file whose header starts with ``course_id``.

    Attributes:
        course_ids: initialised to an empty list; not filled by this class.
        course_info: running integer index (0, 1, 2, ...) -> the first
            three columns of each stored row, as raw strings.
    """

    def __init__(self, filename):
        """Read ``filename`` and store every non-empty data row.

        Rows that are completely blank, rows whose first column is the
        literal ``course_id`` (the header) and rows whose first column is
        empty are skipped.

        Args:
            filename: path of the CSV file.

        Raises:
            IndexError: if a stored row has fewer than three columns.
        """
        self.course_ids = []
        self.course_info = {}

        # newline='' is how the csv module expects text files to be opened.
        with open(filename, 'r', newline='') as fin:
            index = 0
            for row in csv.reader(fin):
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError, so test for emptiness first.
                if not row or row[0] == 'course_id' or row[0] == "":
                    continue
                self.course_info[index] = [row[0], row[1], row[2]]
                # NOTE(review): assumes the index advances only for stored
                # rows, giving consecutive keys -- confirm.
                index += 1
class Object():
    """Rows of a five-column CSV file whose header starts with ``course_id``.

    Attributes:
        module_ids: initialised to an empty list; not filled by this class.
        object_info: running integer index (0, 1, 2, ...) -> the first five
            columns of each stored row, as raw strings.
    """

    def __init__(self, filename):
        """Read ``filename`` and store every non-empty data row.

        Rows that are completely blank, rows whose first column is the
        literal ``course_id`` (the header) and rows whose first column is
        empty are skipped.

        Args:
            filename: path of the CSV file.

        Raises:
            IndexError: if a stored row has fewer than five columns.
        """
        self.module_ids = []
        self.object_info = {}

        # newline='' is how the csv module expects text files to be opened.
        with open(filename, 'r', newline='') as fin:
            index = 0
            for row in csv.reader(fin):
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError, so test for emptiness first.
                if not row or row[0] == 'course_id' or row[0] == "":
                    continue
                self.object_info[index] = [row[0], row[1], row[2], row[3], row[4]]
                # NOTE(review): assumes the index advances only for stored
                # rows, giving consecutive keys -- confirm.
                index += 1