-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
61 lines (47 loc) · 1.71 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
import argparse
import math
# Hard-coded dimensions of the supported datasets:
# name -> (num_users, num_items, num_total_ratings).
_DATASET_STATS = {
    'music': (2718, 5567, 69709),
    'book': (2718, 6777, 96041),
}


def _fill_split(lines, indices, ratings, mask, users, items):
    """Record the ratings found at ``indices`` of ``lines`` into one split."""
    for idx in indices:
        user, item, rating = lines[idx].split(",")
        # Ids are shifted down by one to be used as 0-based matrix indices.
        user_idx = int(user) - 1
        item_idx = int(item) - 1
        ratings[user_idx, item_idx] = int(rating)
        mask[user_idx, item_idx] = 1
        users.add(user_idx)
        items.add(item_idx)


def get_data(path: str, data_name: str, train_ratio: float):
    """Load a rating CSV and randomly split it into train and test matrices.

    The file ``path + data_name + '.csv'`` is read in full. Each line used
    must have exactly three comma-separated integer fields:
    ``user,item,rating``. Only the lines at indices
    ``0 .. num_total_ratings - 1`` are considered; they are shuffled with
    ``np.random.permutation`` and the first
    ``int(num_total_ratings * train_ratio)`` of the shuffled indices form the
    training split, the remainder the test split.

    Args:
        path: Prefix concatenated directly with the file name (no separator
            is inserted, so it normally ends with one).
        data_name: Dataset key; one of ``'music'`` or ``'book'``.
        train_ratio: Fraction of the ratings assigned to the training split.

    Returns:
        An 11-tuple ``(train_r, train_mask_r, test_r, test_mask_r,
        user_train_set, item_train_set, user_test_set, item_test_set,
        num_users, num_items, num_total_ratings)``. The four arrays have
        shape ``(num_users, num_items)``; ``*_r`` hold the rating values and
        ``*_mask_r`` hold 1 where a rating was recorded. The sets contain the
        0-based user / item indices seen in each split.

    Raises:
        ValueError: If ``data_name`` is not a supported dataset.
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if data_name not in _DATASET_STATS:
        raise ValueError(
            "unknown data_name {!r}; expected one of {}".format(
                data_name, sorted(_DATASET_STATS)
            )
        )
    num_users, num_items, num_total_ratings = _DATASET_STATS[data_name]

    # The context manager guarantees the file handle is closed.
    with open(path + data_name + '.csv') as fp:
        lines = fp.readlines()

    user_train_set = set()
    user_test_set = set()
    item_train_set = set()
    item_test_set = set()

    train_r = np.zeros((num_users, num_items))
    test_r = np.zeros((num_users, num_items))
    train_mask_r = np.zeros((num_users, num_items))
    test_mask_r = np.zeros((num_users, num_items))

    random_perm_idx = np.random.permutation(num_total_ratings)
    num_train = int(num_total_ratings * train_ratio)
    train_idx = random_perm_idx[:num_train]
    test_idx = random_perm_idx[num_train:]

    _fill_split(lines, train_idx, train_r, train_mask_r,
                user_train_set, item_train_set)
    _fill_split(lines, test_idx, test_r, test_mask_r,
                user_test_set, item_test_set)

    return (train_r, train_mask_r, test_r, test_mask_r,
            user_train_set, item_train_set, user_test_set, item_test_set,
            num_users, num_items, num_total_ratings)