-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseed.py
122 lines (82 loc) · 3.31 KB
/
seed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Utility file to seed ratings database from MovieLens data in seed_data/"""
from model import User
from model import Rating
from model import Movie
from model import connect_to_db, db
from server import app
from datetime import datetime
def load_users():
    """Load users from u.user into database.

    Empties the users table, then reads ``seed_data/u.user`` line by line.
    Each line is split on "|" into five fields (user id, age, gender,
    occupation, zipcode); only user id, age and zipcode are passed to
    ``User``. All rows are added to the session and committed once at the
    end.
    """
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement this file uses.
    print("Users")

    # Delete all rows in table, so if we need to run this a second time,
    # we won't be trying to add duplicate users
    User.query.delete()

    # Read u.user file and insert data; the with-block guarantees the file
    # handle is closed even if a row fails to parse.
    with open("seed_data/u.user") as user_file:
        for row in user_file:
            row = row.rstrip()
            if not row:
                # Tolerate blank/trailing lines instead of failing the unpack.
                continue

            # gender and occupation are present in the file but not stored.
            user_id, age, _gender, _occupation, zipcode = row.split("|")

            user = User(user_id=user_id,
                        age=age,
                        zipcode=zipcode)

            # We need to add to the session or it won't ever be stored
            db.session.add(user)

    # Once we're done, we should commit our work
    db.session.commit()
def load_movies():
    """Load movies from u.item into database.

    Empties the movies table, then reads ``seed_data/u.item`` line by line.
    Each line is split on "|" and only the first four fields are used:
    movie id, title (with release year), release date string and IMDB URL.
    The release date is parsed with the "%d-%b-%Y" format (or stored as
    None when the field is empty), and a trailing "(YYYY)" year is removed
    from the title. All rows are added to the session and committed once at
    the end.

    Raises:
        ValueError: if a non-empty release date does not match "%d-%b-%Y".
    """
    # Local import: the rest of the module does not otherwise need ``re``.
    import re

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement this file uses.
    print("Movies")

    # Delete all rows in table, so if we need to run this a second time,
    # we won't be trying to add duplicate movies
    Movie.query.delete()

    # Matches a trailing "(YYYY)" year and the whitespace around it. Blindly
    # slicing off the last 7 characters would mangle titles that carry no
    # year suffix (a 7-character title would become an empty string).
    year_suffix = re.compile(r"\s*\(\d{4}\)\s*$")

    # Read u.item file and insert data; the with-block guarantees the file
    # handle is closed even if a row fails to parse.
    with open("seed_data/u.item") as movie_file:
        for row in movie_file:
            row = row.rstrip()
            if not row:
                # Tolerate blank/trailing lines instead of failing the unpack.
                continue

            movie_id, title_long, released_string, imdb_url = row.split("|")[:4]

            print(row)

            # Convert the release date string into a datetime; rows with no
            # release date are stored with None.
            if released_string:
                released_at = datetime.strptime(released_string, "%d-%b-%Y")
            else:
                released_at = None

            # Strip the "(xxxx)" year from the title only when it is present.
            title = year_suffix.sub("", title_long)

            # Same space-separated output as the original multi-item print.
            print("%s %s %s %s" % (movie_id, title_long, released_string,
                                   imdb_url))

            movie = Movie(movie_id=movie_id, title=title,
                          released_at=released_at, imdb_url=imdb_url)

            # We need to add to the session or it won't ever be stored
            db.session.add(movie)

    # Once we're done, we should commit our work
    db.session.commit()
def load_ratings():
    """Load ratings from u.data into database.

    Empties the ratings table, then reads ``seed_data/u.data`` line by line.
    Each line is split on tabs into four fields (user id, movie id, score,
    timestamp); the first three are converted to int and passed to
    ``Rating``, the timestamp is not stored. All rows are added to the
    session and committed once at the end.

    Raises:
        ValueError: if user id, movie id or score is not an integer string.
    """
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement this file uses.
    print("Ratings")

    # Delete all rows in table, so if we need to run this a second time,
    # we won't be trying to add duplicate ratings
    Rating.query.delete()

    # Read u.data file and insert data; the with-block guarantees the file
    # handle is closed even if a row fails to parse.
    with open("seed_data/u.data") as rating_file:
        for row in rating_file:
            row = row.rstrip()
            if not row:
                # Tolerate blank/trailing lines instead of failing the unpack.
                continue

            # The timestamp column is present in the file but not stored.
            user_id, movie_id, score, _timestamp = row.split("\t")

            # Build one Rating per row from the integer-converted fields.
            rating = Rating(movie_id=int(movie_id),
                            user_id=int(user_id),
                            score=int(score))

            # We need to add to the session or it won't ever be stored
            db.session.add(rating)

    # Once we're done, we should commit our work
    db.session.commit()
def make_new_user():
    """Placeholder for a user-creation helper that was never written.

    Raises:
        NotImplementedError: always; there is no implementation yet.
    """
    # NOTE(review): the original line was a bare ``def make_new_user`` with no
    # parameter list, colon or body, which is a SyntaxError that prevents the
    # whole module from being parsed. The intended parameters are unknown --
    # TODO confirm the signature and implement, or delete this stub.
    raise NotImplementedError("make_new_user is not implemented yet")
if __name__ == "__main__":
    # Hook the database handle up to the Flask app before touching tables.
    connect_to_db(app)

    # Make sure the tables exist (creates them if they have not been yet).
    db.create_all()

    # Seed loaders, currently disabled; uncomment to import each data set.
    # load_users()
    # load_movies()
    # load_ratings()