-
Notifications
You must be signed in to change notification settings - Fork 1
/
function.py
129 lines (108 loc) · 3.75 KB
/
function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import config
# Module-level Spotify client used by the API helper functions in this file.
# It authenticates through SpotifyClientCredentials with the client id and
# secret read from the local ``config`` module (``config.spotify`` mapping).
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(config.spotify['client_id'],
config.spotify['client_secret']))
def get_album_songs(uri_info):
    """Return one row per track of a Spotify album.

    Tracks are requested from the module-level ``sp`` client for the US
    market, 50 per page, and every page of the result is followed so albums
    with more than 50 tracks are not truncated.

    Args:
        uri_info: Album identifier, passed unchanged to ``sp.album_tracks``.

    Returns:
        pandas.DataFrame with the columns ``uri``, ``track`` (the track name),
        ``duration_ms``, ``explicit`` and ``track_number``, in the order the
        API returned the tracks. The frame has no rows when the album has no
        tracks.
    """
    items = []
    page = sp.album_tracks(uri_info, limit=50, offset=0, market='US')
    while page:
        items.extend(page['items'])
        # A paging object carries the URL of the following page in 'next';
        # stop as soon as there is none.
        page = sp.next(page) if page.get('next') else None

    return pd.DataFrame({
        'uri': [item['uri'] for item in items],
        'track': [item['name'] for item in items],
        'duration_ms': [item['duration_ms'] for item in items],
        'explicit': [item['explicit'] for item in items],
        'track_number': [item['track_number'] for item in items],
    })
def get_track_info(df):
    """Fetch Spotify audio features for every track listed in ``df``.

    The URIs are sent to ``sp.audio_features`` in batches instead of one
    request per track. When the API reports no features for a track (a
    ``None`` entry in the response), the corresponding row is filled with
    missing values instead of raising, so the result stays aligned with
    ``df`` row for row.

    Args:
        df: DataFrame with a ``uri`` column holding track identifiers.

    Returns:
        pandas.DataFrame with one row per entry of ``df['uri']`` (same order)
        and the columns ``danceability``, ``energy``, ``key``, ``loudness``,
        ``speechiness``, ``acousticness``, ``instrumentalness``, ``liveness``,
        ``valence`` and ``tempo``.
    """
    feature_names = [
        'danceability',
        'energy',
        'key',
        'loudness',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo',
    ]
    # The audio-features endpoint accepts at most 100 ids per request.
    batch_size = 100

    uris = list(df['uri'])
    rows = []
    for start in range(0, len(uris), batch_size):
        batch = uris[start:start + batch_size]
        for features in sp.audio_features(tracks=batch):
            # A None entry means no analysis is available for that track.
            features = features or {}
            rows.append({name: features.get(name) for name in feature_names})

    return pd.DataFrame(rows, columns=feature_names)
def popularity(df):
    """Look up the Spotify track record for every URI in ``df``.

    Despite the name, the whole response of ``sp.track`` is kept for each
    track, not only its popularity value.

    Args:
        df: DataFrame with a ``uri`` column holding track identifiers.

    Returns:
        pandas.DataFrame with one row per entry of ``df['uri']`` (same order);
        the columns are the top-level keys of the ``sp.track`` responses.
    """
    track_records = [pd.Series(sp.track(track_uri)) for track_uri in df['uri']]
    return pd.DataFrame(track_records)
def add_popularity(df):
    """Add a ``popularity`` column to ``df`` in place and return a preview.

    Args:
        df: DataFrame with a ``uri`` column holding track identifiers. It is
            modified in place: a ``popularity`` column is added or replaced.

    Returns:
        ``df.head()`` -- only the first rows of the updated frame. Use the
        ``df`` object that was passed in to access all rows.
    """
    track_details = popularity(df)
    # ``popularity`` yields rows in the order of ``df['uri']`` but with its own
    # fresh index. Assign by position so a ``df`` whose index is not 0..n-1
    # (e.g. after filtering) does not end up with NaN or misaligned values.
    df['popularity'] = track_details['popularity'].to_numpy()
    return df.head()
def lyrics_to_df(data, df):
    """Append one row per track of an album's lyrics data to ``df``.

    Args:
        data: Mapping with the album title under ``'name'`` and a sequence
            under ``'tracks'`` whose entries each hold
            ``entry['song']['title']`` and ``entry['song']['lyrics']``.
        df: DataFrame to extend. It is not modified.

    Returns:
        A new DataFrame containing the rows of ``df`` followed by one row per
        track with the columns ``track``, ``album`` and ``lyrics``; the index
        is renumbered from 0. When ``data['tracks']`` is empty, ``df`` itself
        is returned unchanged.
    """
    tracks = data['tracks']
    if not tracks:
        return df

    album = data['name']
    new_rows = pd.DataFrame([
        {
            'track': entry['song']['title'],
            'album': album,
            'lyrics': entry['song']['lyrics'],
        }
        for entry in tracks
    ])
    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # copying the whole frame once per track.
    return pd.concat([df, new_rows], ignore_index=True)
def single_reg(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and report train/test RMSE.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        The same ``model`` object, after it has been fitted on the training
        data. The root mean squared errors for both splits are printed.
    """
    model.fit(X_train, y_train)

    train_pred, test_pred = model.predict(X_train), model.predict(X_test)

    # RMSE is the square root of scikit-learn's mean squared error.
    rmse_train = mean_squared_error(y_train, train_pred) ** 0.5
    rmse_test = mean_squared_error(y_test, test_pred) ** 0.5

    print('Train Root Mean Square Error:', rmse_train)
    print('Test Root Mean Square Error:', rmse_test)
    return model
def log_transform(x):
    """Return ``log(1 + x)`` element-wise.

    Uses ``np.log1p``, which keeps full precision when ``x`` is very close to
    zero (``np.log(x + 1)`` rounds ``x + 1`` to exactly 1 there and returns 0).

    Args:
        x: Scalar or array-like of numbers; values should be greater than -1
            for a finite real result.

    Returns:
        The natural logarithm of ``1 + x``, with the same shape as ``x``.
    """
    return np.log1p(x)
# Wraps log_transform in a scikit-learn FunctionTransformer so the log(1 + x)
# mapping is available through the transformer interface. None of the
# functions visible in this file reference it.
transformer = FunctionTransformer(log_transform)
def full_reg(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` behind a StandardScaler and report train/test RMSE.

    A two-step pipeline (``'ss'`` scaler, then ``'regressor'``) is fitted on
    the training split and used for the predictions on both splits.

    Args:
        model: Estimator used as the final ``'regressor'`` pipeline step.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        The ``model`` object that was passed in -- not the pipeline.
        NOTE(review): the pipeline fits this estimator on standardized
        features, so calling the returned model directly on unscaled data
        would presumably give inconsistent predictions; confirm how callers
        use the return value.
    """
    scaled_model = Pipeline(steps=[('ss', StandardScaler()), ('regressor', model)])
    scaled_model.fit(X_train, y_train)

    fitted_train, fitted_test = (
        scaled_model.predict(features) for features in (X_train, X_test)
    )

    mse_train = mean_squared_error(y_train, fitted_train)
    mse_test = mean_squared_error(y_test, fitted_test)

    print('Train Root Mean Square Error:', mse_train ** 0.5)
    print('Test Root Mean Square Error:', mse_test ** 0.5)
    return model