# app.py -- forked from hanifareygina/capstone-ui-hanifa
from flask import Flask, render_template
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
import base64
# Flask application object; the route defined below registers itself on it.
app = Flask(__name__)

# ---------------------------------------------------------------------------
# Load and clean the Google Play Store dataset once, at import time.
# ---------------------------------------------------------------------------
playstore = pd.read_csv('data/googleplaystore.csv')

# Keep only the first listing of every app name.
playstore = playstore.drop_duplicates(subset='App', keep='first')

# Row 10472 is removed because its values are not stored in the correct columns.
playstore = playstore.drop([10472])

playstore['Category'] = playstore['Category'].astype('category')

# Installs is text such as "1,000+": strip the thousands separators and the
# trailing plus sign so it can be converted to an integer further down.
playstore['Installs'] = (
    playstore['Installs']
    .str.replace(',', '', regex=False)
    .str.replace('+', '', regex=False)
)

# Tidy the Size column: "Varies with device" becomes missing, a "k"/"M" suffix
# is turned into a multiplier so the column holds a plain number, and missing
# sizes are filled with the mean size of the app's category.
# The results are assigned back to the column instead of calling
# replace()/fillna() with inplace=True on a column selection, which newer
# pandas versions warn about and which has no effect under Copy-on-Write.
size_text = playstore['Size'].replace('Varies with device', np.nan)
size_number = size_text.replace(r'[kM]+$', '', regex=True).astype(float)
unit_factor = (
    size_text.str.extract(r'[\d\.]+([kM]+)', expand=False)
    .map({'k': 10**3, 'M': 10**6})
    .fillna(1)  # no suffix (or missing size): leave the number unscaled
    .astype(int)
)
playstore['Size'] = size_number * unit_factor
playstore['Size'] = playstore['Size'].fillna(
    playstore.groupby('Category', observed=True)['Size'].transform('mean')
)

# Price is text with a leading dollar sign; '$' is removed literally
# (regex=False) because it is an anchor in regular expressions.
playstore['Price'] = (
    playstore['Price'].str.replace('$', '', regex=False).astype('float64')
)

# Convert Reviews, Size and Installs to integers.
playstore = playstore.astype(
    {'Reviews': 'int64', 'Size': 'int64', 'Installs': 'int64'}
)
def _figure_to_base64(fig, filename: str) -> str:
    """Write ``fig`` to ``filename`` and return the same plot as base64 text.

    Args:
        fig: Matplotlib figure to render.
        filename: Path of the PNG copy written to disk (tight bounding box).

    Returns:
        The PNG image (default bounding box) encoded as a base64 string, so
        the HTML template can display it.
    """
    fig.savefig(filename, bbox_inches="tight")
    buffer = BytesIO()
    fig.savefig(buffer, format='png')
    return base64.b64encode(buffer.getvalue()).decode('ascii')


@app.route("/")
def index():
    """Render the dashboard page: summary statistics, a table and four plots.

    Returns:
        The rendered ``index.html`` template, given ``stats`` (value-box
        numbers and the HTML review table) and ``result`` .. ``result4``
        (base64-encoded PNG plots).
    """
    # Figures are built with the object-oriented API rather than pyplot:
    # pyplot keeps every figure alive in global state until it is closed,
    # which leaks memory on each request and is shared between server threads.
    from matplotlib.figure import Figure

    # The cleaned dataset is only read here, never modified.
    data = playstore

    # --- Statistics -------------------------------------------------------
    top_category = pd.crosstab(
        index=data['Category'],
        columns='Jumlah'
    ).sort_values('Jumlah', ascending=False).reset_index()

    # observed=True keeps only (Category, App) pairs that exist in the data;
    # because Category is categorical, the default would build every
    # category x app combination before the top ten are taken.
    rev_table = (
        data.groupby(['Category', 'App'], observed=True)
        .agg({'Reviews': 'sum', 'Rating': 'mean'})
        .sort_values(by='Reviews', ascending=False)
        .reset_index()
        .head(10)
        .to_html(classes=['table thead-light table-striped table-bordered table-hover table-sm'])
    )

    # Values shown in the value boxes and the table of the template.
    stats = {
        'most_categories': top_category.loc[0, 'Category'],
        'total': top_category.loc[0, 'Jumlah'],
        'rev_table': rev_table,
    }

    # --- Bar plot: the five categories with the most apps -------------------
    cat_order = (
        data.groupby('Category', observed=True)
        .agg({'App': 'count'})
        .sort_values(by='App', ascending=False)
        .reset_index()
        .head()
    )
    fig = Figure(figsize=(8, 3), dpi=300)
    ax = fig.subplots()
    # One colour per bar; passed as a list because a bare string of
    # single-letter colours is not accepted as a colour sequence by current
    # matplotlib.
    ax.barh(cat_order['Category'], cat_order['App'], color=list('rgbkymc'))
    result = _figure_to_base64(fig, 'cat_order.png')

    # --- Scatter plot: reviews vs rating, marker size scaled by installs ----
    marker_area = data['Installs'].values / 10000000
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.scatter(x=data['Reviews'].values, y=data['Rating'].values,
               s=marker_area, alpha=0.3)
    ax.set_xlabel('Reviews')
    ax.set_ylabel('Rating')
    result2 = _figure_to_base64(fig, 'rev_rat.png')

    # --- Histogram: distribution of app size (Size divided by 1,000,000) ----
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.hist((data['Size'] / 1000000).values, bins=100, density=True, alpha=0.75)
    ax.set_xlabel('Size')
    ax.set_ylabel('Frequency')
    result3 = _figure_to_base64(fig, 'hist_size.png')

    # --- Pie chart: share of installs for the top four content ratings ------
    # Only the Installs column is summed; summing every column fails on the
    # categorical Category column in recent pandas versions.
    installs_by_rating = (
        data.groupby('Content Rating')['Installs']
        .sum()
        .sort_values(ascending=False)
        .head(4)
    )
    # Offset of each slice from the centre, trimmed in case there are fewer
    # than four content ratings.
    explode = (0.07, 0.07, 0.15, 0.17)[:len(installs_by_rating)]
    fig = Figure(figsize=(8, 8))
    ax = fig.subplots()
    ax.pie(installs_by_rating.values, explode=explode,
           labels=installs_by_rating.index, autopct='%1.1f%%',
           shadow=True, startangle=90)
    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    result4 = _figure_to_base64(fig, 'pie_size.png')

    # Pass the statistics and every encoded plot to the template.
    return render_template('index.html', stats=stats, result=result,
                           result2=result2, result3=result3, result4=result4)
if __name__ == "__main__":
    import os

    # Debug mode stays on by default for local development, exactly as
    # before. Setting FLASK_DEBUG to an empty value, 0, false or no turns it
    # off, so the interactive debugger is not exposed when the app is run
    # somewhere other people can reach it.
    debug_flag = os.environ.get("FLASK_DEBUG", "1")
    debug_enabled = bool(debug_flag) and debug_flag.lower() not in {"0", "false", "no"}
    app.run(debug=debug_enabled)