-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
374 lines (299 loc) · 19.2 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
import streamlit as st
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")
# Crime columns that are added up into the "Total" figure used by the state
# and district charts.
# NOTE(review): "Assault on women with intent to outrage her modesty" is drawn
# in the yearly line chart but is not part of this sum. Confirm whether the
# omission is intended before adding it: the top-20 district chart drops rows
# by position after sorting on this total, so changing it shifts those rows.
_TOTAL_CRIME_COLUMNS = [
    "Rape",
    "Kidnapping and Abduction",
    "Dowry Deaths",
    "Insult to modesty of Women",
    "Cruelty by Husband or his Relatives",
    "Importation of Girls",
]

# District labels (after title-casing) that are collapsed into "Total".
_AGGREGATE_DISTRICT_LABELS = ["Total District(S)", "Zz Total", "Delhi Ut Total"]

# State/UT spellings (after title-casing) and their replacements.
_STATE_RENAMES = {
    "D&N Haveli": "D & N Haveli",
    "Delhi Ut": "Delhi",
    "A&N Islands": "A & N Islands",
}

# Several UTs/states name their districts only by direction (e.g. both Delhi
# and Sikkim have a "North" and a "South"), so these get the state prefixed.
_COMPASS_DISTRICTS = ["South", "North", "East", "West"]

# District spellings in the crime data and their replacements.
_DISTRICT_RENAMES = {
    "Coochbehar": "Koch Bihar",
    "Malda": "Maldah",
    "24 Parganas North": "North Twenty Four Parganas",
    "North 24 Parganas": "North Twenty Four Parganas",
    "24 Parganas South": "South Twenty Four Parganas",
    "South 24 Parganas": "South Twenty Four Parganas",
    "Hooghly": "Hugli",
    "Hyderabad City": "Hyderabad",
}

# (column, legend name, hover text or None) for each yearly line-chart trace.
_YEARLY_TRACES = [
    ("Rape", "Rape", "Rape"),
    ("Dowry Deaths", "Dowry Deaths", None),
    ("Kidnapping and Abduction", "Kidnapping and Abduction",
     "Kidnapping and Abduction"),
    ("Importation of Girls", "Importation of Girls", "Importation of Girls"),
    ("Cruelty by Husband or his Relatives",
     "Cruelty by Husband or his Relatives",
     "Cruelty by Husband or his Relatives"),
    ("Insult to modesty of Women", "Insult to modesty of Women",
     "Insult to modesty of Women"),
    ("Assault on women with intent to outrage her modesty",
     "Assault on women ",
     "Assault on women with intent to outrage her modesty"),
]

# Dropdown label -> (crime column, chart title, y-axis title).
_TOP_STATE_CHARTS = {
    "Top 10 States/UT with Highest number of Rapes": (
        "Rape",
        "Total number of Rape vs State/UT",
        "Number of Rapes",
    ),
    "Top 10 States/UT with Highest number of Cruelty by Husband or his Relatives": (
        "Cruelty by Husband or his Relatives",
        "Cruelty by Husband or his Relatives vs State/UT",
        "Cruelty by Husband or his Relatives",
    ),
    "Top 10 states with Highest number of Dowry Deaths": (
        "Dowry Deaths",
        "Dowry Deaths vs State/UT",
        "Dowry Deaths",
    ),
    "Top 10 states with Highest number of Kidnapping and Abduction": (
        "Kidnapping and Abduction",
        "Kidnapping and Abduction vs State/UT",
        "Kidnapping and Abduction",
    ),
}

_DISCLAIMER = """
Disclaimer(please read it):
This analysis and visualization is only for educational purposes, use of below
graphs and analysis is strictly prohibited in any official or political work.
Reason: Data is not well organised, dataset has various issues.In case of any
query/issue, please connect through provided username in Footer!
Brief:Data is gathered from three different sources.
Crimes against women data from Data.gov.in(CSV File)
India 2011 Census Data from kaggle.com(CSV File)
Literacy Data(based on Census 2011) from census2011.co.in/district.php(Data
was scraped and then stored into CSV file).
As data is gathered from different sources there are some issues, I tried my best
to minimize them. For example, there were spelling differences with multiple
West Bengal districts and Cyberabad is notmentioned in Census data.Another important
thing is to be noted that the Literacy and Population data isfrom the 2011 India
Census and is used against all years between 2001 to 2014.
Bonus Point: This notebook is made using Plotly library, you can zoom-in the graph,
hover the point using the mouse to see detailed information in the cartesian
planeand last 3D plot can be rotated 360 using mouse and hovering over the Scatter
point will show you multiple information with that particular point
"""


def _normalise_crime_names(data):
    """Return a copy of *data* with cleaned "DISTRICT" and "STATE/UT" names.

    Steps, in order: title-case both columns, collapse the aggregate district
    labels into "Total", apply the state renames, prefix direction-only
    district names with their state, apply the district renames, and assign
    "Cyberabad" rows to "Andhra Pradesh".

    Column-wise operations are used instead of per-row ``frame[col][i] = ...``
    writes: chained assignment is slow and is not guaranteed to write through
    to the frame.
    """
    data = data.copy()
    data["DISTRICT"] = data["DISTRICT"].str.title()
    data["STATE/UT"] = data["STATE/UT"].str.title()

    data.loc[data["DISTRICT"].isin(_AGGREGATE_DISTRICT_LABELS), "DISTRICT"] = "Total"
    data["STATE/UT"] = data["STATE/UT"].replace(_STATE_RENAMES)

    # Uses the already-renamed state, so e.g. "Delhi Ut"/"North" -> "Delhi North".
    compass = data["DISTRICT"].isin(_COMPASS_DISTRICTS)
    data.loc[compass, "DISTRICT"] = (
        data.loc[compass, "STATE/UT"] + " " + data.loc[compass, "DISTRICT"]
    )

    data["DISTRICT"] = data["DISTRICT"].replace(_DISTRICT_RENAMES)
    data.loc[data["DISTRICT"] == "Cyberabad", "STATE/UT"] = "Andhra Pradesh"
    return data


def _total_crimes(frame):
    """Return the row-wise sum of ``_TOTAL_CRIME_COLUMNS`` in *frame*."""
    # skipna=False keeps the NaN propagation of a plain ``a + b + ...`` chain.
    return frame[_TOTAL_CRIME_COLUMNS].sum(axis=1, skipna=False)


def _outline_markers(fig):
    """Give the first trace of *fig* a 3px black marker outline."""
    fig.data[0].marker.line.width = 3
    fig.data[0].marker.line.color = "black"


def _top_rape_districts(data, c11p, lit, rows):
    """Return the districts with the most rapes, with population and literacy.

    *rows* is the number of rows kept after sorting, before the two positional
    drops below; the result can be shorter or longer than ``rows - 2`` because
    the left merges may duplicate districts.
    """
    top = (
        data.groupby("DISTRICT")
        .sum(numeric_only=True)
        .sort_values(by="Rape", ascending=False)
        .reset_index()
        .head(rows)
    )
    # NOTE(review): dropped by position, not by name. Presumably row 0 is the
    # aggregated "Total" district — confirm against the data.
    top.drop(0, axis=0, inplace=True)
    top = pd.merge(top, c11p, on="DISTRICT", how="left")
    top = pd.merge(top, lit, on="DISTRICT", how="left")
    # NOTE(review): hard-coded row label after the merges; the reason is not
    # visible in this file (possibly a duplicated merge row) — confirm.
    top.drop(2, inplace=True)
    top.drop("Year", axis=1, inplace=True)
    return top


def app():
    """Render the "Crime against Women" analysis page with Streamlit.

    Reads 'crimes.csv', 'india2011.csv' and 'Literacy.csv' from the working
    directory (through a cached loader), then draws, top to bottom: a yearly
    line chart per crime, state totals (bar or pie), the top districts by
    total crimes, a top-10 states chart per selected crime, the top Madhya
    Pradesh districts by rapes, the top-N districts by rapes, two normalised
    line charts (rape vs literacy / population) and a 3D scatter plot.
    Uses the module-level ``scaler`` for the normalisation.
    """

    # NOTE(review): recent Streamlit releases deprecate st.cache in favour of
    # st.cache_data — confirm the pinned version before switching.
    @st.cache(allow_output_mutation=True)
    def load_data():
        """Load and clean the three CSV files.

        Returns ``(data, District_Total, state_total, c11p, lit)``: the cleaned
        crime rows, per-district sums merged with population and literacy, the
        rows whose district is "Total", the census subset and the literacy
        table.
        """
        data = pd.read_csv('crimes.csv')
        c11 = pd.read_csv('india2011.csv')
        lit = pd.read_csv('Literacy.csv')

        # The first CSV column is discarded by position.
        data = _normalise_crime_names(data.iloc[:, 1:])

        # Census names are used exactly as read from the CSV. An earlier
        # title-casing pass over c11 ran after this selection was copied out
        # and therefore never affected the returned frame.
        c11p = c11[["District name", "Population", "State name"]].copy()
        c11p.columns = ["DISTRICT", "Population", "State"]

        lit.columns = ['Unnamed: 0', 'DISTRICT', 'State', 'Literacy']
        lit = lit.drop(["Unnamed: 0", "State"], axis=1)
        lit["DISTRICT"] = lit["DISTRICT"].str.lstrip()

        # Add population and literacy to every individual district.
        District_Total = data.groupby("DISTRICT").sum(numeric_only=True)
        District_Total = pd.merge(District_Total, c11p, on="DISTRICT", how="left")
        District_Total = pd.merge(District_Total, lit, on="DISTRICT", how="left")

        state_total = data[data["DISTRICT"] == "Total"]
        return data, District_Total, state_total, c11p, lit

    data, District_Total, state_total, c11p, lit = load_data()

    st.title("Crime against Women(2001-2014, India) Data Analysis and Visulization")
    st.text(_DISCLAIMER)

    label10 = np.arange(1, 11)
    label20 = np.arange(0, 20)

    # --- Yearly line chart for every crime -------------------------------
    ydata = data.groupby("Year").sum(numeric_only=True).reset_index()
    st.subheader('Line graph for years between 2001 to 2014 for all the Crimes against Women')
    fig = go.Figure()
    for column, legend, hover in _YEARLY_TRACES:
        fig.add_trace(go.Scatter(x=ydata.Year, y=ydata[column],
                                 mode='lines+markers',
                                 name=legend,
                                 hovertext=hover))
    fig.update_layout(
        title="",
        plot_bgcolor='rgba(0,0,0,0)', width=900)
    st.plotly_chart(fig)
    st.write("We can see a sudden surge in Rape, Assault on women with intent to outrage her modesty and Kidnapping and Abduction after 2012. One of the main reasons for this could be Nirbhaya Delhi Case, after this, India got a moment of awareness and people started coming out against the crimes.")

    # --- Total crimes per state (bar or pie) -----------------------------
    st.subheader("Total Crimes in each States(Dropdown for Bar Graph and Pie Chart) ")
    st.write("Hint: Here we have added all the crimes into one column for each state.")
    vis_type = st.selectbox("Select Bar Graph or Pie Chart?", ["Bar Graph", "Pie Chart"])
    state_totals = state_total.groupby("STATE/UT").sum(numeric_only=True)
    state_totals["Total"] = _total_crimes(state_totals)
    state_totals = state_totals.sort_values(by="Total")
    if vis_type == "Bar Graph":
        fig = go.Figure(data=[go.Bar(x=state_totals.index, y=state_totals['Total'],
                                     marker={"color": np.arange(0, len(state_totals['Total']))})])
        fig.update_layout(autosize=True, plot_bgcolor='rgba(0,0,0,0)')
        st.plotly_chart(fig)
    elif vis_type == "Pie Chart":
        fig = go.Figure(data=[go.Pie(labels=state_totals.index, values=state_totals['Total'])])
        fig.update_traces(hoverinfo='label+percent', textinfo='label+percent', textfont_size=10)
        fig.update_layout(autosize=True)
        st.plotly_chart(fig)

    # --- Top districts by total crimes -----------------------------------
    st.subheader("Top 20 districts with highest crimes(sum of all crimes) happened against Women")
    st.write("Hint: Hover-over the bars to see all informations.")
    # assign() leaves the cached frame returned by load_data() untouched.
    srt = District_Total.assign(Total=_total_crimes(District_Total))
    srt = srt.sort_values(by="Total", ascending=False).head(23).reset_index()
    # NOTE(review): rows are removed by position. Presumably 0 is the
    # aggregated "Total" district and 6/7 are unwanted or duplicated rows —
    # confirm against the data before changing the sort key or the inputs.
    srt.drop([0, 6, 7], axis=0, inplace=True)
    fig = px.bar(srt, x="DISTRICT", y="Total",
                 color='State', text='Total',
                 title="")
    fig.update_traces(textposition='outside')
    fig.update_layout(plot_bgcolor='white', width=900)
    st.plotly_chart(fig)

    # --- Top 10 states for a selected crime ------------------------------
    st.subheader("Top 10 States for different Crimes(Dropdown)")
    x = st.selectbox("Chose from Dropdown", list(_TOP_STATE_CHARTS))
    column, chart_title, y_title = _TOP_STATE_CHARTS[x]
    top_states = (
        state_total.groupby("STATE/UT")
        .sum(numeric_only=True)
        .sort_values(by=column, ascending=False)
        .head(10)
    )
    # The rape chart is coloured by value, the others by rank.
    bar_colours = top_states["Rape"] if column == "Rape" else label10
    fig = go.Figure(data=[go.Bar(x=top_states.index, y=top_states[column],
                                 marker={'color': bar_colours})])
    fig.update_layout(title=chart_title, xaxis_title="Name of State/UT",
                      yaxis_title=y_title, plot_bgcolor='white')
    _outline_markers(fig)
    st.plotly_chart(fig)

    # --- Madhya Pradesh districts ----------------------------------------
    st.write("Madhya Pradesh had the highest number of rapes between 2001 to 2014, lets see top 20 districts from Madhya Pradesh with highest Rapes.")
    MP_data = data[data['STATE/UT'] == "Madhya Pradesh"]
    MP_data_District = (
        MP_data.groupby("DISTRICT")
        .sum(numeric_only=True)
        .sort_values(by="Rape", ascending=False)
        .head(20)
    )
    # Inner merges: districts missing from either table are left out.
    MP_data_District = pd.merge(MP_data_District, lit, on="DISTRICT", how="inner")
    MP_data_District = pd.merge(MP_data_District, c11p, on="DISTRICT", how="inner")
    fig = go.Figure(data=[go.Scatter(x=MP_data_District.DISTRICT, y=MP_data_District.Rape,
                                     mode='markers',
                                     marker=dict(size=(MP_data_District.Rape / 25), color=label20))])
    fig.update_layout(title="Top 20 Districts with Highest number of Rapes in Madhya Pradesh",
                      xaxis_title="Name of Districts", yaxis_title="Number of Rapes",
                      plot_bgcolor='white')
    _outline_markers(fig)
    st.plotly_chart(fig)

    # --- Top-N districts by rapes (slider) -------------------------------
    st.subheader("Now We gonna find Districts (with State, Population and Literacy) those have the highest number of rapes in Entire India")
    st.write("Hint: Hover-over the bars to see State, Population, Literacy and other infos.")
    val = st.slider("Slide to see top N Districts", 5, 20, 15)
    top_n = _top_rape_districts(data, c11p, lit, val + 2)
    # Fixed-size selection used by the normalised charts and the 3D plot below.
    top_districts = _top_rape_districts(data, c11p, lit, 22)
    fig = px.bar(top_n, x="DISTRICT", y=top_n["Rape"],
                 color=top_n["Rape"],
                 hover_data=["State", "Population", "Literacy"])
    fig.update_layout(
        xaxis_title="Name of Districts",
        yaxis_title="Number of Rapes",
        plot_bgcolor='white', height=600, width=700)
    _outline_markers(fig)
    st.plotly_chart(fig)

    # --- Normalised rape vs literacy -------------------------------------
    st.subheader("Line graph for India's top Rape Reported district to see if there any relation between Rape and Literacy.")
    st.write("Values are normalized using MinMax method, you can read about MinMax normalization on the Internet")
    scaled_columns = ["Literacy", "Population", "Rape"]
    District_TotalMM = pd.DataFrame(
        scaler.fit_transform(top_districts[scaled_columns]),
        columns=scaled_columns,
        index=top_districts.index,
    )
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=top_districts.DISTRICT, y=District_TotalMM.Rape,
                             mode='lines+markers',
                             name='Rape',
                             hovertext="Rape"))
    fig.add_trace(go.Scatter(x=top_districts.DISTRICT, y=District_TotalMM.Literacy,
                             mode='lines+markers',
                             name='Literacy'))
    fig.update_layout(title='',
                      xaxis_title='Districts Name',
                      yaxis_title='Normalized values in between 0 and 1',
                      plot_bgcolor='white')
    st.plotly_chart(fig)

    # --- Normalised rape vs population -----------------------------------
    st.subheader("Line graph for India's top Rape Reported district to see if there any relation between Rape and Population.")
    st.write("Values are normalized using MinMax method, you can read about MinMax normalization on the Internet")
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=top_districts.DISTRICT, y=District_TotalMM.Rape,
                             mode='lines',
                             name='Rape',
                             hovertext="Rape"))
    fig.add_trace(go.Scatter(x=top_districts.DISTRICT, y=District_TotalMM.Population,
                             mode='lines',
                             name='Population'))
    fig.update_layout(title='',
                      xaxis_title='Districts Name',
                      yaxis_title='Normalized values in between 0 and 1',
                      plot_bgcolor='white')
    st.plotly_chart(fig)

    # --- 3D scatter --------------------------------------------------------
    st.subheader("3D Scatter Plot for top 20 Rape Reported District!")
    fig = px.scatter_3d(top_districts, x='Rape', y='Literacy', z='Population',
                        color='DISTRICT', symbol="State")
    fig.update_layout(plot_bgcolor='white', paper_bgcolor='rgba(0,0,0,0)', height=600, width=900)
    st.plotly_chart(fig)

    # Adjacent literals instead of backslash continuations, so the text no
    # longer depends on source indentation; a space now separates
    # "things," and "this".
    st.write("Hi there, if you have come so far, it shows your love for exploring things, "
             "this whole project is made using four open-source libraries pandas, numpy, "
             "plotly and streamlit.")
    st.subheader("Via - Satyampd(Username for Github, Kaggle and LinkedIn)")
if __name__ == "__main__":
    # Render the page when this file is executed as a script.
    app()