-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPlotting.py
355 lines (303 loc) · 9.76 KB
/
Plotting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 23 09:51:38 2019
@author: jessedesimone
"""
#plotting with seaborn and matplotlib
#%%
#load packages
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
#%%
#change working directory
#os.chdir() needs a path (calling it with no argument raises TypeError);
#work_dir defaults to the current directory - point it at the project folder
#to change where files such as saved figures are written
work_dir = os.getcwd()
os.chdir(work_dir)
#list working directory
os.listdir()
#%%
#plotting parameters
#load plotting style
#see style gallery (https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html)
#matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and later
#dropped the old names, so use whichever spelling this installation provides
for style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set(style="whitegrid") #grid color
#set plot context to paper, notebook, talk, poster
sns.set_context("paper", font_scale=1)
#adjust figure size
plt.figure(figsize=(20,5))
#remove grid lines from plot
sns.set_style("whitegrid", {'axes.grid' : False})
#%%
#iris data, first taken from the example datasets that ship with seaborn
df = sns.load_dataset('iris')
#(the "tips" example dataset can be loaded the same way: sns.load_dataset("tips"))
#%%
#then replaced by a local CSV copy read through pandas
iris_csv = '/Users/jessedesimone/Documents/Python/DeSimone_Py_scripts/Datasets/IRIS.csv'
df = pd.read_csv(iris_csv)
df.head(10) #first ten rows
#%%
#column overview; the non-null counts show whether values are missing
df.info()
#%%
#number of rows for each species
species_counts = df['species'].value_counts()
print(species_counts)
#%%
#summary statistics table, kept in descript and printed
descript = df.describe()
print(descript)
#%%
#area chart of the describe() table, written to descript.png
area_plot_options = dict(
    kind="area",
    fontsize=10,
    figsize=(10, 5),
    table=True,
    colormap="rainbow",
)
df.describe().plot(**area_plot_options)
plt.ylabel('Value')
plt.title("General Statistics of Iris Dataset")
plt.savefig("descript.png")
#%%
#count plot and pie chart, side by side in one figure
plt.figure(figsize=(10,10))
#left panel: number of rows per species
plt.subplot(1,2,1)
#x is passed by keyword: newer seaborn releases no longer accept it positionally
sns.countplot(x='species',data=df)
#right panel: pie chart of the same counts
plt.subplot(1,2,2)
species_counts = df['species'].value_counts()
#one explode offset per slice, so the call does not depend on there being exactly three species
species_counts.plot.pie(explode=[0.03]*len(species_counts),autopct='%1.1f%%',shadow=True)
#%%
#stand-alone pie chart of the species counts
pie_options = dict(
    explode=[0.03,0.03,0.03],
    autopct='%1.1f%%',
    shadow=True,
    figsize=(8,6),
)
species_share = df['species'].value_counts()
species_share.plot.pie(**pie_options)
plt.show()
#%%
#histogram of one variable: counts only (no density curve) plus a rug
plt.figure(figsize=(5,5))
sepal_length = df["sepal_length"]
sns.distplot(sepal_length, kde=False, color="skyblue", label="Sepal Length", rug=True)
#%%
#histograms of all variables via pandas
df.hist(edgecolor='black', linewidth=1, color='red')
fig=plt.gcf()
fig.set_size_inches(12,6)
#%%
#two variables overlaid on the same axes
plt.figure(figsize=(8,5))
sns.set_style("whitegrid", {'axes.grid' : False})
sns.axes_style("whitegrid")
overlay_spec = (
    ("sepal_length", "skyblue", "Sepal Length"),
    ("sepal_width", "red", "Sepal Width"),
)
for column, colour, legend_label in overlay_spec:
    sns.distplot(df[column], kde=False, color=colour, label=legend_label)
#%%
#joint scatter and histogram
#x and y are passed by keyword: newer seaborn releases reject them as positional arguments
#regression fit with marginal distributions
sns.jointplot(x="sepal_length", y="sepal_width", data=df, kind="reg", color='black')
#kernel density version
sns.jointplot(x="sepal_length", y="sepal_width", data=df, kind="kde")
#scatter with density contours drawn on the joint axes
g = (sns.jointplot(x="sepal_length", y="sepal_width", data=df, color="k").plot_joint(sns.kdeplot, zorder=0, n_levels=6))
#%%
#facet grid: sepal length vs sepal width, one color per species
#"height" is the current name of the former "size" argument; the lmplot and
#relplot calls elsewhere in this file already use "height"
sns.FacetGrid(df,hue='species',height=5)\
.map(plt.scatter,'sepal_length','sepal_width')\
.add_legend()
#%%
#box-and-whisker plots
#petal length per species, species drawn in an explicit order
fig=plt.gcf()
fig.set_size_inches(10,5)
species_order=['Iris-virginica','Iris-versicolor','Iris-setosa']
fig=sns.boxplot(
    x='species',
    y='petal_length',
    data=df,
    order=species_order,
    linewidth=2.5,
    orient='v',
    dodge=False,
)
#pandas version: all variables, grouped by species
df.boxplot(by="species", figsize=(12, 6))
#%%
#boxplot plus facet grid histogram
#load the tips data in this cell: no earlier cell defines `tips`
#(the first assignment in the file comes later), so using it here raised NameError
tips = sns.load_dataset("tips")
sns.boxplot(x = tips["sex"], y = tips["tip"]).set_title("Male/Female Tips")
#one facet row per value of "sex"
g = sns.FacetGrid(tips, row = "sex")
#g.map allows for groups to be plotted on separate axes
#first parameter = plotting function, second parameter = plotting variable
g = g.map(plt.hist, "tip")
plt.show()
#%%
#scatterplot with matplot (through the pandas plotting interface)
df.plot(kind='scatter',x="sepal_length",y="sepal_width",color='red',label='flower', linewidth=3, alpha=0.9,
        grid=True, linestyle='-.', figsize=(10,8))
plt.legend(loc='upper right')
#axis units in cm, consistent with the "(cm)" label used for the LM plot in this file
plt.xlabel('Length (cm)')
plt.ylabel('Width (cm)')
#title spelling corrected ("Speal" -> "Sepal")
plt.title('Sepal length vs width')
plt.show()
#%%
#scatterplot with seaborn
import seaborn as sns
import matplotlib.pyplot as plt
#reset seaborn to its default theme
sns.set()
#%%
#two-factor scatterplot
#load the tips data in this cell: no earlier cell defines `tips`, so using it
#here raised NameError when the cells are run top to bottom
tips = sns.load_dataset("tips")
plt.figure(figsize=(10,8))
ax = sns.scatterplot(x="total_bill", y="tip",
                     data=tips)
#%%
#three-factor scatterplot: total bill vs tip, colored by meal time
tips = sns.load_dataset("tips")
ax = sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time")
#%%
#marker size follows the grouping variable
sns.scatterplot(data=tips, x="total_bill", y="tip", size="sex")
#%%
#marker size and color both follow the grouping variable
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex", size="sex")
#%%
#set the range of point sizes and supply a cubehelix colormap
#NOTE(review): the colormap is passed as cmap= while hue="day" is handled by
#seaborn - confirm whether palette= was intended here
cmap = sns.cubehelix_palette(dark=0, light=1, as_cmap=True)
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    size="day",
    cmap=cmap,
    sizes=(20,200),
)
#%%
#figure-level version with the "muted" palette and larger points
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    size="day",
    palette="muted",
    sizes=(30,300),
)
#%%
#one column of panels per meal time
sns.relplot(data=tips, x="total_bill", y="tip", hue="day", col="time")
#%%
#grid of panels: rows by sex, columns by meal time
g = sns.relplot(data=tips, x="total_bill", y="tip", hue="day", row="sex", col="time")
#%%
#four-factor scatterplot: color by day, marker style by meal time
tips = sns.load_dataset("tips")
#matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and later
#dropped the old names, so use whichever spelling this installation provides
for style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
ax = sns.scatterplot(x="total_bill", y="tip", hue="day", style="time",
                     data=tips)
#%%
#explicit marker for each meal time
markers = {"Lunch": "s", "Dinner": "X"}
sns.scatterplot(data=tips, x="total_bill", y="tip", style="time", markers=markers)
#%%
#strip plot of sepal length per species
fig=plt.gcf()
fig.set_size_inches(8,7)
fig=sns.stripplot(
    x='species',
    y='sepal_length',
    data=df,
    jitter=True,
    edgecolor='gray',
    size=8,
    palette='winter',
    orient='v',
)
#%%
#box plot with the individual observations drawn on top
fig=plt.gcf()
fig.set_size_inches(10,7)
fig=sns.boxplot(x='species',y='sepal_length',data=df)
fig=sns.stripplot(
    x='species',
    y='sepal_length',
    data=df,
    jitter=True,
    size=8,
    edgecolor='',
    color='black',
)
#%%
#box plot with hand-picked box colors and the raw observations on top
ax= sns.boxplot(x="species", y="petal_length", data=df)
#the box patches are found in ax.artists on older matplotlib and in ax.patches
#from matplotlib 3.5 on; indexing ax.artists directly raised IndexError when it
#was empty, so look in both places
boxes = list(ax.artists) or list(ax.patches)
ax= sns.stripplot(x="species", y="petal_length", data=df, size=4, jitter=True, edgecolor="gray")
#fill colors in plotting order (first box green, second red, third yellow);
#zip simply stops early if fewer boxes were drawn
for box, facecolor in zip(boxes, ('green', 'red', 'yellow')):
    box.set_facecolor(facecolor)
    box.set_edgecolor('black')
plt.show()
#%%
#violin plot of sepal length per species
fig=plt.gcf()
fig.set_size_inches(10,7)
fig=sns.violinplot(x='species',y='sepal_length',data=df)
#%%
#2x2 grid with one violin plot per measurement
plt.figure(figsize=(15,10))
measurements=('petal_length','petal_width','sepal_length','sepal_width')
for panel, measurement in enumerate(measurements, start=1):
    plt.subplot(2,2,panel)
    sns.violinplot(x='species',y=measurement,data=df)
#%%
#pair plots: plain scatter first, then colored by species
sns.pairplot(df, kind='scatter')
sns.pairplot(data=df, hue='species')
#%%
#correlation & heat map
#correlate the numeric columns only: the text "species" column cannot be
#correlated, and recent pandas raises on it instead of silently dropping it
corr = df.select_dtypes(include='number').corr() #correlation values to dataframe
fig=plt.gcf()
fig.set_size_inches(10,7)
#fixed -1..1 color range so the colors are comparable between plots
fig=sns.heatmap(corr,annot=True,cmap='YlGnBu',
                linewidths=1,linecolor='k',square=True,mask=False,
                vmin=-1, vmax=1,cbar_kws={"orientation": "vertical"},cbar=True)
#%%
#correlation & heat map on its own figure, annotated to one decimal place
corr = df.select_dtypes(include='number').corr() #correlation values to dataframe
f, ax= plt.subplots(figsize=(10,10))
sns.heatmap(corr,annot=True, linewidths=1.5,fmt=".1f",ax=ax )
plt.show()
#%%
#swarm plot of petal length per species
sns.set(style="whitegrid")
fig=plt.gcf()
fig.set_size_inches(10,7)
fig = sns.swarmplot(data=df, x="species", y="petal_length")
#%%
#box plot with a swarm plot drawn on top
sns.set(style="darkgrid")
fig=plt.gcf()
fig.set_size_inches(8,5)
fig= sns.boxplot(data=df, x="species", y="petal_length")
fig= sns.swarmplot(data=df, x="species", y="petal_length", color=".2")
#%%
#violin outlines (no inner marks) with a white swarm plot on top
sns.set(style="whitegrid")
fig=plt.gcf()
fig.set_size_inches(10,7)
ax = sns.violinplot(data=df, x="species", y="petal_length", inner=None)
ax = sns.swarmplot(
    data=df,
    x="species",
    y="petal_length",
    color="white",
    edgecolor="black",
)
#%%
#LM plot: petal width against petal length, one fit per species
sns.set(style="whitegrid")
fig=sns.lmplot(x="petal_length", y="petal_width", hue="species",data=df)
#the x axis shows petal_length, so label it as length
#(it was previously labelled "petal width")
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
#%%
#LM plot on a facet grid: size vs total bill, one column and color per day
g = sns.lmplot(
    data=tips,
    x="size",
    y="total_bill",
    hue="day",
    col="day",
    height=6,
    aspect=.4,
    x_jitter=.1,
)
#%%
#fMRI time series plots
#reference: https://seaborn.pydata.org/generated/seaborn.lineplot.html
import seaborn as sns
sns.set(style="darkgrid")
#%%
#load the fmri example dataset
fmri = sns.load_dataset("fmri")
#%%
#rows whose subject label contains "s0"
is_s0 = fmri['subject'].str.contains("s0")
fmri1 = fmri[is_s0]
#keep only the columns used for plotting
plot_columns = ["timepoint","event","region","signal"]
fmri1 = fmri1[plot_columns]
#signal over time for that subject: color by region, line style by event
sns.lineplot(data=fmri1, x="timepoint", y="signal", hue="region", style="event")
#%%
#alternative single-subject selection: exact match on the subject label
fmris0=fmri[fmri['subject'] == 's0']
fmris0
sns.lineplot(data=fmris0, x="timepoint", y="signal", hue="region", style="event")
plt.title('s0 fMRI signal')
#%%
#all subjects: color by region, line style by event
sns.lineplot(data=fmri, x="timepoint", y="signal", hue="region", style="event")
#%%
#all subjects: color by event, line style by region
sns.lineplot(data=fmri, x="timepoint", y="signal", hue="event", style="region")
#%%
#one line-plot panel per subject, wrapped after four columns
g = sns.relplot(
    data=fmri,
    kind="line",
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    col="subject",
    col_wrap=4,
    height=4,
    aspect=0.7,
)
#%%
#frontal region only: one line per subject (no averaging), colored by event
frontal = fmri.query("region == 'frontal'")
sns.lineplot(
    data=frontal,
    x="timepoint",
    y="signal",
    hue="event",
    units='subject',
    estimator=None,
    lw=1.5,
)
plt.xlabel('Time (ms)')
plt.ylabel('BOLD signal')
#%%