-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathapp.py
183 lines (163 loc) · 4.92 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import streamlit as st
import numpy as np
import pandas as pd
from scipy.cluster import hierarchy
import plotly.express as px
import importlib
from io import StringIO
import requests
# `import importlib` on its own does not guarantee that the `importlib.util`
# submodule has been loaded, so import it explicitly before using find_spec.
import importlib.util

# Only pull in Pyodide's URL helper when the pyodide package is importable.
if importlib.util.find_spec("pyodide") is not None:
    from pyodide.http import open_url

st.title("Demo - Interactive Heatmap")
@st.cache(show_spinner=False)
def read_url(url: str, **kwargs):
    """Read the CSV content from a URL.

    Args:
        url: Location of the CSV file to download.
        **kwargs: Passed through unchanged to ``pandas.read_csv``.

    Returns:
        The table parsed by ``pandas.read_csv``.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status
            (only on the ``requests`` code path).
        requests.Timeout: The server did not answer in time
            (only on the ``requests`` code path).
    """

    # If pyodide is available, use the URL helper imported at module level
    if importlib.util.find_spec("pyodide") is not None:
        url_contents = open_url(url)
    else:
        # Bound the wait so an unresponsive host cannot hang the app
        r = requests.get(url, timeout=30)
        # Fail loudly rather than parsing an HTTP error page as if it were CSV
        r.raise_for_status()
        url_contents = StringIO(r.text)

    return pd.read_csv(
        url_contents,
        **kwargs
    )
def plot(
    counts: pd.DataFrame,
    top_n=1000,
    norm="none",
    method="average",
    metric="euclidean"
):
    """Normalize, filter and cluster a table, then draw it as a heatmap.

    Args:
        counts: Table of values; the column axis is labelled "sample"
            in the figure.
        top_n: Number of rows to keep, chosen by the largest row sums
            (computed after normalization).
        norm: "prop" divides each column by its sum, "CLR" applies a
            centered log-ratio transform per column, any other value
            leaves the data untouched.
        method: Linkage method forwarded to ``get_index_order``.
        metric: Distance metric forwarded to ``get_index_order``.

    Returns:
        None. The figure and any explanatory messages are written to the
        Streamlit page.
    """

    # Make a list of messages to display after the plot
    msgs = []

    # Normalize the raw input values
    if norm == "prop":
        msgs.append("Values normalized to the proportion of each column")
        counts = counts / counts.sum()

    elif norm == "CLR":
        msgs.append("Values transformed to the centered-log-transform of each column")
        # Zeros become -inf here; they are left out of the column means and
        # replaced by the smallest finite value once the data is centered.
        logs = np.log10(counts)
        log_means = logs.where(logs > -np.inf).mean()
        # CLR is log(x) - mean(log(x)). Centering has to subtract: dividing
        # by the mean log is not a log-ratio and flips -inf to +inf whenever
        # the mean log is negative.
        counts = logs - log_means
        floor = counts.where(counts > -np.inf).min().min()
        counts = counts.clip(lower=floor)

    # Filter by top_n
    row_totals = counts.sum(axis=1).sort_values(ascending=False)
    counts = counts.reindex(
        index=row_totals.head(int(top_n)).index.values
    )

    # Order the rows and columns
    counts = counts.reindex(
        index=get_index_order(counts, method=method, metric=metric),
        columns=get_index_order(counts.T, method=method, metric=metric),
    )

    # Make the plot
    fig = px.imshow(
        counts,
        color_continuous_scale='RdBu_r',
        aspect="auto",
        labels=dict(
            # Friendly colour-bar title; unknown modes show their own name
            color=dict(
                none="counts",
                prop="proportion"
            ).get(norm, norm),
            x="sample"
        )
    )

    # Display the plot
    st.plotly_chart(fig)

    # Print the messages below the plot
    for msg in msgs:
        st.text(msg)
def get_index_order(counts, method=None, metric=None):
    """Perform linkage clustering and return the ordered index.

    Args:
        counts: DataFrame whose rows are the observations to cluster.
        method: Linkage method name understood by
            ``scipy.cluster.hierarchy.linkage``; ``None`` means "average".
        metric: Distance metric name understood by scipy; ``None`` means
            "euclidean".

    Returns:
        Array of the row labels of ``counts`` in dendrogram leaf order. A
        table with fewer than two rows is returned in its existing order.

    Raises:
        ValueError: Propagated from scipy, e.g. for non-finite values or a
            method/metric combination that scipy rejects.
    """
    # scipy accepts None for neither argument, so fall back to the same
    # defaults that plot() declares.
    if method is None:
        method = "average"
    if metric is None:
        metric = "euclidean"

    # Clustering needs at least two observations; with fewer there is
    # nothing to reorder and linkage() would raise.
    if counts.shape[0] < 2:
        return counts.index.values

    tree = hierarchy.linkage(
        counts.values,
        method=method,
        metric=metric
    )
    return counts.index.values[hierarchy.leaves_list(tree)]
def run():
    """Primary entrypoint: build the sidebar controls and render the heatmap."""

    # Read the counts specified by the user
    counts = read_url(
        st.sidebar.text_input(
            "Counts Table",
            value="https://raw.githubusercontent.com/BRITE-REU/programming-workshops/master/source/workshops/02_R/files/airway_scaledcounts.csv",
            help="Read the abundance values from a CSV (URL) which contains a header row and index column"
        ),
        index_col=0
    )

    n_rows = counts.shape[0]

    # Render the plot
    plot(
        counts,
        top_n=st.sidebar.number_input(
            "Show top N rows",
            help="Only the subset of rows will be shown which have the highest average values",
            # Row clustering needs two rows, so that is the useful floor;
            # capping by n_rows keeps the range valid for small tables.
            min_value=min(2, n_rows),
            max_value=n_rows,
            # Start at 1000 rows, or at the whole table when it is smaller
            value=min(1000, n_rows)
        ),
        norm=st.sidebar.selectbox(
            "Normalize values by",
            help="The raw values in the table can be normalized by the proportion of each column, or by calculating the centered log transform",
            # Preselect "CLR"
            index=2,
            options=[
                "none",
                "prop",
                "CLR"
            ]
        ),
        method=st.sidebar.selectbox(
            "Ordering - method",
            help="The order of rows will be set by linkage clustering using this method",
            # Preselect "ward"
            index=6,
            options=[
                "average",
                "complete",
                "single",
                "weighted",
                "centroid",
                "median",
                "ward"
            ]
        ),
        metric=st.sidebar.selectbox(
            "Ordering - metric",
            help="The order of rows will be set by linkage clustering using this distance metric",
            # Preselect "euclidean"
            index=7,
            options=[
                "braycurtis",
                "canberra",
                "chebyshev",
                "cityblock",
                "correlation",
                "cosine",
                "dice",
                "euclidean",
                "hamming",
                "jaccard",
                "jensenshannon",
                "kulczynski1",
                "mahalanobis",
                "matching",
                "minkowski",
                "rogerstanimoto",
                "russellrao",
                "seuclidean",
                "sokalmichener",
                "sokalsneath",
                "sqeuclidean",
                "yule"
            ]
        ),
    )
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()