-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
123 lines (97 loc) · 4.52 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import streamlit as st
# import tensorflow_hub as hub
import tensorflow as tf
# from tensorflow.keras.layers import TextVectorization
from sklearn.preprocessing import LabelEncoder
import spacy
from spacy.lang.en import English
@st.cache_resource
def _load_skimlit_model():
    """Load the trained Keras model once and reuse it for later predictions.

    Wrapped in ``st.cache_resource`` so the (expensive) deserialisation is not
    repeated for every new abstract or Streamlit rerun.
    """
    # NOTE(review): Keras usually expects a SavedModel directory or an
    # .h5/.keras file here - confirm that "saved_model.pb" resolves to the
    # exported model in the deployment layout.
    return tf.keras.models.load_model("saved_model.pb")


@st.cache_resource
def model_prediction(abstract):
    """Classify each sentence of *abstract* and group the text by predicted role.

    The abstract is split into sentences with spaCy's rule-based sentencizer.
    Each sentence is fed to the model together with its one-hot encoded
    position, the one-hot encoded sentence count, and a space-separated
    character version of the sentence.

    Args:
        abstract: Raw abstract text entered by the user.

    Returns:
        A 5-tuple of strings ``(objective, background, method, conclusion,
        result)``. Each string holds the sentences assigned to that role,
        joined by single spaces, or ``''`` when no sentence received the label.
        A blank abstract yields five empty strings without touching the model.
    """
    # Nothing to classify: avoid handing zero-row tensors to the model.
    if not abstract or not abstract.strip():
        return '', '', '', '', ''

    nlp = English()
    nlp.add_pipe("sentencizer")
    abstract_lines = [str(sent) for sent in nlp(abstract).sents]
    if not abstract_lines:
        return '', '', '', '', ''

    line_count = len(abstract_lines)

    # One-hot encode to the same depth as the training data, so the model
    # accepts the input shape.
    line_numbers_one_hot = tf.one_hot(list(range(line_count)), depth=15)
    # NOTE(review): the sentence count itself is used as the "total lines"
    # feature - confirm this matches the convention used at training time.
    total_lines_one_hot = tf.one_hot([line_count] * line_count, depth=20)

    # Character-level input: every character separated by a single space.
    abstract_chars = [" ".join(sentence) for sentence in abstract_lines]

    pred_probs = _load_skimlit_model().predict(x=(line_numbers_one_hot,
                                                  total_lines_one_hot,
                                                  tf.constant(abstract_lines),
                                                  tf.constant(abstract_chars)))
    pred_indices = tf.argmax(pred_probs, axis=1).numpy()

    # Index position corresponds to the class id produced by the model.
    class_names = ['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']
    grouped = {name: [] for name in class_names}
    for class_id, line in zip(pred_indices, abstract_lines):
        grouped[class_names[class_id]].append(line)

    # str(sent) carries no trailing whitespace, so sentences must be joined
    # with an explicit space to stay readable.
    objective = " ".join(grouped['OBJECTIVE'])
    background = " ".join(grouped['BACKGROUND'])
    method = " ".join(grouped['METHODS'])
    conclusion = " ".join(grouped['CONCLUSIONS'])
    result = " ".join(grouped['RESULTS'])
    return objective, background, method, conclusion, result
def main():
    """Render the SkimLit page: abstract input on the left, results on the right.

    The page is configured first, then split into two columns. The left column
    holds the text area and the 'Extract !' button. When the button is pressed
    with non-blank text, ``model_prediction`` is called and the five resulting
    sections are written to the right column. A blank abstract produces a
    warning instead of a model call.
    """
    st.set_page_config(
        page_title="SkimLit",
        page_icon="📄",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.title('SkimLit📄')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')

    col1, col2 = st.columns(2)

    with col1:
        st.write('#### Enter the Abstract Here !!')
        # An empty label is discouraged by Streamlit; keep a real label for
        # accessibility and hide it, since the heading above already names
        # the field.
        abstract = st.text_area(label='Abstract', height=400,
                                label_visibility='collapsed')
        predict = st.button('Extract !')

    # Nothing to do until the button is pressed on this rerun.
    if not predict:
        return

    # Do not run the model on empty input.
    if not abstract.strip():
        with col1:
            st.warning('Please enter an abstract before extracting.')
        return

    with st.spinner('Wait for prediction....'):
        objective, background, methods, conclusion, result = model_prediction(abstract)

    sections = (
        ('### Objective : ', objective),
        ('### Background : ', background),
        ('### Methods : ', methods),
        ('### Result : ', result),
        ('### Conclusion : ', conclusion),
    )
    with col2:
        for heading, text in sections:
            st.markdown(heading)
            st.write(text)
# Build the Streamlit page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()