Skip to content

Commit

Permalink
added my notebooks and additions to utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurcol committed Feb 16, 2022
1 parent 29a7727 commit 885fd77
Show file tree
Hide file tree
Showing 7 changed files with 7,125 additions and 5 deletions.
123 changes: 118 additions & 5 deletions evalstudent/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pandas as pd
pd.options.mode.chained_assignment = None # Disabling pandas warnings triggered by display_classes
from IPython.core.display import HTML
Expand All @@ -20,15 +21,15 @@ def display_classes(essay_id, train_df):
Prints the essay text (keeping its exact original formatting) using colors to highlight discourse elements and their classes.
Uses only `predictionstring`, which is useful to display models predictions.
'''

# Handling submission format :
discourse_type = "class" if "discourse_type" not in train_df.columns else "discourse_type"

elements_df = train_df[train_df["id"] == essay_id]
essay_text = open(f'../../raw_data/train/{essay_id}.txt').read()
essay_words = essay_text.split()
formatted_essay = ""

# First we make sure discourse elements are in the text order
elements_df["prediction_list"] = elements_df["predictionstring"].map(lambda x : x.split())
elements_df["start_word_index"] = elements_df["prediction_list"].map(lambda x : int(x[0]))
Expand All @@ -38,7 +39,7 @@ def display_classes(essay_id, train_df):
# and then we highlight the exact part of the essay corresponding to the discourse class.
end_char = 0
for i, element in elements_df.iterrows():
start_word = essay_words[element["start_word_index"]]
start_word = essay_words[element["start_word_index"]]
start_char = essay_text[end_char:].find(start_word) + len(essay_text[:end_char])
formatted_essay += essay_text[end_char:start_char]
for word_index in element["prediction_list"]:
Expand Down Expand Up @@ -68,4 +69,116 @@ def generate_predictionstring(discourse_start, discourse_end, essay_text):
word_end = word_start + len(essay_text[char_start:char_end].split())
word_end = min( word_end, len(essay_text.split()) )
predictionstring = " ".join( [str(x) for x in range(word_start,word_end)] )
return predictionstring
return predictionstring

## ADD ARTHUR ##

def get_essay(id, mode='train'):
    """Return the full text of an essay read from its ``.txt`` file.

    The file is looked up at ``<root>/raw_data/<mode>/<id>.txt`` where
    ``<root>`` is the parent of the directory that contains this module.

    Args:
        id (str): id of the essay (file name without the ``.txt`` extension).
            The name shadows the ``id`` builtin; it is kept unchanged so
            existing keyword callers keep working.
        mode (str, optional): sub-folder of ``raw_data`` to read from,
            i.e. whether to access *train* or *test* texts.
            Defaults to 'train'.

    Returns:
        str: the full text of the essay.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``
            when no essay with this id exists for ``mode``).
    """
    project_root = os.path.dirname(os.path.dirname(__file__))
    essay_path = os.path.join(project_root, 'raw_data', mode, f'{id}.txt')
    # Explicit encoding so the decoded text does not depend on the platform's
    # default locale encoding.
    # NOTE(review): assumes the essay files are UTF-8 - confirm against the dataset.
    with open(essay_path, 'r', encoding='utf-8') as file:
        return file.read()

def slicering(ps, txt):
    """Return the portion of an essay designated by a predictionstring.

    The essay is split on whitespace and the words from the first index of
    ``ps`` to the last index of ``ps`` (inclusive) are joined back with single
    spaces. Only the first and last indices are used, so the predictionstring
    is treated as one contiguous span.

    Args:
        ps (str): predictionstring of a discourse, i.e. whitespace-separated
            word indices such as ``"3 4 5"``.
        txt (str): full text of an essay.

    Returns:
        str: portion of the text corresponding to the predictionstring, or an
        empty string when ``ps`` contains no index.

    Raises:
        ValueError: if the first or last token of ``ps`` is not an integer.
    """
    indices = ps.split()
    # An empty predictionstring designates no word at all; without this guard
    # indexing ``indices[0]`` would raise IndexError.
    if not indices:
        return ''

    words = txt.split()
    first_word = int(indices[0])
    last_word = int(indices[-1])

    return ' '.join(words[first_word:last_word + 1])


def css():
    """
    Apply custom.css into the notebook.

    Reads ``./styles/custom.css`` (relative to the current working directory)
    and wraps its content in a ``<style>`` tag.

    Returns:
        HTML: IPython ``HTML`` display object holding the style tag

    Raises:
        OSError: if the stylesheet cannot be opened
    """
    # Context manager so the file handle is closed deterministically instead
    # of being left open until garbage collection.
    with open("./styles/custom.css", "r") as stylesheet:
        styles = stylesheet.read()
    return HTML('<style>'+styles+'</style>')



def render_html(df):
    """
    Transforms one discourse into a html string with appropriate tags for
    visualization.

    The discourse class is used as the tag name and is repeated in bold after
    the text. The discourse text is HTML-escaped so that characters such as
    ``<``, ``>`` and ``&`` in an essay are displayed literally instead of
    being interpreted as markup.

    Args:
        df (Series or mapping): a single discourse exposing ``keys()`` and item
            access, with a ``discourse_text`` entry and either a ``class``
            entry (submission format) or a ``discourse_type`` entry.
    Returns:
        str: html string
    """
    # Function-scope import: keeps the block self-contained without touching
    # the module's import section.
    from html import escape

    # Submission-format data names the label column "class".
    if 'class' in df.keys():
        class_ = 'class'
    else:
        class_ = 'discourse_type'

    # quote=False: the text is placed in element content, not in an attribute,
    # so quotes can stay as they are.
    text = escape(str(df['discourse_text']), quote=False)

    html = "<{0} style='padding: 2px'>{1} <strong> [{0}] </strong></{0}>"\
        .format(df[class_], text)

    return html


def comparison_text(prediction, ground_truth):
    """
    Build a side-by-side view of an essay annotated with predicted classes
    next to the same essay annotated with the true classes, preceded by a
    legend listing every discourse class tag.

    Args:
        prediction (str): essay with predicted classes in html formatting
        ground_truth (str): essay with true classes in html formatting
    Returns:
        HTML: IPython ``HTML`` display object wrapping the legend and the
        two columns
    """
    # Legend block first, then one column per version of the essay.
    table_markup = f"""
<div class="content">
<span style="font-size:16px">Legend --></span>
<lead>Lead</lead>
<Position>Position</Position>
<Claim>Claim</Claim>
<Counterclaim>Counterclaim</Counterclaim>
<Rebuttal>Rebuttal</Rebuttal>
<Evidence>Evidence</Evidence>
<Concluding_Statement>Concluding_Statement</Concluding_Statement>
</div>
<div class="row">
<div class="column">
<h2 class="title">Prediction</h2>
<p style="text-align:justify">{prediction}</p>
</div>
<div class="column">
<h2 class="title">Ground Truth</h2>
<p style="text-align:justify">{ground_truth}</p>
</div>
</div>
"""
    display_object = HTML(table_markup)
    return display_object
Loading

0 comments on commit 885fd77

Please sign in to comment.