Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
nulberry authored Mar 14, 2022
2 parents 88f75c6 + fb8c530 commit 050aa30
Show file tree
Hide file tree
Showing 11 changed files with 699 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ crate/blobs
crate/repos

# test
sample.jsonl
example_test.py
example_data/*
!example_data/dp/mlruns/multi*.pkl
Expand Down
26 changes: 26 additions & 0 deletions data-programming/MLproject
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,29 @@ entry_points:
--verbose {verbose}
{train_data}
"
framing:
  # Entry point for training the climate-framing label model.
  parameters:
    train_data: path
    task: {type: string, default: multiclass}
    dev_data: {type: int, default: 0}
    n_epochs: {type: int, default: 1000}
    optimizer: {type: string, default: sgd}
    prec_init: {type: float, default: 0.7}
    seed: {type: int, default: 0}
    parallel: {type: int, default: 0}
    device: {type: string, default: cpu}
    # Cosine-similarity threshold forwarded to label/framing.py (--trld).
    trld: {type: float, default: 0.5}
    # Which sentence-encoder embeddings to use; 'string' matches the other parameters.
    encoder: {type: string, default: roberta}
  # Must run framing.py: it is the script that registers --trld and --encoder.
  command: "python ./label/framing.py
    --task {task}
    --dev_data {dev_data}
    --n_epochs {n_epochs}
    --optimizer {optimizer}
    --prec_init {prec_init}
    --seed {seed}
    --parallel {parallel}
    --device {device}
    --trld {trld}
    --encoder {encoder}
    {train_data}
    "
24 changes: 24 additions & 0 deletions data-programming/label/framing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os

from label import run, parser
from label.lfs import FramingLabels
from label.lfs.framing import get_lfs

# Name handed to run.start() as the registered model name for this task.
REGISTERED_MODEL_NAME = 'FramingLabelModel'
# Feature columns made available to the labeling functions. The keys follow the
# 'txt_clean_<encoder>' pattern, one per value accepted by --encoder.
# NOTE(review): the meaning of the None values is defined by run.start() - confirm there.
LF_FEATURES = {
    'txt_clean_roberta': None,
    'txt_clean_use': None,
}
# Location of the pickled gold annotations passed to run.start() as the dev set.
DEV_ANNOTATIONS_PATH = os.path.join('/annotations', 'framing', 'gold_df.pkl')


def main():
    """
    Command-line entry point for the framing labeling task.

    Extends the shared argument parser with the framing-specific options
    (--trld and --encoder), parses the command line and hands everything
    over to run.start().
    """
    parser.add_argument(
        '--trld',
        type=float,
        default=0.5,
        help='cosine similarity threshold',
    )
    parser.add_argument(
        '--encoder',
        type=str,
        choices=('roberta', 'use'),
        default='roberta',
        help='which encoder embeddings to use',
    )
    cli_args = parser.parse_args()
    run.start(
        REGISTERED_MODEL_NAME,
        LF_FEATURES,
        DEV_ANNOTATIONS_PATH,
        get_lfs,
        FramingLabels,
        cli_args,
    )


if __name__ == '__main__':
    main()
13 changes: 13 additions & 0 deletions data-programming/label/lfs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,16 @@ class ExampleLabels(Enum):
bird = 2
horse = 3
snake = 4


class FramingLabels(Enum):
    """Closed set of climate-framing classes, numbered consecutively from 0 to 9."""
    # NOTE(review): the integer values are presumably the class indices emitted by the
    # labeling functions (label.value) - do not reorder or renumber without confirming.
    settled_science = 0
    uncertain_science = 1
    political_or_ideological_struggle = 2
    disaster = 3
    opportunity = 4
    economic = 5
    morality_and_ethics = 6
    role_of_science = 7
    security = 8
    health = 9
45 changes: 45 additions & 0 deletions data-programming/label/lfs/framing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import logging
from snorkel.labeling import LabelingFunction
import pandas as pd
from label.lfs import FramingLabels, ABSTAIN
from label import DATABASE_IP
from scipy.spatial.distance import cdist

# SQL used by get_lfs() to fetch every row of the frame_elements table.
FRAME_ELEMENT_QUERY = """
SELECT * FROM frame_elements;
"""
# Module-level cosine-similarity threshold. get_lfs() overwrites it with the
# --trld command-line value; frame_element_similarity() reads it on every call.
TRLD = 0.5


def get_lfs(parsed_args) -> [LabelingFunction]:
    """
    This function creates and returns a list of all lfs in this module.

    One labeling function is built per (label, frame element) pair. As a side
    effect the module-level similarity threshold TRLD is set from the command
    line so that frame_element_similarity() picks it up.

    :param parsed_args: parsed command-line arguments; ``trld`` (similarity threshold)
        and ``encoder`` (name of the embedding column to use) are read from it
    :return: A list of LabelingFunctions defined in this module
    """
    global TRLD
    TRLD = parsed_args.trld

    # Loop-invariant: the data-point column holding the embeddings for this encoder.
    feature_column = 'txt_clean_{}'.format(parsed_args.encoder)
    frame_elements_df = pd.read_sql(FRAME_ELEMENT_QUERY, DATABASE_IP)

    # Build each (label, element) LF exactly once. The previous version kept
    # appending a growing per-label list to the result, so earlier LFs were
    # duplicated many times over.
    # The label name is part of the LF name because Snorkel appliers require
    # unique LF names and the same element is used for every label.
    # NOTE(review): every element is paired with every label. If frame_elements
    # records which frame an element belongs to, filter on that instead.
    lfs = [
        make_element_lf('{}_{}'.format(row.element_id, label.name),
                        getattr(row, parsed_args.encoder),
                        feature_column,
                        label)
        for label in FramingLabels
        for row in frame_elements_df.itertuples(index=False)
    ]
    logging.info("LFs have been gathered.")
    return lfs


def frame_element_similarity(x, element, encoder, label) -> int:
    """
    Vote for ``label`` when the data point is close enough to a frame element.

    :param x: data point; ``x[encoder]`` must hold a 2-D collection of embedding vectors
    :param element: embedding vector of the frame element
    :param encoder: name of the field of ``x`` that holds the embeddings
    :param label: FramingLabels member to vote for
    :return: ``label.value`` if any embedding has cosine similarity >= TRLD with
        ``element``, otherwise ABSTAIN
    """
    embeddings = x[encoder]
    # No embeddings means nothing to compare against: abstain rather than let
    # cdist fail on an empty input.
    if embeddings is None or len(embeddings) == 0:
        return ABSTAIN

    distances = cdist([element], embeddings, 'cosine')[0]
    # Element-wise comparison instead of the builtin min(): a NaN distance
    # (zero-norm vector) compares False and is ignored, whereas min() returned
    # a result that depended on where the NaN sat in the array.
    if ((1 - distances) >= TRLD).any():
        return label.value
    return ABSTAIN


def make_element_lf(element_id: str, element, encoder: str, label) -> LabelingFunction:
    """
    Wrap frame_element_similarity in a Snorkel LabelingFunction for one frame element.

    :param element_id: used as the name of the labeling function
    :param element: embedding vector of the frame element
    :param encoder: name of the data-point field holding the embeddings to compare with
    :param label: label the function votes for when it fires
    :return: the configured LabelingFunction
    """
    lf_resources = {
        'element': element,
        'encoder': encoder,
        'label': label,
    }
    return LabelingFunction(name=element_id, f=frame_element_similarity, resources=lf_resources)
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ services:
- DATABASE_IP=${DATABASE_IP}
- MULTICLASS_EXAMPLE_MODEL_PATH=${MULTICLASS_EXAMPLE_MODEL_PATH}
- MULTILABEL_EXAMPLE_MODEL_PATH=${MULTILABEL_EXAMPLE_MODEL_PATH}
- CLIMATE_FRAMES_MODEL_PATH=${CLIMATE_FRAMES_MODEL_PATH}

networks:
ls_network:
Expand Down
16 changes: 16 additions & 0 deletions label-studio/config/framing_config.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<View>
  <!-- The object tag's name must match the toName of the control tag below;
       it was "text", which left toName="txt" pointing at nothing. -->
  <Text name="txt" value="$txt"/>
  <!-- Single-choice framing label; values mirror the FramingLabels enum. -->
  <Choices name="frame" toName="txt" choice="single">
    <Choice value="settled_science"/>
    <Choice value="uncertain_science"/>
    <Choice value="political_or_ideological_struggle"/>
    <Choice value="disaster"/>
    <Choice value="opportunity"/>
    <Choice value="economic"/>
    <Choice value="morality_and_ethics"/>
    <Choice value="role_of_science"/>
    <Choice value="security"/>
    <Choice value="health"/>
  </Choices>
</View>

4 changes: 3 additions & 1 deletion label-studio/ls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

LABEL_PREFIX = 'worker_'
# Maps each classification task name to the ordered tuple of its label names.
# The superseded 'example' entry (without a trailing comma) has been dropped so
# the literal is valid and each task appears exactly once.
CLASSIFICATION_TASKS = {
    'example': ('cat', 'dog', 'bird', 'horse', 'snake'),
    'framing': ("settled_science", "uncertain_science", "political_or_ideological_struggle", "disaster", "opportunity",
                "economic", "morality_and_ethics", "role_of_science", "security", "health"),
}

for task in CLASSIFICATION_TASKS:
Expand Down
84 changes: 82 additions & 2 deletions modelling/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,33 @@ class ExampleModelResponse(BaseModel):
prob_horse: List[float]
prob_snake: List[float]

# Try to load each model from the location given in the .env file. Don't interfere with the startup if one can't load,

class FramingModelResponse(BaseModel):
    """
    Response schema of the /predict_climate_frames endpoint.

    Every field is a column of the prediction result: position i of each list
    describes the same data point.
    """
    table: List[str]
    # A list, not a set: a set would drop duplicates and lose ordering, breaking
    # the positional alignment with the prediction columns below.
    id: List[str]
    # One indicator column per frame.
    settled_science: List[int]
    uncertain_science: List[int]
    political_or_ideological_struggle: List[int]
    disaster: List[int]
    opportunity: List[int]
    economic: List[int]
    morality_and_ethics: List[int]
    role_of_science: List[int]
    security: List[int]
    health: List[int]
    # One probability column per frame.
    prob_settled_science: List[float]
    prob_uncertain_science: List[float]
    prob_political_or_ideological_struggle: List[float]
    prob_disaster: List[float]
    prob_opportunity: List[float]
    prob_economic: List[float]
    prob_morality_and_ethics: List[float]
    prob_role_of_science: List[float]
    prob_security: List[float]
    prob_health: List[float]


# Try to load each model from the location given in the .env.dev file. Don't interfere with the startup if one can't load,
# just wait and see if the client tries to get a prediction or do something with the model that failed to load.
try:
with open(os.environ['MULTICLASS_EXAMPLE_MODEL_PATH'], 'rb') as infile:
Expand All @@ -68,10 +94,17 @@ class ExampleModelResponse(BaseModel):
except FileNotFoundError as err:
print("Could not load MultilabelExampleModel: {}".format(err))
multilabel_example_model = None
# Load the climate frames model without ever blocking startup: a missing env
# variable, a missing/unreadable file, or a truncated/corrupt pickle all leave
# the model as None, and the endpoint reports the problem when it is called.
# NOTE: pickle.load can execute arbitrary code - CLIMATE_FRAMES_MODEL_PATH must
# only ever point at a trusted artefact.
try:
    with open(os.environ['CLIMATE_FRAMES_MODEL_PATH'], 'rb') as infile:
        climate_frames_model = pickle.load(infile)
except (KeyError, OSError, EOFError, pickle.UnpicklingError) as err:
    print("Could not load ClimateFramesModel: {}".format(err))
    climate_frames_model = None

# Availability flag per model: True when the corresponding pickle was loaded
# at startup. The superseded 'Multilabel Example Model' line (without a
# trailing comma) has been dropped so the literal is valid.
loaded_models_dict = {
    'Multiclass Example Model': multiclass_example_model is not None,
    'Multilabel Example Model': multilabel_example_model is not None,
    'Climate Frames Model': climate_frames_model is not None,
}


Expand Down Expand Up @@ -159,3 +192,50 @@ def predict_multilabel_example(data_point_item: DataPointItem):
}

return response_dict


@app.post("/predict_climate_frames", response_model=FramingModelResponse)
def predict_climate_frames(data_point_item: DataPointItem):
    """
    Predict climate-framing labels for a batch of data points.

    Takes a list of data point ids from a table and passes them to the loaded
    climate frames model. The response holds the 'table' and 'id' columns, one
    column per frame with binary values indicating the presence/absence of the
    frame in the prediction, and one 'prob_<frame>' column per frame.

    :param data_point_item: request body identifying the data points
    :return: dict of column name -> list of values
    :raises ModelException: with code 'load' when the model was not loaded at
        startup, with code 'predict' when the model fails during prediction
    """
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not loaded_models_dict.get('Climate Frames Model'):
        raise ModelException(name="ClimateFramesModel", code="load")

    json_data_point_item = jsonable_encoder(data_point_item)
    id_json = json.dumps(json_data_point_item)
    id_df = pd.read_json(id_json, dtype={'id': str, 'table': str})

    try:
        result_df = climate_frames_model.predict(id_df)
    except Exception as error:
        raise ModelException(name="ClimateFramesModel", code='predict', error=error) from error

    frame_columns = (
        'settled_science',
        'uncertain_science',
        'political_or_ideological_struggle',
        'disaster',
        'opportunity',
        'economic',
        'morality_and_ethics',
        'role_of_science',
        'security',
        'health',
    )

    # Build the response from the column list so every frame is handled the
    # same way; the hand-written version called `result_df.role_of_science()`
    # instead of `.tolist()` and failed on every request.
    response_dict = {
        'table': result_df['table'].tolist(),
        'id': result_df['id'].tolist(),
    }
    for column in frame_columns:
        response_dict[column] = result_df[column].tolist()
    for column in frame_columns:
        prob_column = 'prob_' + column
        response_dict[prob_column] = result_df[prob_column].tolist()

    return response_dict
Loading

0 comments on commit 050aa30

Please sign in to comment.