-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdataset_typing.py
88 lines (73 loc) · 2.18 KB
/
dataset_typing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from typing import List, Dict, NamedTuple, Optional, Any
## Material and quantity data type
class MatQuant(NamedTuple):
    """A material name together with the list of amounts recorded for it."""

    class Amount(NamedTuple):
        """A single numeric value and the unit string it is expressed in."""

        value: float
        unit: str

    # The nested class is not annotated, so only the two names below become
    # tuple fields (in this order).
    amount: List[Amount]
    material: str
## Sentence-level data types
class Sentence(NamedTuple):
    """Sentence-level record: material lists plus a list of procedure steps."""

    class ProcedureStep(NamedTuple):
        """One entry of ``Sentence.procedure_graph``.

        Carries an operation (id, token, type), optional environment
        information, and optional temperature/time conditions.
        """

        class SentCondition(NamedTuple):
            """Numeric condition values with their max/min, token ids and unit."""

            max: float
            min: float
            # presumably indices of the tokens the values were read from
            # -- TODO confirm against the dataset
            tok_ids: List[int]
            values: List[float]
            unit: str

        env_ids: Optional[List[List[int]]]
        env_toks: Optional[List[str]]
        op_id: int
        op_token: str
        op_type: str
        ref_op: bool
        subject: str
        subsent: List[int]
        temp_values: Optional[SentCondition]
        time_values: Optional[SentCondition]

    # Nested classes are not annotated, so only the names below become the
    # tuple fields of ``Sentence`` (in this order).
    all_materials: List[MatQuant]
    other_materials: List[str]
    precursors: List[MatQuant]
    target: List[str]
    procedure_graph: List[ProcedureStep]
## Paragraph-level data types
class Paragraph(NamedTuple):
    """Paragraph-level record: raw text, flags, and optional annotations."""

    class SynthAction(NamedTuple):
        """One entry of ``Paragraph.synth_actions``: conditions, string, type."""

        class ParaCondition(NamedTuple):
            """Optional temperature and time values attached to an action."""

            class ParaValue(NamedTuple):
                """A list of numeric values with its max/min and unit."""

                max_value: float
                min_value: float
                values: List[float]
                unit: str

            temperature: Optional[ParaValue]
            time: Optional[ParaValue]

        conditions: ParaCondition
        string: str
        type: str

    class MorphInfo(NamedTuple):
        """Parallel string lists describing morphology-related mentions."""

        descriptors: List[str]
        measurements: List[str]
        morphologies: List[str]
        sizes: List[str]
        units: List[str]

    class MorphNER(NamedTuple):
        """One labelled text span.

        ``start``/``end`` are presumably offsets into the paragraph text
        -- TODO confirm whether they are character or token offsets.
        """

        annotation: str
        end: int
        start: int
        text: str

    # NamedTuple rejects field names that start with an underscore (the class
    # statement itself raises ValueError), so the identifier is exposed as
    # ``id`` rather than ``_id``. Its position in the tuple is unchanged.
    # NOTE(review): if records are built from dicts keyed "_id", map that key
    # to ``id`` at the call site.
    id: str
    contains_recipe: bool
    contains_characterization: bool
    materials_and_quantities: Optional[MatQuant]
    morphological_information: Optional[MorphInfo]
    morphology_ner_tokens: Optional[List[MorphNER]]
    seed_mediated: Optional[bool]
    sentences: Optional[List[Sentence]]
    synth_actions: Optional[List[SynthAction]]
    text: str
## Paper-level data types
class Paper(NamedTuple):
    """Paper-level record: bibliographic fields plus the paper's paragraphs."""

    doi: str
    publication_year: int
    times_referenced: int
    paragraphs: List[Paragraph]