-
Notifications
You must be signed in to change notification settings - Fork 59
/
Copy pathvalidate_module_specification.py
281 lines (235 loc) · 12.5 KB
/
validate_module_specification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# This module provides a single function that validates a module YAML file
# and normalizes some fields like setting a missing 'min' on a multiple-choice
# question to 0.
from collections import OrderedDict
import re
import json
import rtyaml
from guidedmodules.module_logic import render_content
class ValidationError(ValueError):
    """Error raised when a module specification fails validation.

    ``context`` names the part of the specification that is at fault and
    ``message`` describes the problem. The string form of the exception
    is the two joined as "<context>: <message>".
    """

    def __init__(self, context, message):
        self.context, self.message = context, message

    def __str__(self):
        return "".join((self.context, ": ", self.message))
def validate_module(spec, app, is_authoring_tool=False):
    """Validate a module specification and normalize it in place.

    Parameters:
      spec -- the module specification mapping; it is modified in place
              and also returned.
      app -- passed through to validate_document, which uses it to read
             documents that are given as file names. May be falsy, in
             which case documents must be given inline as dictionaries.
      is_authoring_tool -- accepted for callers' convenience; this
             function does not read it.

    Returns the same ``spec`` object after normalization.

    Raises ValidationError if the specification is invalid.
    """
    # The module must have a title.
    for field in ("title",):
        if field not in spec:
            raise ValidationError("module specification", "Missing '%s' field." % field)

    # Validate that the introduction and output documents are renderable.
    # If they refer to external files, slurp in the content of those files.
    if "introduction" in spec:
        spec["introduction"] = validate_document(spec["introduction"], "module introduction", app)

    if not isinstance(spec.get("output", []), list):
        raise ValidationError("module output", "Must be a list, not a %s." % type(spec.get("output")).__name__)
    for i, doc in enumerate(spec.get("output", [])):
        spec["output"][i] = validate_document(doc, "output document #{}".format(i+1), app)

    # Validate an app protocol.
    if "protocol" in spec:
        if spec.get("type") != "project":
            raise ValidationError("module specification", "A protocol cannot be specified in this type of module.")
        if isinstance(spec["protocol"], str):
            # If a single protocol is given, turn it into a list of one.
            spec["protocol"] = [spec["protocol"]]
        elif isinstance(spec["protocol"], list):
            # If it's a list, the values must be strings.
            for x in spec["protocol"]:
                if not isinstance(x, str):
                    raise ValidationError("protocols", "Protocols must be strings (not %s)." % repr(x))
        else:
            raise ValidationError("module specification", "protocol must be a string or a list of strings (not %s)." % repr(spec["protocol"]))

    # Check the container type of 'questions' before anything touches it,
    # so that a bad value is reported as a validation error rather than
    # crashing when the introduction question is inserted below.
    if not isinstance(spec.get("questions"), (type(None), list)):
        raise ValidationError("module questions", "Invalid data type of value for 'questions'.")

    # 'introduction' fields are an alias for an interstitial
    # question that comes before all other questions, and since
    # it is first it will be asked first. Except in projects,
    # where it's just a renderable field.
    if "introduction" in spec and spec.get("type") != "project":
        q = {
            "id": "_introduction",
            "title": "Module Introduction",
            "type": "interstitial",
            "prompt": spec["introduction"]["template"],
        }
        # 'questions' may be absent or explicitly null; either way start
        # from an empty list so the introduction can be inserted.
        if spec.get("questions") is None:
            spec["questions"] = []
        spec["questions"].insert(0, q)

    # Validate the questions. An explicit null is treated like an
    # absent key: there is nothing to validate.
    questions = spec.get("questions")
    if questions is not None:
        for i, q in enumerate(questions):
            questions[i] = validate_question(spec, q)

    return spec
def validate_document(doc, error_message_name, app):
    """Validate a document (introduction or output) and return it as a dict.

    Parameters:
      doc -- either a dictionary describing the document, or (only when
             ``app`` is truthy) a string naming a file to load it from.
      error_message_name -- label used as the context of any
             ValidationError raised here.
      app -- object whose ``read_file(name)`` method returns the text of
             the named file; only used when ``doc`` is a string.

    Returns the document dictionary. When loaded from a file, the
    dictionary also carries 'filename' and 'template' entries.

    Raises ValidationError if the document has the wrong type, the file
    is malformed, or the template does not parse.
    """
    # The document must be either a string which points to another
    # file holding the document, or a dictionary. But the string
    # form isn't available if we're validating a new spec submitted
    # by the authoring tool since we don't have the app virtual
    # filesystem at that point.
    if app:
        if not isinstance(doc, (str, dict)):
            raise ValidationError(error_message_name, "Must be a file name or dictionary, not a %s." % type(doc).__name__)
    else:
        if not isinstance(doc, dict):
            raise ValidationError(error_message_name, "Must be a dictionary, not a %s." % type(doc).__name__)

    # If it's a string, slurp in the document from an external file.
    # The document begins with YAML dictionary terminated by a line
    # containing three dots. The subsequent content is stored in
    # the dictionary's 'template' field. The file name is stored
    # in the 'filename' field so that we can re-generate the original
    # filesystem layout in Module::serialize_to_disk.
    if isinstance(doc, str):
        error_message_name += " ({})".format(doc)

        # Read the external file.
        blob = app.read_file(doc)

        # Split the file on the first occurrence of three dots. This
        # is YAML's standard end-of-stream marker. But PyYAML doesn't
        # have a way to read just up to the "...", so we handle that
        # ourselves.
        blob = blob.replace("\r\n", "\n")  # fixes Windows compatibility issues
        sep = "\n...\n"
        if sep not in blob:
            raise ValidationError(error_message_name, "File does not contain a line with just '...'.")
        data, template = blob.split(sep, 1)

        # Parse the YAML above the "...". It must be a dictionary: the
        # code below reads 'format' from it and stores 'filename' and
        # 'template' into it, so an empty or scalar header is rejected
        # here rather than crashing later.
        data = rtyaml.load(data)
        if not isinstance(data, dict):
            raise ValidationError(error_message_name, "The YAML above the '...' line must be a dictionary, not a %s." % type(data).__name__)

        # Trim the template so that it looks good if the revised
        # module spec is serialized to YAML.
        template = template.rstrip() + "\n"

        # If the template format is json or yaml, then parse that format.
        # Other template formats are loaded as strings.
        template_format = data.get("format")
        try:
            if template_format == "json":
                template = json.loads(template)
            elif template_format == "yaml":
                template = rtyaml.load(template)
        except Exception as e:
            raise ValidationError(error_message_name, "Invalid %s template syntax: %s" % (template_format, str(e))) from e

        # Store the filename and template in it.
        data['filename'] = doc
        data['template'] = template
        doc = data

    # Check that the template is valid.
    try:
        render_content(doc, None, "PARSE_ONLY", "(document template)")
    except KeyError as e:
        raise ValidationError(error_message_name, "Missing field: %s" % str(e)) from e
    except ValueError as e:
        raise ValidationError(error_message_name, "Invalid template: %s" % str(e)) from e

    return doc
def _check_answer_count_bounds(spec, invalid):
    # Fill in and validate 'min'/'max' on a multiple-choice or datagrid question.
    spec["min"] = spec.get("min", 0)
    if not isinstance(spec["min"], int) or spec["min"] < 0:
        invalid("min must be a positive integer")

    # A missing 'max' is stored as None, meaning no upper bound was given.
    spec["max"] = spec.get("max")
    if spec["max"] is not None:
        if not isinstance(spec["max"], int) or spec["max"] < 0:
            invalid("max must be a positive integer")
        if spec["max"] < spec["min"]:
            invalid("max must be an integer greater than or equal to min")


def _normalize_question_protocol(spec, invalid):
    # Turn a question's 'protocol' into a list of strings, or report it as invalid.
    protocol = spec["protocol"]
    if isinstance(protocol, str):
        # If a single protocol is given, turn it into a list of one.
        spec["protocol"] = [protocol]
    elif isinstance(protocol, list):
        # If it's a list, the values must be strings.
        for x in protocol:
            if not isinstance(x, str):
                invalid("Protocol value must be a string (not %s)." % repr(x))
    else:
        invalid("Protocol must be either a string or a list of strings.")


def _check_impute_rules(mspec, spec, invalid):
    # Validate the question's optional list of impute rules.
    imputes = spec.get("impute", [])
    if not isinstance(imputes, list):
        invalid("Impute's value must be a list.")
    if not imputes:
        return

    # Only needed when there are rules to check; one environment is
    # enough for all of them.
    from jinja2.sandbox import SandboxedEnvironment
    env = SandboxedEnvironment()

    # The module id is included in error messages when it is known. A
    # module without an id must not turn a validation error into a KeyError.
    module_id = mspec.get("id")
    prefix = "" if module_id is None else "%s " % (module_id,)

    for i, rule in enumerate(imputes):
        context = prefix + "question %s, impute condition %d" % (spec['id'], i+1)

        def invalid_rule(msg, context=context):
            raise ValidationError(context, msg)

        if not isinstance(rule, dict):
            invalid_rule("Impute condition must be a dictionary, not a %s." % type(rule).__name__)

        # Check that the condition is a string, and that it's a valid Jinja2 expression.
        # The condition can be omitted (it means "always true").
        if "condition" in rule:
            condition = rule["condition"]
            if not isinstance(condition, str):
                invalid_rule("Impute condition must be a string, not a %s." % type(condition).__name__)
            try:
                env.compile_expression(condition)
            except Exception as e:
                invalid_rule("Impute condition %s is an invalid Jinja2 expression: %s." % (repr(condition), str(e)))

        # Check that the value is valid. If the value-mode is raw, which
        # is the default, then any Python/YAML value is valid --- but it
        # must be present. The "expression" mode requires a valid Jinja2
        # expression, which we can check. The "template" mode requires
        # a valid Jinja2 template.
        if "value" not in rule:
            invalid_rule("Impute condition value is missing.")
        value_mode = rule.get("value-mode")
        if value_mode == "expression":
            try:
                env.compile_expression(rule["value"])
            except Exception as e:
                invalid_rule("Impute condition value %s is an invalid Jinja2 expression: %s." % (repr(rule["value"]), str(e)))
        if value_mode == "template":
            try:
                env.from_string(rule["value"])
            except Exception as e:
                invalid_rule("Impute condition value %s is an invalid Jinja2 template: %s." % (repr(rule["value"]), str(e)))


def validate_question(mspec, spec):
    """Validate one question specification and return a normalized copy.

    Parameters:
      mspec -- the specification of the module that contains the question.
      spec -- the question specification mapping. It is copied into an
              OrderedDict before any values are filled in, so the
              caller's mapping is not modified.

    Returns the normalized copy. Normalizations applied:
      * multiple-choice and datagrid questions get 'min' (default 0)
        and 'max' (default None);
      * module and module-set questions get their 'module-id' resolved
        by resolve_relative_module_id, and a single 'protocol' string
        is wrapped in a list.

    Raises ValidationError if the question is invalid.
    """
    if not spec.get("id"):
        raise ValidationError("module questions", "A question is missing an id.")

    def invalid(msg):
        raise ValidationError("question %s" % (spec['id'],), msg)

    # clone dict before updating
    spec = OrderedDict(spec)

    # Since question IDs become Jinja2 identifiers, they must be valid
    # Jinja2 identifiers. http://jinja.pocoo.org/docs/2.9/api/#notes-on-identifiers
    # fullmatch is used (rather than a '$' anchor) so that an id with a
    # trailing newline is rejected, and non-string ids are rejected
    # before they reach the regular expression.
    question_id = spec["id"]
    if not isinstance(question_id, str) or not re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", question_id):
        invalid("The question ID may only contain ASCII letters, numbers, and underscores, and the first character must be a letter or underscore.")

    # Perform type conversions, validation, and fill in some defaults in the YAML
    # schema so that the values are ready to use in the database.
    question_type = spec.get("type")
    if question_type in ("multiple-choice", "datagrid"):
        # validate and type-convert min and max
        _check_answer_count_bounds(spec, invalid)

    elif question_type in ("module", "module-set"):
        if "module-id" in spec:
            # Resolve the relative module ID to an absolute path relative
            # to the root of this app. It's optional because a protocol
            # can be specified instead.
            if not isinstance(spec["module-id"], str):
                invalid("module-id must be a string (not %s)." % repr(spec["module-id"]))
            spec["module-id"] = resolve_relative_module_id(mspec, spec["module-id"])

        if "protocol" in spec:
            _normalize_question_protocol(spec, invalid)

        # Exactly one of module-id and protocol must be given.
        if ("module-id" in spec) == ("protocol" in spec):
            invalid("Question must have either a module-id or protocol field.")

    elif question_type is None:
        invalid("Question is missing a type.")

    # Check that required fields are present.
    if not isinstance(spec.get("title"), str):
        invalid("Question title is missing or has an invalid data type (must be a string).")
    if spec.get("prompt") is None:
        # Prompts are optional in project and system modules but required elsewhere.
        if mspec.get("type") not in ("project", "system-project"):
            invalid("Question prompt is missing.")

    # Check that the prompt, placeholder, and default are valid Jinja2 templates.
    for field in ("prompt", "placeholder", "default"):
        if field not in spec:
            continue
        if not isinstance(spec[field], str):
            invalid("Question %s must be a string, not a %s." % (field, type(spec[field]).__name__))
        try:
            render_content({
                    "format": "markdown",
                    "template": spec[field],
                },
                None, "PARSE_ONLY", "(question %s)" % field)
        except ValueError as e:
            invalid("Question %s is an invalid Jinja2 template: %s" % (field, e))

    # Validate impute conditions.
    _check_impute_rules(mspec, spec, invalid)

    return spec
def resolve_relative_module_id(within_module, module_id):
    """Turn a module ID written in a module's YAML into an app-rooted ID.

    A ``module_id`` beginning with '/' is already rooted and is returned
    with that leading slash removed. Any other ID is taken relative to
    the directory part of ``within_module["id"]``.

    See modules.Module.getReferenceTo for the inverse function.
    """
    if not module_id.startswith("/"):
        # Keep everything up to and including the last '/' of the
        # containing module's ID, then append the relative ID.
        directory, slash, _ = within_module["id"].rpartition("/")
        return directory + slash + module_id
    return module_id[1:]