Skip to content

Commit

Permalink
Extract types from XSD; use types to enforce maxlen (#322)
Browse files Browse the repository at this point in the history
* Extract types from XSD; use types to enforce maxlen

* Review feedback
  • Loading branch information
mdemare authored Jul 29, 2024
1 parent be13308 commit d4a71f7
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 19 deletions.
38 changes: 34 additions & 4 deletions dev/xsd_to_edn/main.clj
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
(if (:ref attrs) (assoc attrs :type (name-to-type (:ref attrs)))
attrs))

(defn -main [& args]
(defn process-xsd []
(let [d (clj-xml/parse-str (subs (slurp "resources/DUO_RIO_Beheren_OnderwijsOrganisatie_V4.xsd") 1)) ; remove BOM with subs
name-to-type (reduce
(fn [h {:keys [attrs]}] (assoc h (:name attrs) (:type attrs)))
Expand All @@ -78,13 +78,17 @@
(= "element" (name (:tag %))))
(:content d)))

;; Parse document, take children, select complexType elements, remove tag names from tuple with tag-name, attributes, children
;; Parse document, take children, select simpleType elements
st (map rest (filter #(= "simpleType" (first %))
(last (parse d))))

;; Parse document, take children, select complexType elements
ct (map rest (filter #(= "complexType" (first %))
(last (parse d))))

;; Index by name, remove requests and responses
entities (into {}
(filter (fn [[k v]]
(filter (fn [[k _v]]
(not (or (str/ends-with? k "_request")
(str/ends-with? k "_response"))))
(zipmap (map #(:name (first %)) ct)
Expand Down Expand Up @@ -114,4 +118,30 @@
with-kenmerken (map-hash (fn [[k v]]
[k (merge-kenmerken v (result (str "Kenmerkwaardenbereik_" k)))])
result)]
(pprint/pprint (select-keys with-kenmerken interesting-types))))
{:interesting-types interesting-types
:with-kenmerken with-kenmerken
:simple-types st}))

;; A constraint looks like:
;; ["maxLength" {:value "60"} nil]
(defn parse-constraint
  "Turns one XSD restriction facet, shaped like
  [\"maxLength\" {:value \"60\"} nil], into a single-entry map keyed by the
  facet name as a keyword. The values of the length facets (maxLength and
  minLength) are parsed into integers; every other facet keeps its raw
  string value."
  [[facet {:keys [value]}]]
  (let [length-facet? (contains? #{"maxLength" "minLength"} facet)]
    {(keyword facet) (if length-facet?
                       (Integer/parseInt value)
                       value)}))

;; A simple-type looks like:
;;({:name "AangebodenOpleidingExterneIdentificatie-v01"}
;; [["restriction"
;; {:base "IdentificatiecodeType"}
;; [["maxLength" {:value "60"} nil]]]])
(defn- simple-type-reducer
  "Reducing function that adds one parsed simpleType to the accumulator `h`.
  The entry is keyed by the simpleType's :name and holds the :base of its
  restriction element (when present) plus, if at least one facet was found,
  a :restrictions map built by merging the parsed facets."
  [h [tag [[_name attrs constraints]]]]
  (let [restrictions (into {} (map parse-constraint) constraints)
        entry        (cond-> (select-keys attrs [:base])
                       ;; only emit :restrictions when there is something in it
                       (seq restrictions) (assoc :restrictions restrictions))]
    (assoc h (:name tag) entry)))

(defn -main
  "Command-line entry point. Parses the XSD and pretty-prints an EDN structure
  to stdout, selected by `kind`:

    \"schema\" - the interesting types with their kenmerken merged in
    \"types\"  - the simple types with their base type and restrictions

  Throws IllegalArgumentException when `kind` is anything else."
  [kind & _args]
  ;; Validate up front: without this check an unknown or missing kind only
  ;; surfaces as a bare \"No matching clause\" error from `case`, and only after
  ;; the whole XSD has been parsed.
  (when-not (contains? #{"schema" "types"} kind)
    (throw (IllegalArgumentException.
            (str "Unknown kind " (pr-str kind)
                 "; expected \"schema\" or \"types\""))))
  (let [{:keys [interesting-types with-kenmerken simple-types]} (process-xsd)]
    (pprint/pprint
     (case kind
       "schema" (select-keys with-kenmerken interesting-types) ; see resources/beheren-schema.edn
       "types" (reduce simple-type-reducer {} simple-types))))) ; see resources/beheren-types.edn
3 changes: 2 additions & 1 deletion project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
;; This will regenerate `src/nl/surf/eduhub_rio_mapper/enums.clj`
"generate-enums" ["run" "-m" "generate-enums.main"]
;; Simple prepopulated call to the raadplegen section of the rio test api. Spits out response body to STDOUT.
"beheren-edn" ["run" "-m" "xsd-to-edn.main"]
"beheren-edn" ["run" "-m" "xsd-to-edn.main" "schema"]
"types-edn" ["run" "-m" "xsd-to-edn.main" "types"]
"mapper" ["run" "-m" "nl.surf.eduhub-rio-mapper.main"]
"proof-specs" ["run" "-m" "nl.jomco.proof-specs"
"--verbose"
Expand Down
204 changes: 204 additions & 0 deletions resources/beheren-types.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
{"VrijeTekstType" {:base "xsd:string"},
"URL"
{:base "VrijeTekstType",
:restrictions {:minLength 0, :maxLength 320}},
"InstroomperiodeCohortbegindatum-v01" {:base "DatumType"},
"EisenWerkzaamheden-v01" {:base "WaardenlijstType-v01"},
"IdentificatiecodeBedrijfsdocument-v02"
{:base "IdentificatiecodeType",
:restrictions
{:minLength 1, :maxLength 50, :pattern "[A-Za-z0-9_\\-]{1,50}"}},
"NLQFniveau-v01"
{:base "WaardenlijstType-v01",
:restrictions {:minLength 1, :maxLength 2}},
"Aanmeldingscohorteinddatum-v01" {:base "DatumType"},
"BijzondereInrichtingBo-v01" {:base "WaardenlijstType-v01"},
"HuisnummertoevoegingNEN-v02"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 6}},
"OpleidingsUUID-v01"
{:base "UUIDType", :restrictions {:minLength 36, :maxLength 36}},
"DatumTijdBedrijfsdocument" {:base "DatumTijdLangType"},
"Cohortbedragsoort-v01"
{:base "WaardenlijstType-v01", :restrictions {:maxLength 30}},
"Aanmeldingscohortbegindatum-v01" {:base "DatumType"},
"LeveringGoedgekeurd" {:base "IndicatorType"},
"OpleidingskenmerkcodePedagogischConcept-v03"
{:base "WaardenlijstType-v01"},
"UitBedrijfdatum-v01" {:base "DatumType"},
"Plaatsnaam-v02"
{:base "Teksttype", :restrictions {:minLength 1, :maxLength 40}},
"OnderwijsbestuurID-v01"
{:base "VrijeTekstType",
:restrictions
{:minLength 7, :maxLength 7, :pattern "(\\d{3}B\\d{3})"}},
"Foutcode"
{:base "IdentificatiecodeType",
:restrictions {:minLength 1, :maxLength 60}},
"InstroomperiodeCohorteinddatum-v01" {:base "DatumType"},
"Sleutelnaam-v01"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 25}},
"EersteInstroomDatum-v01" {:base "DatumType"},
"IdentificatiecodeType"
{:base "xsd:normalizedString", :restrictions {:minLength 1}},
"DeficientieMogelijk-v01" {:base "WaardenlijstType-v01"},
"WaardeType" {:base "xsd:decimal"},
"Onderwijslocatiegebruikbegindatum-v01" {:base "DatumType"},
"OrganisatorischeEenheidnaam-v01"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 80}},
"Niveaucode-v06" {:base "WaardenlijstType-v01"},
"InternationaleNaam-v01"
{:base "VrijeTekstType",
:restrictions {:minLength 1, :maxLength 225}},
"Postcode-v02"
{:base "VrijeTekstType",
:restrictions {:maxLength 6, :pattern "([1-9]{1}[0-9]{3}[A-Z]{2})"}},
"EQFniveau-v01"
{:base "WaardenlijstType-v01",
:restrictions {:minLength 1, :maxLength 1}},
"OnderwijslocatiegebruikExterneIdentificatie-v01"
{:base "IdentificatiecodeType", :restrictions {:maxLength 60}},
"PositiefBedrag-v01"
{:base "WaardeType",
:restrictions
{:minInclusive 0, :maxInclusive 1000000, :fractionDigits 2}},
"DatumType" {:base "xsd:date"},
"DatumTijdLangType" {:base "xsd:dateTime"},
"Kwalificatieniveau-v03" {:base "WaardenlijstType-v01"},
"LeertrajectMBO-v02" {:base "WaardenlijstType-v01"},
"ISCED-v01"
{:base "NummerKortType",
:restrictions {:minInclusive 0, :maxInclusive 999}},
"InBedrijfdatum-v01" {:base "DatumType"},
"KenmerkwaardeDatum-v01" {:base "DatumType"},
"OnderwijsaanbiederID-v01"
{:base "VrijeTekstType",
:restrictions
{:minLength 7, :maxLength 7, :pattern "(\\d{3}A\\d{3})"}},
"OpleidingseenheidID-v01"
{:base "Teksttype",
:restrictions
{:minLength 9, :maxLength 9, :pattern "(\\d{4}O\\d{4})"}},
"Opleidingcode"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 8}},
"PeriodeEinddatum-v01" {:base "DatumType"},
"OmschrijvingBuitenlandsePartner-v01"
{:base "VrijeTekstType", :restrictions {:maxLength 250}},
"OpleidingskenmerkcodeAangebodenVoOpleiding-v01"
{:base "WaardenlijstType-v01"},
"Latitude-v01"
{:base "WaardeType",
:restrictions
{:minInclusive -90, :maxInclusive 90, :fractionDigits 8}},
"NaamOpleidingLang"
{:base "Teksttype", :restrictions {:minLength 1, :maxLength 225}},
"Kenmerknaam-v01"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 50}},
"Huisnummer-v03"
{:base "NummerType",
:restrictions {:minInclusive 1, :maxInclusive 99999}},
"Studieduureenheid-v01" {:base "WaardenlijstType-v01"},
"Longitude-v01"
{:base "WaardeType",
:restrictions
{:minInclusive -180, :maxInclusive 180, :fractionDigits 8}},
"StudielastZwaarte"
{:base "NummerType", :restrictions {:pattern "\\d{0,5}"}},
"NaamOpleidingKort"
{:base "Teksttype", :restrictions {:minLength 1, :maxLength 40}},
"KenmerkwaardeBoolean-v01" {:base "IndicatorType"},
"Cohortaanvangsdatum-v01" {:base "DatumType"},
"KenmerkwaardeGetal-v01"
{:base "WaardeType", :restrictions {:fractionDigits 6}},
"Cohorttoegangscode-v01"
{:base "IdentificatiecodeType", :restrictions {:maxLength 60}},
"PropedeutischeFase-v01" {:base "WaardenlijstType-v01"},
"AangebodenOpleidingOpleidingsvorm-v01"
{:base "WaardenlijstType-v01"},
"Opleidingsstelsel-v07" {:base "WaardenlijstType-v01"},
"ToestemmingAanmeldingscohortVereist-v01"
{:base "WaardenlijstType-v01"},
"AangebodenOpleidingExterneIdentificatie-v01"
{:base "IdentificatiecodeType", :restrictions {:maxLength 60}},
"Taalcode-v01"
{:base "WaardenlijstType-v01",
:restrictions {:minLength 3, :maxLength 3}},
"VerzendendeInstantie-v02"
{:base "IdentificatiecodeType",
:restrictions {:minLength 1, :maxLength 22}},
"Gemeentenaam"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 40}},
"OpleidingskenmerkcodeDoelgroeponderwijsSoVso-v02"
{:base "WaardenlijstType-v01"},
"OpleidingExterneIdentificatie-v01"
{:base "IdentificatiecodeType", :restrictions {:maxLength 60}},
"IndicatorType" {:base "xsd:boolean"},
"KenmerkwaardeTekst1000-v01"
{:base "VrijeTekstType",
:restrictions {:minLength 1, :maxLength 1000}},
"OpleidingskenmerkcodeDoorlopendeLeerlijnenBoVo-v01"
{:base "WaardenlijstType-v01"},
"KenmerkwaardeEnumeratiewaarde-v01"
{:base "VrijeTekstType",
:restrictions {:minLength 1, :maxLength 70, :pattern "[A-Z0-9_]*"}},
"InternationaleNaamOpleiding-v02"
{:base "Teksttype", :restrictions {:minLength 1, :maxLength 225}},
"Teksttype" {:base "xsd:token"},
"VersneldHO-v01" {:base "WaardenlijstType-v01"},
"Keuzeruimtesoort-v03" {:base "WaardenlijstType-v01"},
"SCRtoestemmingVerleendTotDeelname-v01"
{:base "WaardenlijstType-v01", :restrictions {:maxLength 30}},
"NummerKortType"
{:base "xsd:short",
:restrictions {:minInclusive -32768, :maxInclusive 32767}},
"WaardenlijstType-v01"
{:base "xsd:token",
:restrictions
{:minLength 1, :maxLength 70, :pattern "[A-Za-z0-9._\\-/ ]*"}},
"OnderwijsaanbiederExterneIdentificatie-v01"
{:base "IdentificatiecodeType", :restrictions {:maxLength 60}},
"Studieduur-v01" {:base "NummerType"},
"Sleutelwaarde-v01"
{:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 25}},
"OnderwijslocatieID-v01"
{:base "VrijeTekstType",
:restrictions
{:minLength 7, :maxLength 7, :pattern "(\\d{3}X\\d{3})"}},
"UUIDType"
{:base "xsd:normalizedString",
:restrictions
{:pattern
"[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"}},
"DoelgroeponderwijsBO-v03" {:base "WaardenlijstType-v01"},
"ErkendeOrganisatiecode-v01"
{:base "IdentificatiecodeType",
:restrictions {:minLength 1, :maxLength 22}},
"Opleidingscapaciteit-v01"
{:base "NummerKortType",
:restrictions {:minInclusive 1, :maxInclusive 32000}},
"Omschrijving3000-v01"
{:base "VrijeTekstType", :restrictions {:maxLength 3000}},
"Fouttekst" {:base "VrijeTekstType", :restrictions {:maxLength 200}},
"Toelatingseisen-v01" {:base "WaardenlijstType-v01"},
"SoortWaardedocument-v01" {:base "WaardenlijstType-v01"},
"NummerType" {:base "xsd:integer"},
"ToegankelijkVoor-v01"
{:base "WaardenlijstType-v01", :restrictions {:maxLength 4}},
"OpleidingskenmerkcodeOnderwijsintensiteit-v01"
{:base "WaardenlijstType-v01"},
"Cohortstatus-v01" {:base "WaardenlijstType-v01"},
"StudielastEenheid-v03" {:base "WaardenlijstType-v01"},
"ToestemmingAanmeldingscohorttoelichting-v01"
{:base "VrijeTekstType",
:restrictions {:minLength 1, :maxLength 3000}},
"Studiekeuzecheck-v01" {:base "WaardenlijstType-v01"},
"OpleidingskenmerkcodeDoorlopendeLeerlijnenVmboMbo-v01"
{:base "WaardenlijstType-v01", :restrictions {:maxLength 30}},
"OntvangendeInstantie-v02"
{:base "IdentificatiecodeType",
:restrictions {:minLength 1, :maxLength 22}},
"PeriodeBegindatum-v01" {:base "DatumType"},
"Onderwijslocatiegebruikeinddatum-v01" {:base "DatumType"},
"AangebodenOpleidingscohortidentifcatie-v01"
{:base "IdentificatiecodeType", :restrictions {:maxLength 60}},
"NfoOpleidingscategorie-v01" {:base "WaardenlijstType-v01"},
"HoOpleidingsoort-v01" {:base "WaardenlijstType-v01"}}
28 changes: 23 additions & 5 deletions src/nl/surf/eduhub_rio_mapper/rio/helper.clj
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

;; EDN resources loaded once at namespace load time. Each one is read inside
;; `with-open` so the reader on the classpath resource is closed as soon as
;; parsing finishes, instead of staying open until it is garbage collected.

(def specifications
  (with-open [rdr (PushbackReader. (io/reader (io/resource "ooapi-mappings.edn")))]
    (edn/read rdr)))

(def xsd-beheren
  (with-open [rdr (PushbackReader. (io/reader (io/resource "beheren-schema.edn")))]
    (edn/read rdr)))

;; Map from XSD simple type name to {:base ..., :restrictions {...}}.
(def xsd-types
  (with-open [rdr (PushbackReader. (io/reader (io/resource "beheren-types.edn")))]
    (edn/read rdr)))

(defn ooapi-mapping [name key]
{:pre [(string? name)]}
Expand Down Expand Up @@ -135,11 +136,28 @@
(log/warnf "Missing type for kenmerk (%s), assuming it's :enum" attr-name)
:enum)))

(defn- process-attribute [attr-name attr-value kenmerk]
(defn truncate
  "Returns `s` shortened to at most `n` UTF-16 code units when `s` is a
  string; any non-string value (including nil) is returned unchanged.

  `n` must be a positive integer (enforced by the precondition).

  The cut never lands in the middle of a surrogate pair: if the last kept
  code unit would be a high surrogate, it is dropped as well, so the result
  may be one unit shorter than `n`. A lone surrogate is not a valid XML
  character, so keeping it would produce an invalid document."
  [s n]
  {:pre [(and (integer? n) (pos? n))]}
  (if (and (string? s) (> (count s) n))
    (let [end (if (Character/isHighSurrogate (.charAt ^String s (dec n)))
                (dec n)
                n)]
      (subs s 0 end))
    s))

(defn- render-name-value
  "Builds the [name value] pair for a plain (non-kenmerk) attribute. The name
  is passed through `duoize`; the value is truncated to the type's maxLength
  restriction, but only for types whose base is a free-form text type."
  [attr-name attr-value type]
  (let [{:keys [base restrictions]} (xsd-types type)
        limit      (:maxLength restrictions)
        ;; Teksttype and VrijeTekstType both appear to carry free-form text,
        ;; which is safe to shorten. Other bases (e.g. WaardenlijstType-v01,
        ;; IdentificatiecodeType) also have max-length restrictions, but for
        ;; those failing is preferred over silently truncating.
        free-text? (contains? #{"Teksttype" "VrijeTekstType"} base)
        rendered   (cond-> attr-value
                     (and limit free-text?) (truncate limit))]
    [(duoize attr-name) rendered]))

(defn- process-attribute [attr-name attr-value kenmerk type]
(condp apply [attr-value]
vector?
(->> attr-value
(mapcat #(process-attribute attr-name % kenmerk))
(mapcat #(process-attribute attr-name % kenmerk type))
vec)

map?
Expand All @@ -148,7 +166,7 @@

[(if kenmerk
(kenmerken attr-name (attr-name->kenmerk-type attr-name) attr-value)
[(duoize attr-name) attr-value])]))
(render-name-value attr-name attr-value type))]))

(defn wrapper-periodes-cohorten [rio-obj]
(fn [key]
Expand All @@ -165,11 +183,11 @@

(declare ->xml)

(defn- process-attributes [{:keys [kenmerk name]} rio-obj]
(defn- process-attributes [{:keys [kenmerk name type]} rio-obj]
{:pre [(or (fn? rio-obj)
(map? rio-obj))]}
(when-let [attr-value (rio-obj (keyword name))]
(process-attribute name attr-value kenmerk)))
(process-attribute name attr-value kenmerk type)))

(defn- process-children [child-type rio-obj]
(->> (rio-obj child-type)
Expand Down
18 changes: 9 additions & 9 deletions test/nl/surf/eduhub_rio_mapper/rio_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
[nl.surf.eduhub-rio-mapper.utils.keystore :as keystore]
[nl.surf.eduhub-rio-mapper.utils.soap :as soap]
[nl.surf.eduhub-rio-mapper.utils.xml-utils :as xml-utils])
(:import clojure.lang.ExceptionInfo
java.io.PushbackReader))
(:import java.io.PushbackReader))

(deftest canonicalization-and-digestion
(let [canonicalizer (fn [id] (str "<wsa:Action "
Expand Down Expand Up @@ -102,16 +101,17 @@
::ooapi/type "course"
:client-id "rio-mapper-dev.jomco.nl"}))

;; eigenNaamInternationaal max 225 chars
(deftest test-and-validate-program-4-invalid
;; eigenNaamInternationaal is over 225 chars, which exceeds its max-length,
;; but no exception is thrown, since the field gets truncated.
(deftest test-and-validate-program-4-valid
(let [request (test-handler {::ooapi/id "29990000-0000-0000-0000-000000000000"
::ooapi/type "program"
:client-id "rio-mapper-dev.jomco.nl"})]
(is (thrown? ExceptionInfo
(-> request
prep-body
(soap/guard-valid-sexp mutator/validator)))
"guard should throw an exception")))
(is (= :duo:aanleveren_aangebodenOpleiding_request
(first (-> request
prep-body
(soap/guard-valid-sexp mutator/validator))))
"guard throws an exception if XML invalid according to XSD")))

(defn collect-paths
"If leaf-node, add current path (and node if include-leaves is true) to acc.
Expand Down

0 comments on commit d4a71f7

Please sign in to comment.