diff --git a/dev/xsd_to_edn/main.clj b/dev/xsd_to_edn/main.clj index d7ea8dc6..86313723 100644 --- a/dev/xsd_to_edn/main.clj +++ b/dev/xsd_to_edn/main.clj @@ -69,7 +69,7 @@ (if (:ref attrs) (assoc attrs :type (name-to-type (:ref attrs))) attrs)) -(defn -main [& args] +(defn process-xsd [] (let [d (clj-xml/parse-str (subs (slurp "resources/DUO_RIO_Beheren_OnderwijsOrganisatie_V4.xsd") 1)) ; remove BOM with subs name-to-type (reduce (fn [h {:keys [attrs]}] (assoc h (:name attrs) (:type attrs))) @@ -78,13 +78,17 @@ (= "element" (name (:tag %)))) (:content d))) - ;; Parse document, take children, select complexType elements, remove tag names from tuple with tag-name, attributes, children + ;; Parse document, take children, select simpleType elements + st (map rest (filter #(= "simpleType" (first %)) + (last (parse d)))) + + ;; Parse document, take children, select complexType elements ct (map rest (filter #(= "complexType" (first %)) (last (parse d)))) ;; Index by name, remove requests and responses entities (into {} - (filter (fn [[k v]] + (filter (fn [[k _v]] (not (or (str/ends-with? k "_request") (str/ends-with? 
k "_response")))) (zipmap (map #(:name (first %)) ct) @@ -114,4 +118,30 @@ with-kenmerken (map-hash (fn [[k v]] [k (merge-kenmerken v (result (str "Kenmerkwaardenbereik_" k)))]) result)] - (pprint/pprint (select-keys with-kenmerken interesting-types)))) + {:interesting-types interesting-types + :with-kenmerken with-kenmerken + :simple-types st})) + +;; A constraint looks like: +;; ["maxLength" {:value "60"} nil] +(defn parse-constraint [[name {value :value}]] + {(keyword name) (cond-> value (#{"maxLength" "minLength"} name) Integer/parseInt)}) + +;; A simple-type looks like: +;;({:name "AangebodenOpleidingExterneIdentificatie-v01"} +;; [["restriction" +;; {:base "IdentificatiecodeType"} +;; [["maxLength" {:value "60"} nil]]]]) +(defn- simple-type-reducer [h [tag [[_name attrs constraints]]]] + (let [restrictions (reduce merge {} (map parse-constraint constraints))] + (assoc h + (-> tag :name) + (merge (select-keys attrs [:base]) + (when-not (empty? restrictions) {:restrictions restrictions}))))) + +(defn -main [kind & _args] + (let [{:keys [:interesting-types :with-kenmerken :simple-types]} (process-xsd)] + (pprint/pprint + (case kind + "schema" (select-keys with-kenmerken interesting-types) ; see resources/beheren-schema.edn + "types" (reduce simple-type-reducer {} simple-types))))) ; see resources/beheren-types.edn diff --git a/project.clj b/project.clj index c161d3ee..bcf1f21f 100644 --- a/project.clj +++ b/project.clj @@ -62,7 +62,8 @@ ;; This will regenerate `src/nl/surf/eduhub_rio_mapper/enums.clj` "generate-enums" ["run" "-m" "generate-enums.main"] ;; Simple prepopulated call to the raadplegen section of the rio test api. Spits out response body to STDOUT. 
- "beheren-edn" ["run" "-m" "xsd-to-edn.main"] + "beheren-edn" ["run" "-m" "xsd-to-edn.main" "schema"] + "types-edn" ["run" "-m" "xsd-to-edn.main" "types"] "mapper" ["run" "-m" "nl.surf.eduhub-rio-mapper.main"] "proof-specs" ["run" "-m" "nl.jomco.proof-specs" "--verbose" diff --git a/resources/beheren-types.edn b/resources/beheren-types.edn new file mode 100644 index 00000000..6e042b48 --- /dev/null +++ b/resources/beheren-types.edn @@ -0,0 +1,204 @@ +{"VrijeTekstType" {:base "xsd:string"}, + "URL" + {:base "VrijeTekstType", + :restrictions {:minLength 0, :maxLength 320}}, + "InstroomperiodeCohortbegindatum-v01" {:base "DatumType"}, + "EisenWerkzaamheden-v01" {:base "WaardenlijstType-v01"}, + "IdentificatiecodeBedrijfsdocument-v02" + {:base "IdentificatiecodeType", + :restrictions + {:minLength 1, :maxLength 50, :pattern "[A-Za-z0-9_\\-]{1,50}"}}, + "NLQFniveau-v01" + {:base "WaardenlijstType-v01", + :restrictions {:minLength 1, :maxLength 2}}, + "Aanmeldingscohorteinddatum-v01" {:base "DatumType"}, + "BijzondereInrichtingBo-v01" {:base "WaardenlijstType-v01"}, + "HuisnummertoevoegingNEN-v02" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 6}}, + "OpleidingsUUID-v01" + {:base "UUIDType", :restrictions {:minLength 36, :maxLength 36}}, + "DatumTijdBedrijfsdocument" {:base "DatumTijdLangType"}, + "Cohortbedragsoort-v01" + {:base "WaardenlijstType-v01", :restrictions {:maxLength 30}}, + "Aanmeldingscohortbegindatum-v01" {:base "DatumType"}, + "LeveringGoedgekeurd" {:base "IndicatorType"}, + "OpleidingskenmerkcodePedagogischConcept-v03" + {:base "WaardenlijstType-v01"}, + "UitBedrijfdatum-v01" {:base "DatumType"}, + "Plaatsnaam-v02" + {:base "Teksttype", :restrictions {:minLength 1, :maxLength 40}}, + "OnderwijsbestuurID-v01" + {:base "VrijeTekstType", + :restrictions + {:minLength 7, :maxLength 7, :pattern "(\\d{3}B\\d{3})"}}, + "Foutcode" + {:base "IdentificatiecodeType", + :restrictions {:minLength 1, :maxLength 60}}, + 
"InstroomperiodeCohorteinddatum-v01" {:base "DatumType"}, + "Sleutelnaam-v01" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 25}}, + "EersteInstroomDatum-v01" {:base "DatumType"}, + "IdentificatiecodeType" + {:base "xsd:normalizedString", :restrictions {:minLength 1}}, + "DeficientieMogelijk-v01" {:base "WaardenlijstType-v01"}, + "WaardeType" {:base "xsd:decimal"}, + "Onderwijslocatiegebruikbegindatum-v01" {:base "DatumType"}, + "OrganisatorischeEenheidnaam-v01" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 80}}, + "Niveaucode-v06" {:base "WaardenlijstType-v01"}, + "InternationaleNaam-v01" + {:base "VrijeTekstType", + :restrictions {:minLength 1, :maxLength 225}}, + "Postcode-v02" + {:base "VrijeTekstType", + :restrictions {:maxLength 6, :pattern "([1-9]{1}[0-9]{3}[A-Z]{2})"}}, + "EQFniveau-v01" + {:base "WaardenlijstType-v01", + :restrictions {:minLength 1, :maxLength 1}}, + "OnderwijslocatiegebruikExterneIdentificatie-v01" + {:base "IdentificatiecodeType", :restrictions {:maxLength 60}}, + "PositiefBedrag-v01" + {:base "WaardeType", + :restrictions + {:minInclusive 0, :maxInclusive 1000000, :fractionDigits 2}}, + "DatumType" {:base "xsd:date"}, + "DatumTijdLangType" {:base "xsd:dateTime"}, + "Kwalificatieniveau-v03" {:base "WaardenlijstType-v01"}, + "LeertrajectMBO-v02" {:base "WaardenlijstType-v01"}, + "ISCED-v01" + {:base "NummerKortType", + :restrictions {:minInclusive 0, :maxInclusive 999}}, + "InBedrijfdatum-v01" {:base "DatumType"}, + "KenmerkwaardeDatum-v01" {:base "DatumType"}, + "OnderwijsaanbiederID-v01" + {:base "VrijeTekstType", + :restrictions + {:minLength 7, :maxLength 7, :pattern "(\\d{3}A\\d{3})"}}, + "OpleidingseenheidID-v01" + {:base "Teksttype", + :restrictions + {:minLength 9, :maxLength 9, :pattern "(\\d{4}O\\d{4})"}}, + "Opleidingcode" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 8}}, + "PeriodeEinddatum-v01" {:base "DatumType"}, + "OmschrijvingBuitenlandsePartner-v01" + {:base 
"VrijeTekstType", :restrictions {:maxLength 250}}, + "OpleidingskenmerkcodeAangebodenVoOpleiding-v01" + {:base "WaardenlijstType-v01"}, + "Latitude-v01" + {:base "WaardeType", + :restrictions + {:minInclusive -90, :maxInclusive 90, :fractionDigits 8}}, + "NaamOpleidingLang" + {:base "Teksttype", :restrictions {:minLength 1, :maxLength 225}}, + "Kenmerknaam-v01" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 50}}, + "Huisnummer-v03" + {:base "NummerType", + :restrictions {:minInclusive 1, :maxInclusive 99999}}, + "Studieduureenheid-v01" {:base "WaardenlijstType-v01"}, + "Longitude-v01" + {:base "WaardeType", + :restrictions + {:minInclusive -180, :maxInclusive 180, :fractionDigits 8}}, + "StudielastZwaarte" + {:base "NummerType", :restrictions {:pattern "\\d{0,5}"}}, + "NaamOpleidingKort" + {:base "Teksttype", :restrictions {:minLength 1, :maxLength 40}}, + "KenmerkwaardeBoolean-v01" {:base "IndicatorType"}, + "Cohortaanvangsdatum-v01" {:base "DatumType"}, + "KenmerkwaardeGetal-v01" + {:base "WaardeType", :restrictions {:fractionDigits 6}}, + "Cohorttoegangscode-v01" + {:base "IdentificatiecodeType", :restrictions {:maxLength 60}}, + "PropedeutischeFase-v01" {:base "WaardenlijstType-v01"}, + "AangebodenOpleidingOpleidingsvorm-v01" + {:base "WaardenlijstType-v01"}, + "Opleidingsstelsel-v07" {:base "WaardenlijstType-v01"}, + "ToestemmingAanmeldingscohortVereist-v01" + {:base "WaardenlijstType-v01"}, + "AangebodenOpleidingExterneIdentificatie-v01" + {:base "IdentificatiecodeType", :restrictions {:maxLength 60}}, + "Taalcode-v01" + {:base "WaardenlijstType-v01", + :restrictions {:minLength 3, :maxLength 3}}, + "VerzendendeInstantie-v02" + {:base "IdentificatiecodeType", + :restrictions {:minLength 1, :maxLength 22}}, + "Gemeentenaam" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 40}}, + "OpleidingskenmerkcodeDoelgroeponderwijsSoVso-v02" + {:base "WaardenlijstType-v01"}, + "OpleidingExterneIdentificatie-v01" + {:base 
"IdentificatiecodeType", :restrictions {:maxLength 60}}, + "IndicatorType" {:base "xsd:boolean"}, + "KenmerkwaardeTekst1000-v01" + {:base "VrijeTekstType", + :restrictions {:minLength 1, :maxLength 1000}}, + "OpleidingskenmerkcodeDoorlopendeLeerlijnenBoVo-v01" + {:base "WaardenlijstType-v01"}, + "KenmerkwaardeEnumeratiewaarde-v01" + {:base "VrijeTekstType", + :restrictions {:minLength 1, :maxLength 70, :pattern "[A-Z0-9_]*"}}, + "InternationaleNaamOpleiding-v02" + {:base "Teksttype", :restrictions {:minLength 1, :maxLength 225}}, + "Teksttype" {:base "xsd:token"}, + "VersneldHO-v01" {:base "WaardenlijstType-v01"}, + "Keuzeruimtesoort-v03" {:base "WaardenlijstType-v01"}, + "SCRtoestemmingVerleendTotDeelname-v01" + {:base "WaardenlijstType-v01", :restrictions {:maxLength 30}}, + "NummerKortType" + {:base "xsd:short", + :restrictions {:minInclusive -32768, :maxInclusive 32767}}, + "WaardenlijstType-v01" + {:base "xsd:token", + :restrictions + {:minLength 1, :maxLength 70, :pattern "[A-Za-z0-9._\\-/ ]*"}}, + "OnderwijsaanbiederExterneIdentificatie-v01" + {:base "IdentificatiecodeType", :restrictions {:maxLength 60}}, + "Studieduur-v01" {:base "NummerType"}, + "Sleutelwaarde-v01" + {:base "VrijeTekstType", :restrictions {:minLength 1, :maxLength 25}}, + "OnderwijslocatieID-v01" + {:base "VrijeTekstType", + :restrictions + {:minLength 7, :maxLength 7, :pattern "(\\d{3}X\\d{3})"}}, + "UUIDType" + {:base "xsd:normalizedString", + :restrictions + {:pattern + "[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"}}, + "DoelgroeponderwijsBO-v03" {:base "WaardenlijstType-v01"}, + "ErkendeOrganisatiecode-v01" + {:base "IdentificatiecodeType", + :restrictions {:minLength 1, :maxLength 22}}, + "Opleidingscapaciteit-v01" + {:base "NummerKortType", + :restrictions {:minInclusive 1, :maxInclusive 32000}}, + "Omschrijving3000-v01" + {:base "VrijeTekstType", :restrictions {:maxLength 3000}}, + "Fouttekst" {:base "VrijeTekstType", :restrictions {:maxLength 200}}, 
+ "Toelatingseisen-v01" {:base "WaardenlijstType-v01"}, + "SoortWaardedocument-v01" {:base "WaardenlijstType-v01"}, + "NummerType" {:base "xsd:integer"}, + "ToegankelijkVoor-v01" + {:base "WaardenlijstType-v01", :restrictions {:maxLength 4}}, + "OpleidingskenmerkcodeOnderwijsintensiteit-v01" + {:base "WaardenlijstType-v01"}, + "Cohortstatus-v01" {:base "WaardenlijstType-v01"}, + "StudielastEenheid-v03" {:base "WaardenlijstType-v01"}, + "ToestemmingAanmeldingscohorttoelichting-v01" + {:base "VrijeTekstType", + :restrictions {:minLength 1, :maxLength 3000}}, + "Studiekeuzecheck-v01" {:base "WaardenlijstType-v01"}, + "OpleidingskenmerkcodeDoorlopendeLeerlijnenVmboMbo-v01" + {:base "WaardenlijstType-v01", :restrictions {:maxLength 30}}, + "OntvangendeInstantie-v02" + {:base "IdentificatiecodeType", + :restrictions {:minLength 1, :maxLength 22}}, + "PeriodeBegindatum-v01" {:base "DatumType"}, + "Onderwijslocatiegebruikeinddatum-v01" {:base "DatumType"}, + "AangebodenOpleidingscohortidentifcatie-v01" + {:base "IdentificatiecodeType", :restrictions {:maxLength 60}}, + "NfoOpleidingscategorie-v01" {:base "WaardenlijstType-v01"}, + "HoOpleidingsoort-v01" {:base "WaardenlijstType-v01"}} diff --git a/src/nl/surf/eduhub_rio_mapper/rio/helper.clj b/src/nl/surf/eduhub_rio_mapper/rio/helper.clj index 6d33871b..334788ee 100644 --- a/src/nl/surf/eduhub_rio_mapper/rio/helper.clj +++ b/src/nl/surf/eduhub_rio_mapper/rio/helper.clj @@ -25,6 +25,7 @@ (def specifications (edn/read (PushbackReader. (io/reader (io/resource "ooapi-mappings.edn"))))) (def xsd-beheren (edn/read (PushbackReader. (io/reader (io/resource "beheren-schema.edn"))))) +(def xsd-types (edn/read (PushbackReader. (io/reader (io/resource "beheren-types.edn"))))) (defn ooapi-mapping [name key] {:pre [(string? name)]} @@ -135,11 +136,28 @@ (log/warnf "Missing type for kenmerk (%s), assuming it's :enum" attr-name) :enum))) -(defn- process-attribute [attr-name attr-value kenmerk] +(defn truncate [s n] + {:pre [(and (integer? 
n) (pos? n))]} + (if (string? s) + (subs s 0 (min (count s) n)) + s)) + +(defn- render-name-value [attr-name attr-value type] + (let [type-data (xsd-types type) + max-len (-> type-data :restrictions :maxLength) + ;; Both Teksttype and VrijeTekstType seem to be used for free-form text, the kind of text + ;; that may be truncated. Types like WaardenlijstType-v01 and IdentificatiecodeType also have + ;; max-length restrictions, but for those, we prefer to fail rather than silently truncate. + value (if (and max-len (#{"Teksttype" "VrijeTekstType"} (:base type-data))) + (truncate attr-value max-len) + attr-value)] + [(duoize attr-name) value])) + +(defn- process-attribute [attr-name attr-value kenmerk type] (condp apply [attr-value] vector? (->> attr-value - (mapcat #(process-attribute attr-name % kenmerk)) + (mapcat #(process-attribute attr-name % kenmerk type)) vec) map? @@ -148,7 +166,7 @@ [(if kenmerk (kenmerken attr-name (attr-name->kenmerk-type attr-name) attr-value) - [(duoize attr-name) attr-value])])) + (render-name-value attr-name attr-value type))])) (defn wrapper-periodes-cohorten [rio-obj] (fn [key] @@ -165,11 +183,11 @@ (declare ->xml) -(defn- process-attributes [{:keys [kenmerk name]} rio-obj] +(defn- process-attributes [{:keys [kenmerk name type]} rio-obj] {:pre [(or (fn? rio-obj) (map? 
rio-obj))]} (when-let [attr-value (rio-obj (keyword name))] - (process-attribute name attr-value kenmerk))) + (process-attribute name attr-value kenmerk type))) (defn- process-children [child-type rio-obj] (->> (rio-obj child-type) diff --git a/test/nl/surf/eduhub_rio_mapper/rio_test.clj b/test/nl/surf/eduhub_rio_mapper/rio_test.clj index f8ce9de0..197c8871 100644 --- a/test/nl/surf/eduhub_rio_mapper/rio_test.clj +++ b/test/nl/surf/eduhub_rio_mapper/rio_test.clj @@ -33,8 +33,7 @@ [nl.surf.eduhub-rio-mapper.utils.keystore :as keystore] [nl.surf.eduhub-rio-mapper.utils.soap :as soap] [nl.surf.eduhub-rio-mapper.utils.xml-utils :as xml-utils]) - (:import clojure.lang.ExceptionInfo - java.io.PushbackReader)) + (:import java.io.PushbackReader)) (deftest canonicalization-and-digestion (let [canonicalizer (fn [id] (str " max-length +;; but no exception, since fields get truncated. +(deftest test-and-validate-program-4-valid (let [request (test-handler {::ooapi/id "29990000-0000-0000-0000-000000000000" ::ooapi/type "program" :client-id "rio-mapper-dev.jomco.nl"})] - (is (thrown? ExceptionInfo - (-> request - prep-body - (soap/guard-valid-sexp mutator/validator))) - "guard should throw an exception"))) + (is (= :duo:aanleveren_aangebodenOpleiding_request + (first (-> request + prep-body + (soap/guard-valid-sexp mutator/validator)))) + "guard throws an exception if XML invalid according to XSD"))) (defn collect-paths "If leaf-node, add current path (and node if include-leaves is true) to acc.