Skip to content

Commit

Permalink
Mise-à-jour des fixtures Validata en préparation de changements cassants (#4141)
Browse files Browse the repository at this point in the history

Co-authored-by: Antoine Augusti <[email protected]>
  • Loading branch information
pierrecamilleri and AntoineAugusti authored Feb 17, 2025
1 parent 1399510 commit e616e02
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 69 deletions.
51 changes: 27 additions & 24 deletions apps/shared/lib/validation/tableschema_validator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ defmodule Shared.Validation.TableSchemaValidator.Wrapper do
"""
# Looks up the validator implementation module in the `:transport`
# application environment each time it is called.
defp impl do
  Application.get_env(:transport, :tableschema_validator_impl)
end

@callback validate(binary(), binary()) :: map() | nil
@callback validate(binary(), binary(), binary()) :: map() | nil
@callback validate(binary(), binary()) :: map() | :source_error | nil
@callback validate(binary(), binary(), binary()) :: map() | :source_error | nil
# Forwards the call to the implementation module returned by `impl/0`.
def validate(schema_name, url) do
  impl().validate(schema_name, url)
end

# Same forwarding, with the schema version passed through explicitly.
def validate(schema_name, url, schema_version) do
  impl().validate(schema_name, url, schema_version)
end

Expand All @@ -23,11 +23,13 @@ defmodule Shared.Validation.TableSchemaValidator do
"""
import Transport.Shared.Schemas
@behaviour Shared.Validation.TableSchemaValidator.Wrapper

@timeout 180_000
@max_nb_errors 100
@validata_web_url URI.parse("https://validata.fr/table-schema")
@validata_api_url URI.parse("https://api.validata.etalab.studio/validate")
# https://git.opendatafrance.net/validata/validata-core/-/blob/75ee5258010fc43b6a164122eff2579c2adc01a7/validata_core/helpers.py#L152
@structure_tags ["#head", "#structure"]
# https://gitlab.com/validata-table/validata-table/-/blob/main/src/validata_core/domain/helpers.py#L57
@structure_tags MapSet.new(["#structure", "#header"])

@impl true
def validate(schema_name, url, schema_version \\ "latest") when is_binary(schema_name) and is_binary(url) do
Expand Down Expand Up @@ -70,42 +72,43 @@ defmodule Shared.Validation.TableSchemaValidator do
|> URI.to_string()
end

defp build_report(
%{"report" => %{"tasks" => tasks}, "_meta" => %{"validata-table-version" => validata_version}} = payload
) do
if Enum.count(tasks) != 1 do
raise "tasks should have a length of 1 for response #{payload}"
end

raw_errors = hd(tasks)["errors"]
# We count the errors on our side, because the error count given by the report can be wrong
# see https://git.opendatafrance.net/validata/validata-core/-/issues/37
nb_errors = Enum.count(raw_errors)

{row_errors, structure_errors} =
raw_errors |> Enum.split_with(&MapSet.disjoint?(MapSet.new(&1["tags"]), MapSet.new(@structure_tags)))
defp build_report(%{
"report" => %{"valid" => valid, "stats" => %{"errors" => nb_errors}, "errors" => errors},
"version" => validata_version
}) do
{structure_errors, row_errors} = Enum.split_with(errors, &structure_error?/1)

structure_errors = structure_errors |> Enum.map(&~s(#{&1["name"]} : #{&1["message"]}))
structure_errors = Enum.map(structure_errors, & &1["message"])

row_errors =
row_errors
|> Enum.map(fn row ->
~s(#{row["name"]} : colonne #{row["fieldName"]}, ligne #{row["rowPosition"]}. #{row["message"]})
Enum.map(row_errors, fn row ->
~s(#{row["message"]} Colonne `#{row["fieldName"]}`, ligne #{row["rowNumber"]}.)
end)

errors = (structure_errors ++ row_errors) |> Enum.take(100)
errors = (structure_errors ++ row_errors) |> Enum.take(@max_nb_errors)

%{
"has_errors" => nb_errors > 0,
"has_errors" => not valid,
"errors_count" => nb_errors,
"errors" => errors,
"validator" => __MODULE__,
"validata_api_version" => validata_version
}
end

# The payload carries a "source-error": the remote file could not be
# loaded (e.g. it is a 404).
defp build_report(%{"error" => %{"type" => "source-error"}}) do
  :source_error
end

# Any other payload shape does not produce a report.
defp build_report(_payload) do
  nil
end

# Returns true when an error entry should be reported as a structure error:
# either one of its tags belongs to @structure_tags, or its type is
# "check-error".
defp structure_error?(%{"tags" => tags, "type" => type}) do
  error_tags = MapSet.new(tags)
  tagged_as_structure = not MapSet.disjoint?(error_tags, @structure_tags)

  # May not need to rely on error type in the future.
  # https://gitlab.com/validata-table/validata-table/-/issues/154
  check_error = type in ["check-error"]

  tagged_as_structure or check_error
end

defp ensure_schema_is_tableschema!(schema_name) do
unless Enum.member?(tableschema_names(), schema_name) do
raise "#{schema_name} is not a tableschema"
Expand Down
1 change: 1 addition & 0 deletions apps/shared/test/fixtures/validata_source_error.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"schema": "https://raw.githubusercontent.com/etalab/lieux-covoiturage/refs/heads/master/schema.json","url": "https://example.com/file","options": {"ignore_header_case": "false"},"error": {"message": "The data source could not be successfully loaded: HTTPConnectionPool(host='example.com', port=443): Max retries exceeded with url: file (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f7dd9a6a910>, 'Connection to example.com timed out. (connect timeout=10)'))","type": "source-error"}}

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion apps/shared/test/fixtures/validata_with_errors.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion apps/shared/test/fixtures/validata_with_file_error.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"_meta":{"args":{"schema":"https:\/\/schema.data.gouv.fr\/schemas\/etalab\/schema-irve\/latest\/schema.json","url":"https:\/\/www.data.gouv.fr\/fr\/datasets\/r\/099eb6ff-bcf4-42be-bda7-61dfe1ca4c9f"},"validata-table-version":"0.6.1","validata-core-version":"0.8.3"},"error":{"message":"impossible de lire le contenu","name":"source-error"}}
{"schema":"https://schema.data.gouv.fr/schemas/etalab/schema-irve/latest/schema.json","url":"https://www.data.gouv.fr/fr/datasets/r/099eb6ff-bcf4-42be-bda7-61dfe1ca4c9f","options":{"ignore_header_case":"false"},"error":{"message":"JSON non valide ; Expecting value: line 1 column 1 (char 0)","type":"json-format-error"}}
2 changes: 1 addition & 1 deletion apps/shared/test/fixtures/validata_with_no_errors.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"_meta":{"args":{"schema":"https:\/\/schema.data.gouv.fr\/schemas\/etalab\/schema-lieux-covoiturage\/0.2.4\/schema.json","url":"https:\/\/gist.githubusercontent.com\/AntoineAugusti\/59889a3e82ecfcdcc40cddaf8bfbd8cc\/raw\/4cd50116e4b8232be0a86136f57153992f737306\/covoit_invalide.csv"},"validata-table-version":"0.6.1","validata-core-version":"0.8.4"},"report":{"date":"2022-04-28T13:08:04.054599+00:00","errors":[],"stats":{"errors":1,"tasks":1},"tasks":[{"errors":[{"cell":"lundi \u00e0 dimanche","cells":["76217-C-001","None","Gare SNCF de Dieppe","2 Boulevard Georges Clemenceau","Dieppe","76217","Parking","2019-06-25","True","217602176","1.081183","49.921823","20","2","None","lundi \u00e0 dimanche","Ville de Dieppe","True","Correspondance avec la ligne TER Dieppe-Rouen"],"code":"opening-hours-value","description":"","fieldName":"horaires","fieldNumber":16,"fieldPosition":16,"message":"La valeur 'lundi \u00e0 dimanche' n'est pas une d\u00e9finition d'horaire d'ouverture correcte.\n\n Celle-ci doit respecter la sp\u00e9cification [OpenStreetMap](https:\/\/wiki.openstreetmap.org\/wiki\/Key:opening_hours) de description d'horaires d'ouverture.","name":"Horaires d'ouverture incorrects","note":"","rowNumber":1,"rowPosition":2,"tags":["#body"]}],"partial":false,"resource":{"data":[["id_lieu","id_local","nom_lieu","ad_lieu","com_lieu","insee","type","date_maj","ouvert","source","Xlong","Ylat","nbre_pl","nbre_pmr","duree","horaires","proprio","lumiere","comm"],["76217-C-001","","Gare SNCF de Dieppe","2 Boulevard Georges Clemenceau","Dieppe","76217","Parking","2019-06-25","true","217602176","1.081183","49.921823","20","2","","lundi \u00e0 dimanche","Ville de Dieppe","true","Correspondance avec la ligne TER Dieppe-Rouen"]],"format":"inline","hashing":"md5","layout":{"limitRows":100000},"name":"memory","profile":"tabular-data-resource","schema":{"$schema":"https:\/\/frictionlessdata.io\/schemas\/table-schema.json","author":"Antoine Augusti pour Etalab","contact":"[email 
protected]","contributors":[{"email":"[email protected]","organisation":"transport.data.gouv.fr","role":"contributor","title":"Miryad Ali et Francis Chabouis"},{"email":"[email protected]","organisation":"transport.data.gouv.fr","role":"contributor","title":"Ishan Bhojwani"},{"email":"[email protected]","organisation":"Etalab","role":"contributor","title":"Antoine Augusti"},{"email":"[email protected] ","organisation":"Open Data France","role":"contributor","title":"Lo\u00efc Hay"},{"email":"[email protected]","organisation":"Agence d'Am\u00e9nagement et d'Urbanisme de Corse","role":"contributor","title":"Sophie Raspail"},{"email":"","organisation":"","role":"contributor","title":"Tristan Roussel"},{"email":"[email protected]","organisation":"Jailbreak","role":"contributor","title":"Johan Richer"}],"countryCode":"FR","created":"2019-06-25","custom_checks":[{"name":"french-siren-value","params":{"column":"source"}},{"name":"opening-hours-value","params":{"column":"horaires"}}],"description":"Sp\u00e9cification des lieux permettant le covoiturage","example":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.4\/exemple-valide.csv","fields":[{"constraints":{"pattern":"^([013-9]\\d|2[AB1-9])\\d{3}-C-\\d{3}$","required":true},"description":"Identifiant du lieu de covoiturage, d\u00e9livr\u00e9 par le point d'acc\u00e8s national selon la r\u00e8gle INSEE-C-XXX o\u00f9 INSEE est le code INSEE de la commune et XXX est le num\u00e9ro d\u2019ordre d'arriv\u00e9e dans la base sur 3 chiffres, commen\u00e7ant par 001","example":"35238-C-001 pour la premi\u00e8re aire r\u00e9f\u00e9renc\u00e9e dans la commune de code INSEE 35238","name":"id_lieu","type":"string"},{"constraints":{"required":false},"description":"Identifiant du lieu de covoiturage fix\u00e9 par le producteur de la donn\u00e9e pour son propre usage","example":"23X01","name":"id_local","type":"string"},{"constraints":{"required":true},"description":"Le nom du lieu de covoiturage. 
Recommandation : inutile de r\u00e9p\u00e9ter la nature du type de covoiturage","example":"Les Romains","name":"nom_lieu","type":"string"},{"constraints":{"required":false},"description":"L'adresse du lieu compr\u00e9hensible par le grand public pour assurer la coordination entre le passager et le conducteur. Exemple : \"3, rue de la gare\" ; pour les lieux proches des sorties d'autoroute ou de nationale : \"A11 sortie 7 Le Mans Nord\" ; pour les zones rurales sans adresse : \"croisement de route 1 - route 2\" ou \"le long de route X apr\u00e8s le passage \u00e0 niveau\"","example":"3, rue de la Gare","name":"ad_lieu","type":"string"},{"constraints":{"required":false},"description":"La commune \/ le lieu-dit du covoiturage","example":"Rouen","name":"com_lieu","type":"string"},{"constraints":{"pattern":"^([013-9]\\d|2[AB1-9])\\d{3}$","required":true},"description":"Le code INSEE de la commune d'implantation","example":"76540","name":"insee","type":"string"},{"constraints":{"enum":["Aire de covoiturage","Sortie d'autoroute","Parking","Supermarch\u00e9","Parking relais","D\u00e9laiss\u00e9 routier","Auto-stop"],"required":true},"description":"Le type de lieu de covoiturage","example":"Parking","name":"type","type":"string"},{"constraints":{"required":true},"description":"Date de derni\u00e8re mise \u00e0 jour des donn\u00e9es. 
Notation ISO 8601, format AAAA-MM-DD","example":"2016-10-31","format":"%Y-%m-%d","name":"date_maj","type":"date"},{"constraints":{"required":true},"description":"Le lieu est il actuellement accessible (actif ou inactif)","example":"true","name":"ouvert","type":"boolean"},{"constraints":{"pattern":"^\\d{9}$","required":false},"description":"SIREN de l'entit\u00e9 ayant fourni la donn\u00e9e","example":"225300011","name":"source","type":"string"},{"constraints":{"maximum":180,"minimum":-180,"required":true},"description":"La longitude en degr\u00e9s d\u00e9cimaux (point comme s\u00e9parateur d\u00e9cimal, avec au moins 4 chiffres apr\u00e8s le point d\u00e9cimal) de la localisation de l\u2019entr\u00e9e du lieu de covoiturage exprim\u00e9e dans le syst\u00e8me de coordonn\u00e9es WGS84","example":"1.452323","name":"Xlong","type":"number"},{"constraints":{"maximum":90,"minimum":-90,"required":true},"description":"La latitude en degr\u00e9s d\u00e9cimaux (point comme s\u00e9parateur d\u00e9cimal, avec au moins 4 chiffres apr\u00e8s le point d\u00e9cimal) de la localisation de l\u2019entr\u00e9e du lieu de covoiturage exprim\u00e9e dans le syst\u00e8me de coordonn\u00e9es WGS84","example":"46.59698","name":"Ylat","type":"number"},{"constraints":{"minimum":0,"required":false},"description":"Le nombre de places r\u00e9serv\u00e9es au stationnement disponibles","example":"42","name":"nbre_pl","type":"integer"},{"constraints":{"minimum":0,"required":false},"description":"Le nombre de places PMR disponibles","example":"3","name":"nbre_pmr","type":"integer"},{"constraints":{"minimum":0,"required":false},"description":"S'il existe une restriction sur la dur\u00e9e de stationnement autoris\u00e9e, la dur\u00e9e maximale de stationnement autoris\u00e9e exprim\u00e9e en minutes","example":"60","name":"duree","type":"integer"},{"constraints":{"required":false},"description":"Ce champ permet de renseigner, si l'information est connue, les jours et horaires d'ouverture de 
l'\u00e9quipement","example":"Mo-Fr 08:00-20:00","name":"horaires","type":"string"},{"constraints":{"required":false},"description":"Le nom de l'am\u00e9nageur, c'est-\u00e0-dire de l'entit\u00e9 publique ou priv\u00e9e propri\u00e9taire des infrastructures","example":"D\u00e9partement","name":"proprio","type":"string"},{"constraints":{"required":false},"description":"Un \u00e9clairage nocturne est-il pr\u00e9sent","example":false,"name":"lumiere","type":"boolean"},{"constraints":{"required":false},"description":"Commentaires \u00e9ventuels sur les commodit\u00e9s mises \u00e0 disposition du grand public comme : le num\u00e9ro de t\u00e9l\u00e9phone unique qui indique les services disponibles au moment de l'arriv\u00e9e sur l'aire pour r\u00e9aliser le dernier kilom\u00e8tre ; la pr\u00e9sence de prises 220V ou USB ; acc\u00e8s \u00e0 du r\u00e9seau (t\u00e9l\u00e9com, WiFi) ; sanitaires ; intermodalit\u00e9 en transports","example":"Pr\u00e9sence de sanitaires et acc\u00e8s \u00e0 de l'eau courante","name":"comm","type":"string"}],"homepage":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage","keywords":["covoiturage","transport","mobilit\u00e9"],"licenses":[{"name":"etalab-2.0","path":"https:\/\/www.etalab.gouv.fr\/licence-ouverte-open-licence","title":"Licence Ouverte"}],"missingValues":[""],"primaryKey":"id_lieu","resources":[{"name":"exemple-valide","path":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.2\/exemple-valide.csv","title":"Ressource valide"},{"name":"exemple-invalide","path":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.2\/exemple-invalide.csv","title":"Ressource invalide"}],"title":"Lieux de 
covoiturage","updated":"2022-04-13","uri":"https:\/\/github.com\/etalab\/schema-lieux-covoiturage\/raw\/v0.2.4\/schema.json","version":"0.2.4"},"scheme":"","stats":{"bytes":0,"fields":19,"hash":"","rows":1}},"scope":["hash-count-error","byte-count-error","field-count-error","row-count-error","blank-header","extra-label","missing-label","blank-label","duplicate-label","incorrect-label","blank-row","primary-key-error","foreign-key-error","extra-cell","missing-cell","type-error","constraint-error","unique-error"],"stats":{"errors":1},"structure_warnings":[],"time":0.026,"valid":false}],"time":0.026,"valid":false,"version":"4.18.2"}}
{"schema":"https://schema.data.gouv.fr/schemas/etalab/schema-lieux-covoiturage/0.2.4/schema.json","url":"https://gist.githubusercontent.com/AntoineAugusti/59889a3e82ecfcdcc40cddaf8bfbd8cc/raw/4cd50116e4b8232be0a86136f57153992f737306/covoit_invalide.csv","options":{"ignore_header_case":"false"},"date":"2024-08-23T12:02:37.487948+00:00","version":"0.12.0","report":{"valid":false,"stats":{"errors":1,"warnings":0,"seconds":0.04,"fields":19,"rows":1,"rows_processed":1},"warnings":[],"errors":[{"message":"La valeur 'lundi à dimanche' n'est pas une définition d'horaire d'ouverture correcte.\n\n Celle-ci doit respecter la spécification [OpenStreetMap](https://wiki.openstreetmap.org/wiki/Key:opening_hours) de description d'horaires d'ouverture.","type":"opening-hours-value","tags":["#body"],"rowNumber":2,"fieldName":"horaires","fieldNumber":16,"cell":"lundi à dimanche"}]}}
Loading

0 comments on commit e616e02

Please sign in to comment.