diff --git a/api/src/pdc/providers/datagouv/DataGouvAPIProvider.ts b/api/src/pdc/providers/datagouv/DataGouvAPIProvider.ts
new file mode 100644
index 0000000000..69ba6fff63
--- /dev/null
+++ b/api/src/pdc/providers/datagouv/DataGouvAPIProvider.ts
@@ -0,0 +1,221 @@
+import { ConfigInterfaceResolver, NotFoundException, provider } from "@/ilos/common/index.ts";
+import { readFile } from "@/lib/file/index.ts";
+import { logger } from "@/lib/logger/index.ts";
+import { basename } from "@/lib/path/index.ts";
+import { Dataset, Metadata, Resource } from "@/pdc/providers/datagouv/DataGouvAPITypes.ts";
+import { DataGouvAPIConfig } from "@/pdc/services/export/config/datagouv.ts";
+
+/**
+ * Client for the data.gouv.fr HTTP API, scoped to the dataset configured
+ * under `datagouv.api`.
+ *
+ * The dataset payload and the last resource handled are cached on the instance.
+ */
+@provider()
+export class DataGouvAPIProvider {
+  protected _dataset: Dataset | null = null;
+  protected _resource: Resource | null = null;
+  protected config: DataGouvAPIConfig;
+
+  constructor(protected configStore: ConfigInterfaceResolver) {
+    this.config = configStore.get("datagouv.api");
+  }
+
+  // -------------------------------------------------------------------------------------------------------------------
+  // PUBLIC API
+  // -------------------------------------------------------------------------------------------------------------------
+
+  /**
+   * Get all dataset metadata
+   *
+   * Includes organisations and the list of resources. The payload is cached
+   * on the instance until a resource is uploaded or updated.
+   *
+   * @throws NotFoundException when the API answers with an empty payload
+   */
+  public async dataset(): Promise<Dataset> {
+    if (this._dataset) {
+      return this._dataset;
+    }
+
+    // Trailing slash: same canonical route style as the upload endpoints below.
+    const dataset = await this.get<Dataset>(`datasets/${this.config.dataset}/`);
+    if (!dataset) {
+      throw new NotFoundException(`Dataset not found: ${this.config.dataset}`);
+    }
+
+    this._dataset = dataset;
+
+    return this._dataset;
+  }
+
+  /**
+   * Get a specific resource from the dataset by title.
+   *
+   * Defaults to the latest resource if no title is provided.
+   * NOTE(review): "latest" is the first resource whose `latest` URL contains
+   * its own id; confirm this matches the intended selection.
+   *
+   * @param title exact title to look for, or null for the default resource
+   * @returns the matching resource, also stored as the current resource
+   * @throws NotFoundException when no resource matches
+   */
+  public async resource(title: string | null = null): Promise<Resource> {
+    const dataset = await this.dataset();
+    const resource = title
+      ? dataset.resources.find((r) => r.title === title)
+      : dataset.resources.find((r) => r.latest.includes(r.id));
+
+    if (!resource) {
+      throw new NotFoundException(`Resource not found for dataset ${dataset.id}`);
+    }
+
+    this._resource = resource;
+    return this._resource;
+  }
+
+  /**
+   * Tell whether the dataset already has a resource with this title.
+   *
+   * Only a NotFoundException means "no"; any other failure (network, auth...)
+   * is rethrown so that callers do not mistake an outage for a missing resource.
+   */
+  public async exists(title: string): Promise<boolean> {
+    try {
+      await this.resource(title);
+      return true;
+    } catch (e) {
+      if (e instanceof NotFoundException) {
+        return false;
+      }
+      throw e;
+    }
+  }
+
+  /**
+   * Upload a local file as a resource of the dataset.
+   *
+   * The resource title is the file's basename. When a resource with that title
+   * already exists its file is replaced, otherwise a new resource is created.
+   *
+   * @param filepath path of the file to send
+   * @returns the resource returned by the API
+   * @throws Error when the API answers with an empty payload
+   */
+  public async upload(filepath: string): Promise<Resource> {
+    const title = basename(filepath);
+    let url = `datasets/${this.config.dataset}/upload/`;
+
+    if (await this.exists(title)) {
+      // Look the resource up explicitly rather than relying on the side effect of exists().
+      const existing = await this.resource(title);
+      logger.info(`Resource ${title} already exists, replacing...`);
+      url = `datasets/${this.config.dataset}/resources/${existing.id}/upload/`;
+    }
+
+    const form = new FormData();
+    const file = new File([await readFile(filepath)], title);
+    form.append("file", file);
+
+    const resource = await this.post<Resource>(url, form);
+    if (!resource) {
+      throw new Error(`Failed to upload resource for dataset ${this.config.dataset}`);
+    }
+
+    this._resource = resource;
+    // The cached dataset no longer reflects the remote list of resources.
+    this._dataset = null;
+
+    return resource;
+  }
+
+  /**
+   * Update the metadata of a resource. The current title is always sent along.
+   *
+   * @param resource resource to update
+   * @param metadata fields to set on the resource
+   * @returns the updated resource returned by the API
+   * @throws Error when the API answers with an empty payload
+   */
+  public async setMetadata(resource: Resource, metadata: Metadata): Promise<Resource> {
+    const r = await this.put<Resource>(
+      `datasets/${this.config.dataset}/resources/${resource.id}/`,
+      JSON.stringify({
+        title: resource.title,
+        ...metadata,
+      }),
+    );
+
+    if (!r) {
+      throw new Error(`Failed to update resource ${resource.id} metadata`);
+    }
+
+    this._resource = r;
+    // The cached dataset still holds the previous version of this resource.
+    this._dataset = null;
+
+    return this._resource;
+  }
+
+  // -------------------------------------------------------------------------------------------------------------------
+  // PRIVATE REQUEST HELPERS
+  // -------------------------------------------------------------------------------------------------------------------
+
+  protected async get<T>(url: string): Promise<T> {
+    return this._query<T>(url);
+  }
+
+  protected async post<T>(url: string, body: BodyInit): Promise<T> {
+    return this._query<T>(url, "POST", body);
+  }
+
+  protected async put<T>(url: string, body: BodyInit): Promise<T> {
+    return this._query<T>(url, "PUT", body);
+  }
+
+  // -------------------------------------------------------------------------------------------------------------------
+  // INTERNALS
+  // -------------------------------------------------------------------------------------------------------------------
+
+  /**
+   * Send a request to the API and decode the JSON response.
+   *
+   * @param url path relative to the configured base URL
+   * @param method HTTP verb
+   * @param body JSON string or FormData payload
+   * @throws Error when the response status is not 2xx
+   */
+  protected async _query<T>(
+    url: string,
+    method: "GET" | "POST" | "PUT" = "GET",
+    body: BodyInit | null = null,
+  ): Promise<T> {
+    // Join base and path with exactly one slash, whatever the configured URL ends with.
+    const baseURL = this.config.url.replace(/\/+$/, "");
+    const endpoint = `${baseURL}/${url.replace(/^\/+/, "")}`;
+
+    const headers: Record<string, string> = {
+      "Accept": "application/json",
+      "X-API-KEY": this.config.key,
+    };
+
+    // fetch must set the multipart Content-Type itself (it carries the boundary).
+    if (!(body instanceof FormData)) {
+      headers["Content-Type"] = "application/json";
+    }
+
+    const init: RequestInit = { method, headers };
+    if (body) {
+      init.body = body;
+    }
+
+    const response = await fetch(endpoint, init);
+    if (!response.ok) {
+      // Release the unread body before failing.
+      await response.body?.cancel();
+      throw new Error(`${method} ${endpoint} failed: ${response.status} ${response.statusText}`);
+    }
+
+    return await response.json() as T;
+  }
+}
diff --git a/api/src/pdc/providers/datagouv/DataGouvAPITypes.ts b/api/src/pdc/providers/datagouv/DataGouvAPITypes.ts
new file mode 100644
index 0000000000..b05874ba38
--- /dev/null
+++ b/api/src/pdc/providers/datagouv/DataGouvAPITypes.ts
@@ -0,0 +1,120 @@
+/** Organisation referenced by a dataset (see `Dataset.organization`). */
+export type Organisation = {
+  acronym: string;
+  badges: { kind: string }[];
+  class: string;
+  id: string;
+  logo: string;
+  logo_thumbnail: string;
+  name: string;
+  page: string;
+  slug: string;
+  uri: string;
+};
+
+/** Dataset payload, as read by DataGouvAPIProvider.dataset(). */
+export type Dataset = {
+  acronym: string;
+  archived: null | string;
+  badges: { kind: string }[];
+  contact_point: null | string;
+  created_at: string;
+  deleted: null | string;
+  description: string;
+  extras: {
+    "recommendations-externals": {
+      id: string;
+      messages: { en: object; fr: object };
+      score: number;
+      source: string;
+    }[];
+    "recommendations:sources": string[];
+  };
+  frequency: string;
+  frequency_date: string;
+  harvest: null | string;
+  id: string;
+  internal: {
+    created_at_internal: string;
+    last_modified_internal: string;
+  };
+  last_modified: string;
+  last_update: string;
+  license: string;
+  metrics: {
+    discussions: number;
+    followers: number;
+    resources_downloads: number;
+    reuses: number;
+    views: number;
+  };
+  organization: Organisation;
+  owner: null | string;
+  page: string;
+  private: boolean;
+  quality: {
+    all_resources_available: boolean;
+    dataset_description_quality: boolean;
+    has_open_format: boolean;
+    has_resources: boolean;
+    license: boolean;
+    resources_documentation: boolean;
+    score: number;
+    spatial: boolean;
+    temporal_coverage: boolean;
+    update_frequency: boolean;
+    update_fulfilled_in_time: boolean;
+  };
+  resources: Resource[];
+  schema: null | string;
+  slug: string;
+  spatial: {
+    geom: null | string;
+    granularity: string;
+    zones: string[];
+  };
+  tags: string[];
+  temporal_coverage: { end: string; start: string };
+  title: string;
+  uri: string;
+};
+
+/** Entry of `Dataset.resources`; also what upload and metadata calls return. */
+export type Resource = {
+  checksum: {
+    type: "sha1";
+    value: string;
+  };
+  created_at: string;
+  description: null | string;
+  extras: {
+    "check:available": boolean;
+    "check:date": string;
+    "check:headers:content-type": string;
+    "check:status": number;
+    "check:timeout": boolean;
+  };
+  filesize: number;
+  filetype: string;
+  format: string;
+  harvest: null | string;
+  id: string;
+  internal: {
+    created_at_internal: string;
+    last_modified_internal: string;
+  };
+  last_modified: string;
+  latest: string;
+  metrics: Record<string, unknown>;
+  mime: string;
+  preview_url: null | string;
+  schema: { name: null | string; url: null | string; version: null | string };
+  title: string;
+  type: string;
+  url: string;
+};
+
+/** Editable resource fields sent by DataGouvAPIProvider.setMetadata(). */
+export type Metadata = {
+  description: string;
+};
diff --git a/api/src/pdc/providers/datagouv/DataGouvMetadataProvider.ts b/api/src/pdc/providers/datagouv/DataGouvMetadataProvider.ts
new file mode 100644
index 0000000000..2f51e7f481
--- /dev/null
+++ b/api/src/pdc/providers/datagouv/DataGouvMetadataProvider.ts
@@ -0,0 +1,22 @@
+import { provider } from "@/ilos/common/Decorators.ts";
+import { ConfigInterfaceResolver } from "@/ilos/common/index.ts";
+import { DataGouvAPIConfig } from "@/pdc/services/export/config/datagouv.ts";
+
+/**
+ * Builds the metadata attached to resources published on data.gouv.fr.
+ */
+@provider()
+export class DataGouvMetadataProvider {
+  protected config: DataGouvAPIConfig;
+
+  constructor(configStore: ConfigInterfaceResolver) {
+    this.config = configStore.get("datagouv.api");
+  }
+
+  /**
+   * Resource description: for now only the current timestamp, in ISO 8601 (UTC).
+   */
+  description(): string {
+    return new Date().toISOString();
+  }
+}
diff --git a/api/src/pdc/services/export/ExportServiceProvider.ts b/api/src/pdc/services/export/ExportServiceProvider.ts
index b4573fed8f..6707b714f1 100644
--- a/api/src/pdc/services/export/ExportServiceProvider.ts
+++ b/api/src/pdc/services/export/ExportServiceProvider.ts
@@ -2,6 +2,8 @@ import { CommandExtension } from "@/ilos/cli/index.ts";
 import { ExtensionInterface, NewableType, serviceProvider } from "@/ilos/common/index.ts";
 import { ServiceProvider as AbstractServiceProvider } from "@/ilos/core/index.ts";
 import { DefaultTimezoneMiddleware } from "@/pdc/middlewares/DefaultTimezoneMiddleware.ts";
+import { DataGouvAPIProvider } from "@/pdc/providers/datagouv/DataGouvAPIProvider.ts";
+import { DataGouvMetadataProvider } from "@/pdc/providers/datagouv/DataGouvMetadataProvider.ts";
 import { defaultMiddlewareBindings } from "@/pdc/providers/middleware/index.ts";
 import { S3StorageProvider } from "@/pdc/providers/storage/index.ts";
 import { ValidatorExtension, ValidatorMiddleware } from "@/pdc/providers/validator/index.ts";
@@ -51,7 +53,7 @@ const repositories = [
 ];
 
 // External providers are from the @pdc namespace
-const externalProviders = [S3StorageProvider];
+const externalProviders = [S3StorageProvider, DataGouvAPIProvider, DataGouvMetadataProvider];
 
 // Commands are from the ./commands folder
 // and are used to implement the CLI commands.
diff --git a/api/src/pdc/services/export/commands/DataGouvCommand.ts b/api/src/pdc/services/export/commands/DataGouvCommand.ts
index 0afd219e3f..4e38a42982 100644
--- a/api/src/pdc/services/export/commands/DataGouvCommand.ts
+++ b/api/src/pdc/services/export/commands/DataGouvCommand.ts
@@ -1,14 +1,16 @@
 import { coerceDate } from "@/ilos/cli/index.ts";
-import { command, CommandInterface } from "@/ilos/common/index.ts";
+import { command, CommandInterface, ConfigInterfaceResolver } from "@/ilos/common/index.ts";
 import { logger } from "@/lib/logger/index.ts";
 import { today, toTzString } from "@/pdc/helpers/dates.helper.ts";
+import { DataGouvAPIProvider } from "@/pdc/providers/datagouv/DataGouvAPIProvider.ts";
+import { DataGouvMetadataProvider } from "@/pdc/providers/datagouv/DataGouvMetadataProvider.ts";
 import { Timezone } from "@/pdc/providers/validator/index.ts";
 import { CSVWriter } from "@/pdc/services/export/models/CSVWriter.ts";
+import { ExportTarget } from "@/pdc/services/export/models/Export.ts";
 import { ExportParams } from "@/pdc/services/export/models/ExportParams.ts";
 import { CarpoolOpenDataListType } from "@/pdc/services/export/repositories/queries/CarpoolOpenDataQuery.ts";
 import { NotificationService } from "@/pdc/services/export/services/NotificationService.ts";
 import { StorageService } from "@/pdc/services/export/services/StorageService.ts";
-import { ExportTarget } from "../models/Export.ts";
 import { ExportRepositoryInterfaceResolver } from "../repositories/ExportRepository.ts";
 import { FieldServiceInterfaceResolver } from "../services/FieldService.ts";
 import { LogServiceInterfaceResolver } from "../services/LogService.ts";
@@ -51,6 +53,9 @@ function defaultDate(offset = 0): Date {
 })
 export class DataGouvCommand implements CommandInterface {
   constructor(
+    protected config: ConfigInterfaceResolver,
+    protected api: DataGouvAPIProvider,
+    protected metadata: DataGouvMetadataProvider,
     protected exportRepository: ExportRepositoryInterfaceResolver,
     protected fileCreatorService: OpenDataFileCreatorServiceInterfaceResolver,
     protected fieldService: FieldServiceInterfaceResolver,
@@ -61,6 +66,11 @@ export class DataGouvCommand implements CommandInterface {
   ) {}
 
   public async call(options: Options): Promise<void> {
+    if (this.config.get("datagouv.api.enabled") === false) {
+      logger.warn("DataGouv Export is DISABLED");
+      return;
+    }
+
     // 1. Get and configure parameters
     // 2. Export the file
     // 3. FileCreator
@@ -78,14 +88,21 @@ export class DataGouvCommand implements CommandInterface {
     const s = toTzString(params.get().start_at, "Europe/Paris", "yyyy-MM");
     const e = toTzString(params.get().end_at, "Europe/Paris", "yyyy-MM");
 
-    logger.info(`Exporting ${filename} from ${s} to ${e}`);
+    logger.info(`Exporting ${filename} from ${s}-01 to ${e}-01`);
 
-    await this.fileCreatorService.write(
+    const path = await this.fileCreatorService.write(
       params,
       new CSVWriter(filename, { tz: options.tz, compress: false, fields }),
     );
 
+    // TODO : calculate stats to build metadata
+
     // upload to storage
 
+    const dataset = await this.api.dataset();
+    const resource = await this.api.upload(path);
+    await this.api.setMetadata(resource, { description: this.metadata.description() });
+
+    logger.info(`Resource uploaded to ${dataset.page}`);
   }
 }
diff --git a/api/src/pdc/services/export/config/datagouv.ts b/api/src/pdc/services/export/config/datagouv.ts
index ae1c2cfcd9..200bf8c397 100644
--- a/api/src/pdc/services/export/config/datagouv.ts
+++ b/api/src/pdc/services/export/config/datagouv.ts
@@ -2,12 +2,24 @@ import { env } from "@/lib/env/index.ts";
 import { FieldFilter, Fields } from "@/pdc/services/export/models/CSVWriter.ts";
 import { CarpoolOpenDataListType } from "@/pdc/services/export/repositories/queries/CarpoolOpenDataQuery.ts";
 
-export const datagouv = {
+/** Shape of the `datagouv.api` configuration entry exported below as `api`. */
+export type DataGouvAPIConfig = {
+  enabled: boolean;
+  notify: boolean;
+  contact: string | null;
+  key: string;
+  url: string;
+  dataset: string;
+};
+
+export const api = {
   enabled: env("APP_DATAGOUV_ENABLED") === "true",
   notify: env("APP_DATAGOUV_NOTIFY") === "true",
   contact: env("APP_DATAGOUV_CONTACT") || null,
-  key: env("APP_DATAGOUV_KEY") || null,
-  url: env("APP_DATAGOUV_URL") || "https://api.gouv.fr/api/1/datasets/",
+  key: env("APP_DATAGOUV_KEY"),
+  // Base URL only: the provider appends "datasets/..." itself.
+  url: env("APP_DATAGOUV_URL") || "https://www.data.gouv.fr/api/1",
+  dataset: env("APP_DATAGOUV_DATASET"),
 };
 
 /**