Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upload data.gouv.fr #2771

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions api/src/pdc/providers/datagouv/DataGouvAPIProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import { ConfigInterfaceResolver, NotFoundException, provider } from "@/ilos/common/index.ts";
import { readFile } from "@/lib/file/index.ts";
import { logger } from "@/lib/logger/index.ts";
import { basename } from "@/lib/path/index.ts";
import { Dataset, Metadata, Resource } from "@/pdc/providers/datagouv/DataGouvAPITypes.ts";
import { DataGouvAPIConfig } from "@/pdc/services/export/config/datagouv.ts";

/**
 * Small HTTP client for the data.gouv.fr API, bound to the single dataset
 * identified by the `datagouv.api` configuration entry.
 *
 * The dataset payload is cached on the instance after the first fetch and the
 * cache is dropped whenever this provider modifies a resource, so that later
 * lookups do not work from a stale resource list.
 */
@provider()
export class DataGouvAPIProvider {
  protected _dataset: Dataset | null = null;
  protected _resource: Resource | null = null;
  protected config: DataGouvAPIConfig;

  constructor(protected configStore: ConfigInterfaceResolver) {
    this.config = configStore.get("datagouv.api");
  }

  // -------------------------------------------------------------------------------------------------------------------
  // PUBLIC API
  // -------------------------------------------------------------------------------------------------------------------

  /**
   * Get all dataset metadata
   *
   * Includes organisations and the list of resources. The result is cached
   * until the next successful `upload()` or `setMetadata()` call.
   *
   * @returns the configured dataset
   * @throws NotFoundException when the API answers with an empty payload
   * @throws Error when the HTTP request fails (see `_query`)
   */
  public async dataset(): Promise<Dataset> {
    if (this._dataset) {
      return this._dataset;
    }

    const dataset = await this.get<Dataset>(`datasets/${this.config.dataset}`);
    if (!dataset) {
      throw new NotFoundException(`Dataset not found: ${this.config.dataset}`);
    }

    this._dataset = dataset;

    return this._dataset;
  }

  /**
   * Get a specific resource from the dataset by title.
   *
   * When no title is given, the first resource whose `latest` string contains
   * its own `id` is returned.
   *
   * @param title exact resource title to look for, or null for the default lookup
   * @returns the matching resource (also remembered in `_resource`)
   * @throws NotFoundException when no resource matches
   */
  public async resource(title: string | null = null): Promise<Resource> {
    const dataset = await this.dataset();
    const resource = title
      ? dataset.resources.find((r) => r.title === title)
      : dataset.resources.find((r) => r.latest.includes(r.id));

    if (!resource) {
      const wanted = title ? `"${title}"` : "(latest)";
      throw new NotFoundException(`Resource ${wanted} not found for dataset ${dataset.id}`);
    }

    this._resource = resource;
    return this._resource;
  }

  /**
   * Tell whether the dataset already holds a resource with the given title.
   *
   * Only a NotFoundException is interpreted as "does not exist". Any other
   * failure (network, authentication, ...) is rethrown: reporting `false` in
   * those cases would make `upload()` create a duplicate resource instead of
   * replacing the existing one.
   *
   * @param title exact resource title
   */
  public async exists(title: string): Promise<boolean> {
    try {
      await this.resource(title);
      return true;
    } catch (e: unknown) {
      if (e instanceof NotFoundException) {
        return false;
      }
      throw e;
    }
  }

  /**
   * Upload a local file as a resource of the dataset.
   *
   * The resource title is the file's basename. If a resource with that title
   * already exists its file is replaced, otherwise a new resource is created.
   *
   * @param filepath path of the file to send
   * @returns the resource as returned by the API
   * @throws Error when the request fails or the API returns an empty payload
   */
  public async upload(filepath: string): Promise<Resource> {
    const title = basename(filepath);
    let url = `datasets/${this.config.dataset}/upload/`;

    if (await this.exists(title)) {
      // Look the resource up explicitly instead of relying on the `_resource`
      // side effect of exists(); the dataset is cached so this costs no request.
      const existing = await this.resource(title);
      logger.info(`Resource ${title} already exists, replacing...`);
      url = `datasets/${this.config.dataset}/resources/${existing.id}/upload/`;
    }

    const form = new FormData();
    const file = new File([await readFile(filepath)], title);
    form.append("file", file);

    const resource = await this.post<Resource>(url, form);
    if (!resource) {
      throw new Error(`Failed to upload resource for dataset ${this.config.dataset}`);
    }

    // The dataset's resource list changed: force a refetch on next access.
    this._dataset = null;
    this._resource = resource;

    return resource;
  }

  /**
   * Update a resource's metadata.
   *
   * The current title is always sent along with the given metadata fields.
   *
   * @param resource the resource to update (its `id` and `title` are used)
   * @param metadata fields to set on the resource
   * @returns the updated resource as returned by the API
   * @throws Error when the request fails or the API returns an empty payload
   */
  public async setMetadata(resource: Resource, metadata: Metadata): Promise<Resource> {
    const updated = await this.put<Resource>(
      `datasets/${this.config.dataset}/resources/${resource.id}`,
      JSON.stringify({
        title: resource.title,
        ...metadata,
      }),
    );

    if (!updated) {
      throw new Error(`Failed to update resource ${resource.id} metadata`);
    }

    // The cached dataset still holds the previous version of this resource.
    this._dataset = null;
    this._resource = updated;

    return this._resource;
  }

  // -------------------------------------------------------------------------------------------------------------------
  // PRIVATE REQUEST HELPERS
  // -------------------------------------------------------------------------------------------------------------------

  /** Issue a GET request against the API. */
  protected async get<T>(url: string): Promise<T> {
    return this._query<T>(url);
  }

  /** Issue a POST request against the API. */
  protected async post<T>(url: string, body: BodyInit): Promise<T> {
    return this._query<T>(url, "POST", body);
  }

  /** Issue a PUT request against the API. */
  protected async put<T>(url: string, body: BodyInit): Promise<T> {
    return this._query<T>(url, "PUT", body);
  }

  // -------------------------------------------------------------------------------------------------------------------
  // INTERNALS
  // -------------------------------------------------------------------------------------------------------------------

  /**
   * Perform an authenticated request and decode the JSON response.
   *
   * @param url path relative to the configured base URL (no leading slash)
   * @param method HTTP verb
   * @param body optional request body
   * @returns the decoded JSON payload, typed as T without runtime validation
   * @throws Error when the response status is not 2xx
   */
  protected async _query<T>(
    url: string,
    method: "GET" | "POST" | "PUT" = "GET",
    body: BodyInit | null = null,
  ): Promise<T> {
    const headers: Record<string, string> = {
      "Accept": "application/json",
      "X-API-KEY": this.config.key,
    };

    // Multipart bodies must not carry an explicit Content-Type: fetch has to
    // generate it itself so that the boundary parameter is included.
    if (!(body instanceof FormData)) {
      headers["Content-Type"] = "application/json";
    }

    const init: RequestInit = { method, headers };
    if (body) {
      init.body = body;
    }

    const response = await fetch(`${this.config.url}/${url}`, init);
    if (!response.ok) {
      // statusText is frequently empty, so always include the numeric status.
      throw new Error(`${method} ${url} failed: ${response.status} ${response.statusText}`.trim());
    }

    return (await response.json()) as T;
  }
}
116 changes: 116 additions & 0 deletions api/src/pdc/providers/datagouv/DataGouvAPITypes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
 * Organisation record as embedded in a dataset payload
 * (see `Dataset.organization`).
 */
export type Organisation = {
  acronym: string;
  badges: Array<{ kind: string }>;
  class: string;
  id: string;
  logo: string;
  logo_thumbnail: string;
  name: string;
  page: string;
  slug: string;
  uri: string;
};

/**
 * Dataset payload as read by `DataGouvAPIProvider.dataset()`.
 *
 * NOTE(review): presumably mirrors the data.gouv.fr dataset JSON; nullability
 * of the individual fields should be confirmed against the API documentation.
 */
export type Dataset = {
  acronym: string;
  archived: string | null;
  badges: Array<{ kind: string }>;
  contact_point: string | null;
  created_at: string;
  deleted: string | null;
  description: string;
  extras: {
    "recommendations-externals": Array<{
      id: string;
      messages: { en: object; fr: object };
      score: number;
      source: string;
    }>;
    "recommendations:sources": Array<string>;
  };
  frequency: string;
  frequency_date: string;
  harvest: string | null;
  id: string;
  internal: { created_at_internal: string; last_modified_internal: string };
  last_modified: string;
  last_update: string;
  license: string;
  metrics: {
    discussions: number;
    followers: number;
    resources_downloads: number;
    reuses: number;
    views: number;
  };
  organization: Organisation;
  owner: string | null;
  page: string;
  private: boolean;
  quality: {
    all_resources_available: boolean;
    dataset_description_quality: boolean;
    has_open_format: boolean;
    has_resources: boolean;
    license: boolean;
    resources_documentation: boolean;
    score: number;
    spatial: boolean;
    temporal_coverage: boolean;
    update_frequency: boolean;
    update_fulfilled_in_time: boolean;
  };
  // Resources attached to the dataset; searched by title in the API provider.
  resources: Array<Resource>;
  schema: string | null;
  slug: string;
  spatial: { geom: string | null; granularity: string; zones: Array<string> };
  tags: Array<string>;
  temporal_coverage: { end: string; start: string };
  title: string;
  uri: string;
};

/**
 * A single resource (file) attached to a dataset.
 *
 * NOTE(review): presumably mirrors the data.gouv.fr resource JSON; confirm
 * field nullability against the API documentation.
 */
export type Resource = {
  checksum: { type: "sha1"; value: string };
  created_at: string;
  description: string | null;
  extras: {
    "check:available": boolean;
    "check:date": string;
    "check:headers:content-type": string;
    "check:status": number;
    "check:timeout": boolean;
  };
  filesize: number;
  filetype: string;
  format: string;
  harvest: string | null;
  id: string;
  internal: { created_at_internal: string; last_modified_internal: string };
  last_modified: string;
  latest: string;
  metrics: Record<string, unknown>;
  mime: string;
  preview_url: string | null;
  schema: {
    name: string | null;
    url: string | null;
    version: string | null;
  };
  title: string;
  type: string;
  url: string;
};

/**
 * Resource fields that `DataGouvAPIProvider.setMetadata()` sends alongside the
 * resource title. Only the description is supported for now.
 */
export type Metadata = { description: string };
34 changes: 34 additions & 0 deletions api/src/pdc/providers/datagouv/DataGouvMetadataProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { provider } from "@/ilos/common/Decorators.ts";
import { ConfigInterfaceResolver } from "@/ilos/common/index.ts";
import { DataGouvAPIConfig } from "@/pdc/services/export/config/datagouv.ts";
import fr from "npm:date-fns@^3.6/locale/fr";

/**
 * Input of `DataGouvMetadataProvider.description()`.
 *
 * `start_at` is the start date of the exported period.
 */
export type DataGouvDescriptionParams = { start_at: Date };

/**
 * Builds the textual metadata attached to resources published on data.gouv.fr.
 */
@provider()
export class DataGouvMetadataProvider {
  protected config: DataGouvAPIConfig;

  constructor(configStore: ConfigInterfaceResolver) {
    this.config = configStore.get("datagouv.api");
  }

  /**
   * Build the (French) description of an exported resource.
   *
   * The period label in the heading is derived from `params.start_at` and
   * rendered as a French month and year (e.g. "juin 2024").
   *
   * TODO: the trip counts below are still hard-coded sample figures; they must
   * be replaced by statistics computed from the export.
   *
   * @param params description inputs; only `start_at` is used
   * @returns the multi-line description text
   */
  description(params: DataGouvDescriptionParams): string {
    // The imported date-fns `fr` value is a locale object, not a formatter:
    // calling it throws a TypeError. The built-in Intl formatter is used instead.
    // NOTE(review): Europe/Paris is assumed because DataGouvCommand formats the
    // same date with that time zone — confirm.
    const period = new Intl.DateTimeFormat("fr-FR", {
      month: "long",
      year: "numeric",
      timeZone: "Europe/Paris",
    }).format(params.start_at);

    return `
Spécificités jeu de données ${period}

Les données concernent également les trajets dont le point de départ OU d'arrivée est situé en dehors du territoire français.

Nombre trajets collectés et validés par le registre de preuve de covoiturage 1000521
Nombre de trajets exposés dans le jeu de données : 973640
Nombre de trajets supprimés du jeu de données : 26881 = 14885 + 15058 - 3062
Nombre de trajets dont l'occurrence du code INSEE de départ est < 6 : 14885
Nombre de trajets dont l'occurrence du code INSEE d'arrivée est < 6 : 15058
Nombre de trajets dont l'occurrence du code INSEE de départ ET d'arrivée est < 6 : 3062
`;
  }
}
4 changes: 3 additions & 1 deletion api/src/pdc/services/export/ExportServiceProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { CommandExtension } from "@/ilos/cli/index.ts";
import { ExtensionInterface, NewableType, serviceProvider } from "@/ilos/common/index.ts";
import { ServiceProvider as AbstractServiceProvider } from "@/ilos/core/index.ts";
import { DefaultTimezoneMiddleware } from "@/pdc/middlewares/DefaultTimezoneMiddleware.ts";
import { DataGouvAPIProvider } from "@/pdc/providers/datagouv/DataGouvAPIProvider.ts";
import { DataGouvMetadataProvider } from "@/pdc/providers/datagouv/DataGouvMetadataProvider.ts";
import { defaultMiddlewareBindings } from "@/pdc/providers/middleware/index.ts";
import { S3StorageProvider } from "@/pdc/providers/storage/index.ts";
import { ValidatorExtension, ValidatorMiddleware } from "@/pdc/providers/validator/index.ts";
Expand Down Expand Up @@ -51,7 +53,7 @@ const repositories = [
];

// External providers are from the @pdc namespace
const externalProviders = [S3StorageProvider];
const externalProviders = [S3StorageProvider, DataGouvAPIProvider, DataGouvMetadataProvider];

// Commands are from the ./commands folder
// and are used to implement the CLI commands.
Expand Down
29 changes: 25 additions & 4 deletions api/src/pdc/services/export/commands/DataGouvCommand.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import { coerceDate } from "@/ilos/cli/index.ts";
import { command, CommandInterface } from "@/ilos/common/index.ts";
import { command, CommandInterface, ConfigInterfaceResolver } from "@/ilos/common/index.ts";
import { logger } from "@/lib/logger/index.ts";
import { today, toTzString } from "@/pdc/helpers/dates.helper.ts";
import { DataGouvAPIProvider } from "@/pdc/providers/datagouv/DataGouvAPIProvider.ts";
import { DataGouvMetadataProvider } from "@/pdc/providers/datagouv/DataGouvMetadataProvider.ts";
import { Timezone } from "@/pdc/providers/validator/index.ts";
import { CSVWriter } from "@/pdc/services/export/models/CSVWriter.ts";
import { ExportTarget } from "@/pdc/services/export/models/Export.ts";
import { ExportParams } from "@/pdc/services/export/models/ExportParams.ts";
import { CarpoolOpenDataListType } from "@/pdc/services/export/repositories/queries/CarpoolOpenDataQuery.ts";
import { NotificationService } from "@/pdc/services/export/services/NotificationService.ts";
import { StorageService } from "@/pdc/services/export/services/StorageService.ts";
import { ExportTarget } from "../models/Export.ts";
import { ExportRepositoryInterfaceResolver } from "../repositories/ExportRepository.ts";
import { FieldServiceInterfaceResolver } from "../services/FieldService.ts";
import { LogServiceInterfaceResolver } from "../services/LogService.ts";
Expand Down Expand Up @@ -51,6 +53,9 @@ function defaultDate(offset = 0): Date {
})
export class DataGouvCommand implements CommandInterface {
constructor(
protected config: ConfigInterfaceResolver,
protected api: DataGouvAPIProvider,
protected metadata: DataGouvMetadataProvider,
protected exportRepository: ExportRepositoryInterfaceResolver,
protected fileCreatorService: OpenDataFileCreatorServiceInterfaceResolver,
protected fieldService: FieldServiceInterfaceResolver,
Expand All @@ -61,6 +66,11 @@ export class DataGouvCommand implements CommandInterface {
) {}

public async call(options: Options): Promise<void> {
if (this.config.get("datagouv.api.enabled") === false) {
logger.warn("DataGouv Export is DISABLED");
return;
}

// 1. Get and configure parameters
// 2. Export the file
// 3. FileCreator
Expand All @@ -78,14 +88,25 @@ export class DataGouvCommand implements CommandInterface {

const s = toTzString(params.get().start_at, "Europe/Paris", "yyyy-MM");
const e = toTzString(params.get().end_at, "Europe/Paris", "yyyy-MM");
logger.info(`Exporting ${filename} from ${s} to ${e}`);
logger.info(`Exporting ${filename} from ${s}-01 to ${e}-01`);

await this.fileCreatorService.write(
const path = await this.fileCreatorService.write(
params,
new CSVWriter<CarpoolOpenDataListType>(filename, { tz: options.tz, compress: false, fields }),
);

// TODO : calculate stats to build metadata

// upload to storage
const dataset = await this.api.dataset();
const resource = await this.api.upload(path);
await this.api.setMetadata(resource, {
description: this.metadata.description({
start_at: params.get().start_at,
}),
});

logger.info(`Resource uploaded to ${dataset.page}`);
}
}

Expand Down
Loading
Loading