Skip to content

Commit

Permalink
add upload to data.gouv feature
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathanfallon committed Jan 28, 2025
1 parent c7e1746 commit 9bcb53e
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 7 deletions.
159 changes: 159 additions & 0 deletions api/src/pdc/providers/datagouv/DataGouvAPIProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import { ConfigInterfaceResolver, NotFoundException, provider } from "@/ilos/common/index.ts";
import { readFile } from "@/lib/file/index.ts";
import { logger } from "@/lib/logger/index.ts";
import { basename } from "@/lib/path/index.ts";
import { Dataset, Metadata, Resource } from "@/pdc/providers/datagouv/DataGouvAPITypes.ts";
import { DataGouvAPIConfig } from "@/pdc/services/export/config/datagouv.ts";

/**
 * Client for the data.gouv API, scoped to the single dataset configured
 * under the `datagouv.api` config key.
 *
 * The dataset payload is fetched once and cached on the instance. The cache
 * is cleared after every successful write (upload, metadata update) so that
 * later lookups see the new state of the resource list.
 */
@provider()
export class DataGouvAPIProvider {
  /** Cached dataset payload; reset to null after a write. */
  protected _dataset: Dataset | null = null;
  /** Last resource resolved, uploaded or updated through this provider. */
  protected _resource: Resource | null = null;
  protected config: DataGouvAPIConfig;

  constructor(protected configStore: ConfigInterfaceResolver) {
    this.config = configStore.get("datagouv.api");
  }

  // -------------------------------------------------------------------------------------------------------------------
  // PUBLIC API
  // -------------------------------------------------------------------------------------------------------------------

  /**
   * Get all dataset metadata
   *
   * Includes organisations and the list of resources.
   * The result is cached until the next successful write.
   *
   * @returns the dataset payload
   * @throws NotFoundException when the API answers with an empty payload
   * @throws Error when the HTTP request fails
   */
  public async dataset(): Promise<Dataset> {
    if (this._dataset) {
      return this._dataset;
    }

    const dataset = await this.get<Dataset>(`datasets/${this.config.dataset}`);
    if (!dataset) {
      throw new NotFoundException(`Dataset not found: ${this.config.dataset}`);
    }

    this._dataset = dataset;

    return this._dataset;
  }

  /**
   * Get a specific resource from the dataset by title.
   *
   * Defaults to the latest resource if no title is provided.
   *
   * @param title exact resource title, or null/empty for the default lookup
   * @returns the matching resource
   * @throws NotFoundException when no resource matches
   */
  public async resource(title: string | null = null): Promise<Resource> {
    const dataset = await this.dataset();
    // NOTE(review): the fallback keeps the first resource whose `latest` value
    // contains its own id. If `latest` always embeds the id this is simply the
    // first resource of the list - confirm the ordering returned by the API.
    const resource = title
      ? dataset.resources.find((r) => r.title === title)
      : dataset.resources.find((r) => r.latest.includes(r.id));

    if (!resource) {
      throw new NotFoundException(`Resource not found for dataset ${dataset.id}`);
    }

    this._resource = resource;
    return this._resource;
  }

  /**
   * Tell whether the dataset holds a resource with the given title.
   *
   * Only a "not found" outcome is reported as `false`. Any other failure
   * (network, authentication, ...) is rethrown: treating it as "absent" would
   * make callers create a new resource instead of replacing the existing one.
   *
   * @param title exact resource title
   * @returns true when a resource with that title exists
   * @throws Error when the dataset cannot be fetched
   */
  public async exists(title: string): Promise<boolean> {
    try {
      await this.resource(title);
      return true;
    } catch (e: unknown) {
      if (e instanceof NotFoundException) {
        return false;
      }

      throw e;
    }
  }

  /**
   * Upload a local file as a resource of the dataset.
   *
   * The file's basename is used as the resource title. When a resource with
   * that title already exists its file is replaced, otherwise a new resource
   * is created.
   *
   * @param filepath path of the file to send
   * @returns the resource returned by the API
   * @throws Error when the dataset cannot be fetched or the upload fails
   */
  public async upload(filepath: string): Promise<Resource> {
    const title = basename(filepath);

    // Look the resource up directly rather than depending on a side effect
    // of exists() having populated this._resource.
    const dataset = await this.dataset();
    const existing = dataset.resources.find((r) => r.title === title);

    let url = `datasets/${this.config.dataset}/upload/`;
    if (existing) {
      logger.info(`Resource ${title} already exists, replacing...`);
      url = `datasets/${this.config.dataset}/resources/${existing.id}/upload/`;
    }

    const form = new FormData();
    const file = new File([await readFile(filepath)], title);
    form.append("file", file);

    const resource = await this.post<Resource>(url, form);
    if (!resource) {
      throw new Error(`Failed to upload resource for dataset ${this.config.dataset}`);
    }

    // The cached resource list is now outdated.
    this._dataset = null;
    this._resource = resource;

    return resource;
  }

  /**
   * Update the metadata of a resource.
   *
   * The current title is always sent along with the given metadata; metadata
   * keys take precedence over it.
   *
   * @param resource resource to update (its `id` and `title` are used)
   * @param metadata fields to set on the resource
   * @returns the updated resource returned by the API
   * @throws Error when the request fails or returns an empty payload
   */
  public async setMetadata(resource: Resource, metadata: Metadata): Promise<Resource> {
    const r = await this.put<Resource>(
      `datasets/${this.config.dataset}/resources/${resource.id}`,
      JSON.stringify({
        title: resource.title,
        ...metadata,
      }),
    );

    if (!r) {
      throw new Error(`Failed to update resource ${resource.id} metadata`);
    }

    // The cached copy of this resource is now outdated.
    this._dataset = null;
    this._resource = r;

    return this._resource;
  }

  // -------------------------------------------------------------------------------------------------------------------
  // PRIVATE REQUEST HELPERS
  // -------------------------------------------------------------------------------------------------------------------

  /** Send a GET request to a path relative to the configured base URL. */
  protected async get<T>(url: string): Promise<T> {
    return this._query<T>(url);
  }

  /** Send a POST request with the given body. */
  protected async post<T>(url: string, body: BodyInit): Promise<T> {
    return this._query<T>(url, "POST", body);
  }

  /** Send a PUT request with the given body. */
  protected async put<T>(url: string, body: BodyInit): Promise<T> {
    return this._query<T>(url, "PUT", body);
  }

  // -------------------------------------------------------------------------------------------------------------------
  // INTERNALS
  // -------------------------------------------------------------------------------------------------------------------

  /**
   * Perform an authenticated request and decode the JSON response.
   *
   * @param url path relative to `config.url` (no leading slash)
   * @param method HTTP verb
   * @param body optional request body; FormData is sent as multipart
   * @returns the decoded JSON payload, typed as T without runtime validation
   * @throws Error carrying the method, path and HTTP status on a non-2xx response
   */
  protected async _query<T>(
    url: string,
    method: "GET" | "POST" | "PUT" = "GET",
    body: BodyInit | null = null,
  ): Promise<T> {
    const baseURL = this.config.url;
    const headers: Record<string, string> = {
      "Accept": "application/json",
      "X-API-KEY": this.config.key,
    };

    // For multipart bodies fetch must generate the Content-Type itself so
    // that the boundary parameter is present.
    if (!(body instanceof FormData)) {
      headers["Content-Type"] = "application/json";
    }

    const init: RequestInit = { method, headers };
    if (body) {
      init.body = body;
    }

    const response = await fetch(`${baseURL}/${url}`, init);
    if (!response.ok) {
      // Release the unread body before bailing out.
      await response.body?.cancel();

      // statusText may be empty (e.g. over HTTP/2), so always report the code.
      const status = `${response.status} ${response.statusText}`.trim();
      throw new Error(`${method} ${url} failed: ${status}`);
    }

    return (await response.json()) as T;
  }
}
116 changes: 116 additions & 0 deletions api/src/pdc/providers/datagouv/DataGouvAPITypes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
 * Organisation record, used as the type of `Dataset.organization`.
 *
 * NOTE(review): the field list presumably mirrors the data.gouv API payload -
 * confirm against the API reference before relying on optional/nullable fields.
 */
export type Organisation = {
  acronym: string;
  badges: Array<{ kind: string }>;
  class: string;
  id: string;
  logo: string;
  logo_thumbnail: string;
  name: string;
  page: string;
  slug: string;
  uri: string;
};

/**
 * Dataset record, including its owning organisation and its resources.
 *
 * NOTE(review): the field list presumably mirrors the data.gouv API payload -
 * confirm nullability of individual fields against the API reference.
 */
export type Dataset = {
  acronym: string;
  archived: string | null;
  badges: Array<{ kind: string }>;
  contact_point: string | null;
  created_at: string;
  deleted: string | null;
  description: string;
  extras: {
    "recommendations-externals": Array<{
      id: string;
      messages: { en: object; fr: object };
      score: number;
      source: string;
    }>;
    "recommendations:sources": Array<string>;
  };
  frequency: string;
  frequency_date: string;
  harvest: string | null;
  id: string;
  internal: {
    created_at_internal: string;
    last_modified_internal: string;
  };
  last_modified: string;
  last_update: string;
  license: string;
  metrics: {
    discussions: number;
    followers: number;
    resources_downloads: number;
    reuses: number;
    views: number;
  };
  organization: Organisation;
  owner: string | null;
  page: string;
  private: boolean;
  quality: {
    all_resources_available: boolean;
    dataset_description_quality: boolean;
    has_open_format: boolean;
    has_resources: boolean;
    license: boolean;
    resources_documentation: boolean;
    score: number;
    spatial: boolean;
    temporal_coverage: boolean;
    update_frequency: boolean;
    update_fulfilled_in_time: boolean;
  };
  /** Files attached to the dataset. */
  resources: Array<Resource>;
  schema: string | null;
  slug: string;
  spatial: {
    geom: string | null;
    granularity: string;
    zones: Array<string>;
  };
  tags: Array<string>;
  temporal_coverage: { end: string; start: string };
  title: string;
  uri: string;
};

/**
 * Resource record: one file attached to a dataset.
 *
 * NOTE(review): the field list presumably mirrors the data.gouv API payload -
 * confirm which `extras` keys are always present against the API reference.
 */
export type Resource = {
  checksum: { type: "sha1"; value: string };
  created_at: string;
  description: string | null;
  extras: {
    "check:available": boolean;
    "check:date": string;
    "check:headers:content-type": string;
    "check:status": number;
    "check:timeout": boolean;
  };
  filesize: number;
  filetype: string;
  format: string;
  harvest: string | null;
  id: string;
  internal: { created_at_internal: string; last_modified_internal: string };
  last_modified: string;
  latest: string;
  metrics: Record<string, unknown>;
  mime: string;
  preview_url: string | null;
  schema: {
    name: string | null;
    url: string | null;
    version: string | null;
  };
  title: string;
  type: string;
  url: string;
};

/**
 * Editable resource fields; currently limited to the description.
 */
export type Metadata = { description: string };
16 changes: 16 additions & 0 deletions api/src/pdc/providers/datagouv/DataGouvMetadataProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { provider } from "@/ilos/common/Decorators.ts";
import { ConfigInterfaceResolver } from "@/ilos/common/index.ts";
import { DataGouvAPIConfig } from "@/pdc/services/export/config/datagouv.ts";

/**
 * Supplies metadata values for data.gouv resources.
 *
 * Only a description is produced for now. The `datagouv.api` configuration
 * is loaded at construction time but is not read by `description()`.
 */
@provider()
export class DataGouvMetadataProvider {
  protected config: DataGouvAPIConfig;

  constructor(configStore: ConfigInterfaceResolver) {
    const apiConfig: DataGouvAPIConfig = configStore.get("datagouv.api");
    this.config = apiConfig;
  }

  /**
   * Build the description text.
   *
   * @returns the current date and time as an ISO 8601 string
   */
  description(): string {
    const now = new Date();
    return now.toISOString();
  }
}
4 changes: 3 additions & 1 deletion api/src/pdc/services/export/ExportServiceProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { CommandExtension } from "@/ilos/cli/index.ts";
import { ExtensionInterface, NewableType, serviceProvider } from "@/ilos/common/index.ts";
import { ServiceProvider as AbstractServiceProvider } from "@/ilos/core/index.ts";
import { DefaultTimezoneMiddleware } from "@/pdc/middlewares/DefaultTimezoneMiddleware.ts";
import { DataGouvAPIProvider } from "@/pdc/providers/datagouv/DataGouvAPIProvider.ts";
import { DataGouvMetadataProvider } from "@/pdc/providers/datagouv/DataGouvMetadataProvider.ts";
import { defaultMiddlewareBindings } from "@/pdc/providers/middleware/index.ts";
import { S3StorageProvider } from "@/pdc/providers/storage/index.ts";
import { ValidatorExtension, ValidatorMiddleware } from "@/pdc/providers/validator/index.ts";
Expand Down Expand Up @@ -51,7 +53,7 @@ const repositories = [
];

// External providers are from the @pdc namespace
const externalProviders = [S3StorageProvider];
const externalProviders = [S3StorageProvider, DataGouvAPIProvider, DataGouvMetadataProvider];

// Commands are from the ./commands folder
// and are used to implement the CLI commands.
Expand Down
25 changes: 21 additions & 4 deletions api/src/pdc/services/export/commands/DataGouvCommand.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import { coerceDate } from "@/ilos/cli/index.ts";
import { command, CommandInterface } from "@/ilos/common/index.ts";
import { command, CommandInterface, ConfigInterfaceResolver } from "@/ilos/common/index.ts";
import { logger } from "@/lib/logger/index.ts";
import { today, toTzString } from "@/pdc/helpers/dates.helper.ts";
import { DataGouvAPIProvider } from "@/pdc/providers/datagouv/DataGouvAPIProvider.ts";
import { DataGouvMetadataProvider } from "@/pdc/providers/datagouv/DataGouvMetadataProvider.ts";
import { Timezone } from "@/pdc/providers/validator/index.ts";
import { CSVWriter } from "@/pdc/services/export/models/CSVWriter.ts";
import { ExportTarget } from "@/pdc/services/export/models/Export.ts";
import { ExportParams } from "@/pdc/services/export/models/ExportParams.ts";
import { CarpoolOpenDataListType } from "@/pdc/services/export/repositories/queries/CarpoolOpenDataQuery.ts";
import { NotificationService } from "@/pdc/services/export/services/NotificationService.ts";
import { StorageService } from "@/pdc/services/export/services/StorageService.ts";
import { ExportTarget } from "../models/Export.ts";
import { ExportRepositoryInterfaceResolver } from "../repositories/ExportRepository.ts";
import { FieldServiceInterfaceResolver } from "../services/FieldService.ts";
import { LogServiceInterfaceResolver } from "../services/LogService.ts";
Expand Down Expand Up @@ -51,6 +53,9 @@ function defaultDate(offset = 0): Date {
})
export class DataGouvCommand implements CommandInterface {
constructor(
protected config: ConfigInterfaceResolver,
protected api: DataGouvAPIProvider,
protected metadata: DataGouvMetadataProvider,
protected exportRepository: ExportRepositoryInterfaceResolver,
protected fileCreatorService: OpenDataFileCreatorServiceInterfaceResolver,
protected fieldService: FieldServiceInterfaceResolver,
Expand All @@ -61,6 +66,11 @@ export class DataGouvCommand implements CommandInterface {
) {}

public async call(options: Options): Promise<void> {
if (this.config.get("datagouv.api.enabled") === false) {
logger.warn("DataGouv Export is DISABLED");
return;
}

// 1. Get and configure parameters
// 2. Export the file
// 3. FileCreator
Expand All @@ -78,14 +88,21 @@ export class DataGouvCommand implements CommandInterface {

const s = toTzString(params.get().start_at, "Europe/Paris", "yyyy-MM");
const e = toTzString(params.get().end_at, "Europe/Paris", "yyyy-MM");
logger.info(`Exporting ${filename} from ${s} to ${e}`);
logger.info(`Exporting ${filename} from ${s}-01 to ${e}-01`);

await this.fileCreatorService.write(
const path = await this.fileCreatorService.write(
params,
new CSVWriter<CarpoolOpenDataListType>(filename, { tz: options.tz, compress: false, fields }),
);

// TODO : calculate stats to build metadata

// upload to storage
const dataset = await this.api.dataset();
const resource = await this.api.upload(path);
await this.api.setMetadata(resource, { description: this.metadata.description() });

logger.info(`Resource uploaded to ${dataset.page}`);
}
}

Expand Down
Loading

0 comments on commit 9bcb53e

Please sign in to comment.