diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e8541355..d2611516 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,17 +12,17 @@ jobs: uses: actions/setup-java@v1 with: java-version: 1.8 - - name: Install metafacture-core - run: | - git clone https://github.com/metafacture/metafacture-core.git - cd metafacture-core - git checkout 5.7.0-rc1 - ./gradlew publishToMavenLocal + #- name: Install metafacture-core + # run: | + # git clone https://github.com/metafacture/metafacture-core.git + # cd metafacture-core + # git checkout metafacture-core-5.7.0 + # ./gradlew publishToMavenLocal - name: Install metafacture-fix run: | git clone https://github.com/metafacture/metafacture-fix.git cd metafacture-fix - git checkout master + git checkout 0.7.0 ./gradlew publishToMavenLocal - name: Run tests run: sbt update test diff --git a/app/controllers/Accept.java b/app/controllers/Accept.java index c35d79e4..0fdccbff 100644 --- a/app/controllers/Accept.java +++ b/app/controllers/Accept.java @@ -23,6 +23,7 @@ enum Format { HTML("html", "text/html"), // JAVASCRIPT("js", "text/javascript", "application/javascript"), // CSV("csv", "text/csv"), // + TSV("tsv", "text/tab-separated-values"), // BULK("bulk", "application/x-jsonlines"), // RDF_XML("rdf", "application/rdf+xml", "application/xml", "text/xml"), // N_TRIPLE("nt", "application/n-triples", "text/plain"), // diff --git a/app/controllers/Application.java b/app/controllers/Application.java index 8d0d3c6c..c601641f 100644 --- a/app/controllers/Application.java +++ b/app/controllers/Application.java @@ -421,6 +421,14 @@ private static Result searchResult(String q, String location, int from, "attachment; filename=organisations.csv"); return ok(csvExport(format, orgs)).as("text/csv; charset=utf-8"); }); + results.put("tsv", () -> { + String queryResultString = + searchQueryResult(q, location, from, size, aggregations); + String orgs = 
Json.parse(queryResultString).get("member").toString(); + response().setHeader("Content-Disposition", + "attachment; filename=organisations.tsv"); + return ok(csvExport(format, orgs, CsvExport.TAB_SEPARATOR)).as("text/tab-separated-values; charset=utf-8"); + }); Supplier json = () -> { String queryResultString = searchQueryResult(q, location, from, size, aggregations); @@ -490,11 +498,21 @@ private static Optional getOptional(JsonNode json, String field) { return Optional.ofNullable(json.get(field)); } - private static String csvExport(String format, String orgs) { + private static String csvExport(String format, String orgs, String separator) { String[] formatConfig = format.split(FORMAT_CONFIG_SEP); // e.g. csv:name,id String fields = formatConfig.length > 1 && !formatConfig[1].isEmpty() ? formatConfig[1] : defaultFields(); - return new CsvExport(orgs).of(fields); + if (separator == null) { + return new CsvExport(orgs).of(fields); + } + else { + String fieldsWithNonDefaultSeparator=fields.replaceAll(",", separator); + return new CsvExport(orgs).of(fieldsWithNonDefaultSeparator, separator); + } + } + + private static String csvExport(String format, String orgs) { + return csvExport(format, orgs, CsvExport.DEFAULT_SEPARATOR); } private static String defaultFields() { @@ -705,6 +723,12 @@ private static Result resultFor(String id, JsonNode json, String format) { return ok(csvExport(format, "[" + json.toString() + "]")) .as("text/csv; charset=utf-8"); }); + results.put("tsv", () -> { + response().setHeader("Content-Disposition", + String.format("attachment; filename=%s.tsv", id)); + return ok(csvExport(format, "[" + json.toString() + "]", CsvExport.TAB_SEPARATOR)) + .as("text/tab-separated-values; charset=utf-8"); + }); Pair contentAndType = contentAndType(json, format); Supplier rdfSupplier = () -> ok(contentAndType.getLeft()).as(contentAndType.getRight()); diff --git a/app/transformation/CsvExport.java b/app/transformation/CsvExport.java index aeb21dd9..aaa9f104 
100644 --- a/app/transformation/CsvExport.java +++ b/app/transformation/CsvExport.java @@ -15,13 +15,16 @@ import play.libs.Json; /** - * Export organisations JSON data as CSV. + * Export organisations JSON data as CSV. Allows defining a + * separator other than the comma. * * @author Fabian Steeg (fsteeg) */ public class CsvExport { private final JsonNode organisations; + public final static String DEFAULT_SEPARATOR = ","; + public final static String TAB_SEPARATOR = "\t"; /** * @param json The organisations JSON data to export @@ -35,24 +38,32 @@ public CsvExport(String json) { * @return The data for the given fields in CSV format */ public String of(String fields) { + return of(fields, DEFAULT_SEPARATOR); + } + + /** + * @param fields The JSON fields to include in the export + * @param separator The separator to separate entries in the CSV + * @return The data for the given fields in [C*]SV format + */ + public String of(final String fields, final String separator) { StringBuilder csv = new StringBuilder(fields + "\n"); - for (Iterator iter = organisations.elements(); iter.hasNext();) { + for (Iterator iter = organisations.elements(); iter.hasNext(); ) { JsonNode org = iter.next(); - csv.append(Arrays.asList(fields.split(",")).stream().map(field -> { + csv.append(Arrays.asList(fields.split(separator)).stream().map(field -> { try { Object value = JsonPath.read(Configuration.defaultConfiguration() .jsonProvider().parse(org.toString()), "$." + field); - return String.format("\"%s\"", - value.toString().replaceAll("\"", "\"\"")); + return separator==DEFAULT_SEPARATOR ? 
String.format("\"%s\"", + value.toString().replaceAll("\"", "\"\"")) : value.toString(); } catch (PathNotFoundException x) { Logger.trace(x.getMessage()); // https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#empty-and-quoted-cells return ""; } - }).collect(Collectors.joining(","))).append("\n"); + }).collect(Collectors.joining(separator))).append("\n"); } return csv.toString(); } - } diff --git a/app/views/api.scala.html b/app/views/api.scala.html index dff799b6..e75a41d5 100644 --- a/app/views/api.scala.html +++ b/app/views/api.scala.html @@ -59,6 +59,7 @@

@Messages.get("api.content_types.header") curl http://lobid.org@routes.Application.get("DE-6")

@Messages.get("api.content_types.negotiate")

curl --header "Accept: text/csv" http://lobid.org@routes.Application.search("kunst")

+

curl --header "Accept: text/tab-separated-values" http://lobid.org@routes.Application.search("kunst")

curl --header "Accept: application/x-jsonlines" http://lobid.org@routes.Application.search("kunst") > kunst.jsonl

@Messages.get("api.content_types.override") @routes.Application.get("DE-6", format="json")

@Messages.get("api.content_types.dotFormat") @routes.Application.getDotFormat("DE-6", format="json")

@@ -70,6 +71,10 @@

@Messages.get("api.csv.header") @Messages.get("api.tsv.header")

+ @desc(Messages.get("api.tsv.default"), routes.Application.search("kunst", size=300, format="tsv")) + @desc(Messages.get("api.tsv.custom"), routes.Application.search("kunst", size=300, format="tsv:name,isil,url,classification.label.de")) +

@Messages.get("api.autocomplete.header")

@Messages.get("api.autocomplete.intro")

@desc(Messages.get("api.autocomplete.name") + " \"format=json:name\"", routes.Application.search("name:dnb OR alternateName:dnb", format="json:name")) @@ -111,4 +116,4 @@

OpenRefine @if(controllers.Application.currentLang()=="de"){} -} \ No newline at end of file +} diff --git a/app/views/search.scala.html b/app/views/search.scala.html index b79614f2..f807a916 100644 --- a/app/views/search.scala.html +++ b/app/views/search.scala.html @@ -154,6 +154,6 @@

@Messages.get("search.location") @if(!location.isEmpty){ @defining(if(!q.isEmpty) q else "*") { qParam =>

@Html(Messages.get("search.footer.api_text", routes.Application.search(q=qParam, from=from, format="json", location=location), - routes.Application.search(q=qParam, from=from, format="csv", location=location), routes.Application.api()))

+ routes.Application.search(q=qParam, from=from, format="csv", location=location), routes.Application.search(q=qParam, from=from, format="tsv", location=location), routes.Application.api()))

}} else { @if(!q.isEmpty) {} } } -}} \ No newline at end of file +}} diff --git a/build.sbt b/build.sbt index c49185f1..fbc9fae3 100644 --- a/build.sbt +++ b/build.sbt @@ -17,18 +17,18 @@ libraryDependencies ++= Seq( "com.fasterxml.jackson.core" % "jackson-annotations" % "2.15.1", "com.github.jsonld-java" % "jsonld-java" % "0.13.4", "org.apache.jena" % "jena-arq" % "3.17.0", - "org.metafacture" % "metamorph" % "5.7.0-rc1" exclude("org.slf4j", "slf4j-simple"), - "org.metafacture" % "metafacture-elasticsearch" % "5.7.0-rc1", - "org.metafacture" % "metamorph-test" % "5.7.0-rc1", - "org.metafacture" % "metafacture-json" % "5.7.0-rc1", - "org.metafacture" % "metafacture-csv" % "5.7.0-rc1", - "org.metafacture" % "metafacture-io" % "5.7.0-rc1", - "org.metafacture" % "metafacture-triples" % "5.7.0-rc1", - "org.metafacture" % "metafacture-biblio" % "5.7.0-rc1", - "org.metafacture" % "metafacture-xml" % "5.7.0-rc1", - "org.metafacture" % "metafacture-framework" % "5.7.0-rc1", - "org.metafacture" % "metafacture-strings" % "5.7.0-rc1", - "org.metafacture" % "metafix" % "0.6.0-SNAPSHOT", + "org.metafacture" % "metamorph" % "5.7.0" exclude("org.slf4j", "slf4j-simple"), + "org.metafacture" % "metafacture-elasticsearch" % "5.7.0", + "org.metafacture" % "metamorph-test" % "5.7.0", + "org.metafacture" % "metafacture-json" % "5.7.0", + "org.metafacture" % "metafacture-csv" % "5.7.0", + "org.metafacture" % "metafacture-io" % "5.7.0", + "org.metafacture" % "metafacture-triples" % "5.7.0", + "org.metafacture" % "metafacture-biblio" % "5.7.0", + "org.metafacture" % "metafacture-xml" % "5.7.0", + "org.metafacture" % "metafacture-framework" % "5.7.0", + "org.metafacture" % "metafacture-strings" % "5.7.0", + "org.metafacture" % "metafix" % "0.7.0", "org.xbib.elasticsearch.plugin" % "elasticsearch-plugin-bundle" % "2.3.2.0", "com.jayway.jsonpath" % "json-path" % "2.2.0", "net.java.dev.jna" % "jna" % "4.1.0", diff --git a/conf/dataset.jsonld b/conf/dataset.jsonld index ee821109..7e0eeef1 
100644 --- a/conf/dataset.jsonld +++ b/conf/dataset.jsonld @@ -137,6 +137,7 @@ "documentation": "http://lobid.org/organisations/api", "encodingFormat": [ "text/csv", + "text/tab-separated-values", "application/json", "application/ld+json" ], diff --git a/conf/messages.de b/conf/messages.de index ea2e26b7..bbb97817 100644 --- a/conf/messages.de +++ b/conf/messages.de @@ -37,7 +37,7 @@ search.location = Standort search.prev = vorige search.next = nächste search.total_results = Trefferzahl -search.footer.api_text = Sie können auf diese Daten auch als
JSON oder CSV über unsere Programmierschnittstelle zugreifen. +search.footer.api_text = Sie können auf diese Daten auch als JSON oder CSV (resp. TSV) über unsere Programmierschnittstelle zugreifen. search.footer.no_results = Keine Ergebnisse für {0}. search.type = Typ search.collects = Bestandsgröße @@ -64,7 +64,7 @@ api.location.distance = Suche über Distanz zu einem Punkt ("location": Koordina api.content_types.header = Inhaltstypen api.content_types.default = Standardmäßig liefert dieser Dienst strukturierte API-Antworten (als JSON): -api.content_types.negotiate = Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), CSV (text/csv), JSON lines (application/x-jsonlines) oder HTML (text/html): +api.content_types.negotiate = Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), CSV (text/csv), TSV (text/tab-separated-values), JSON lines (application/x-jsonlines) oder HTML (text/html): api.content_types.override = Der Query-Parameter "format" kann verwendet werden, um den Accept-Header aufzuheben, z.B. 
zur Anzeige von JSON im Browser: api.content_types.dotFormat = Der Wert des Format-Parameters kann für Einzeltreffer auch in URLs als Dateiendung verwendet werden: api.content_types.compress = Für größere Anfragen kann die Antwort als gzip komprimiert werden: @@ -74,6 +74,10 @@ api.csv.header = CSV-Export api.csv.default = Standardfelder ("format=csv") api.csv.custom = Benutzerdefinierte Felder ("format": zu verwendende Felder, mit Punkten für geschachtelte Felder im Format "csv:feld1,feld2.unterfeld") +api.tsv.header = TSV-Export +api.tsv.default = Standardfelder ("format=tsv") +api.tsv.custom = Benutzerdefinierte Felder ("format": zu verwendende Felder, mit Punkten für geschachtelte Felder im Format "tsv:feld1,feld2.unterfeld") + api.autocomplete.header = Autovervollständigung api.autocomplete.intro = Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung aus einem angegebenen Feld: api.autocomplete.name = Name vorschlagen: diff --git a/conf/messages.en b/conf/messages.en index 200f8505..1caa8317 100644 --- a/conf/messages.en +++ b/conf/messages.en @@ -37,7 +37,7 @@ search.location = Location search.prev = prev search.next = next search.total_results = Total results -search.footer.api_text = You can also access this data as JSON or CSV using our API. +search.footer.api_text = You can also access this data as JSON or CSV (or TSV) using our API. search.footer.no_results = No results for {0}. 
search.type = Type search.collects = Stock size @@ -64,7 +64,7 @@ api.location.distance = Query with distance ("location": coordinate of a point a api.content_types.header = Content types api.content_types.default = By default, this service returns structured API responses (as JSON): -api.content_types.negotiate = It supports content negotiation based on the "Accept" header to serve JSON (application/json), CSV (text/csv), JSON lines (application/x-jsonlines), or HTML (text/html): +api.content_types.negotiate = It supports content negotiation based on the "Accept" header to serve JSON (application/json), CSV (text/csv), TSV (text/tab-separated-values), JSON lines (application/x-jsonlines), or HTML (text/html): api.content_types.override = An optional "format" query parameter can be used to override the "Accept" header, e.g. to display JSON in a browser: api.content_types.dotFormat = For individual organisations, the format parameter values can be used as file extensions in URLs: api.content_types.compress = For larger requests, the response can be compressed as gzip: @@ -74,6 +74,10 @@ api.csv.header = CSV export api.csv.default = Default fields ("format=csv") api.csv.custom = Custom fields ("format": fields to use, with dots for nested fields "csv:field1,field2.subfield") +api.tsv.header = TSV export +api.tsv.default = Default fields ("format=tsv") +api.tsv.custom = Custom fields ("format": fields to use, with dots for nested fields "tsv:field1,field2.subfield") + api.autocomplete.header = Auto-complete api.autocomplete.intro = The API supports a response format for auto-complete suggestions using a specified field: api.autocomplete.name = Suggest name: diff --git a/test/transformation/CsvExportTest.java b/test/transformation/CsvExportTest.java index 83c51104..bbbd312d 100644 --- a/test/transformation/CsvExportTest.java +++ b/test/transformation/CsvExportTest.java @@ -15,9 +15,27 @@ @SuppressWarnings("javadoc") public class CsvExportTest { + @Test + public void 
testFlatFieldsDefaultSeparator() { + testFlatFields(CsvExport.DEFAULT_SEPARATOR); + } + + @Test + public void testFlatFieldsTabulatorSeparator() { + testFlatFields(CsvExport.TAB_SEPARATOR); + } + + @Test + public void testNestedFieldsDefaultSeparator() { + testNestedFields(CsvExport.DEFAULT_SEPARATOR); + } @Test - public void testFlatFields() { + public void testNestedFieldsTabulatorSeparator() { + testNestedFields(CsvExport.TAB_SEPARATOR); + } + + private void testFlatFields(final String sep) { ObjectNode node1 = Json.newObject(); node1.put("field1", "org1-value1"); node1.put("field2", "org1-value2"); @@ -28,15 +46,17 @@ public void testFlatFields() { node2.put("field3", "org2-value3"); List orgs = Arrays.asList(node1, node2); CsvExport export = new CsvExport(Json.stringify(Json.toJson(orgs))); - String expected = String.format("%s,%s\n%s,%s\n%s,%s\n", // + String expected = String.format("%s" + sep + "%s\n%s" + sep + "%s\n%s" + sep + "%s\n", // "field1", "field3", // "\"org1-value1\"", "\"org1-value3\"", // "\"org2-value1\"", "\"org2-value3\""); - assertThat(export.of("field1,field3")).isEqualTo(expected); + if (sep.equals(CsvExport.TAB_SEPARATOR)) { + expected=expected.replaceAll("\"",""); + } + assertThat(export.of("field1" + sep + "field3", sep)).isEqualTo(expected); } - @Test - public void testNestedFields() { + private void testNestedFields(final String sep) { ObjectNode org1 = Json.newObject(); ObjectNode sub1 = Json.newObject(); org1.put("field1", "org1-value1"); @@ -55,15 +75,17 @@ public void testNestedFields() { sub2.put("field3", "org2-sub3"); List orgs = Arrays.asList(org1, org2); CsvExport export = new CsvExport(Json.stringify(Json.toJson(orgs))); - String expected = String.format("%s,%s\n%s,%s\n%s,%s\n", // + String expected = String.format("%s" + sep + "%s\n%s" + sep + "%s\n%s" + sep + "%s\n", // "field1", "field3.field2", // "\"org1-value1\"", "\"org1-sub2\"", // "\"org2-value1\"", "\"org2-sub2\""); - 
assertThat(export.of("field1,field3.field2")).isEqualTo(expected); + if (sep.equals(CsvExport.TAB_SEPARATOR)) { + expected=expected.replaceAll("\"",""); + } + assertThat(export.of("field1" + sep + "field3.field2", sep)).isEqualTo(expected); } - @Test - public void testMissingField() { + private void testMissingField() { ObjectNode org = Json.newObject(); org.put("field1", "org1-value1"); org.put("field2", "org1-value2");