Skip to content

Commit

Permalink
https://github.com/water-fountains/datablue/issues/39
Browse files Browse the repository at this point in the history
  • Loading branch information
Ralf Hauser committed Jan 13, 2020
1 parent 01663c5 commit df83f38
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 55 deletions.
41 changes: 29 additions & 12 deletions server/api/controllers/controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ export class Controller {

// return either the full or reduced version, depending on the "essential" parameter of the query
if(req.query.essential){
res.json(r_essential);
doJson(res,r_essential,'r_essential'); //res.json(r_essential);
}else{
res.json(fountainCollection);
doJson(res,fountainCollection,'fountainCollection'); //res.json(fountainCollection);
}

// also create list of processing errors (for proximap#206)
Expand All @@ -131,9 +131,9 @@ export class Controller {
// otherwise, get the data from storage
else{
if(req.query.essential){
res.json(cityCache.get(req.query.city + '_essential'));
doJson(res,cityCache.get(req.query.city + '_essential'),'fromCache essential'); //res.json(cityCache.get(req.query.city + '_essential'));
}else{
res.json(cityCache.get(req.query.city));
doJson(res,cityCache.get(req.query.city), 'fromCache'); //res.json(cityCache.get(req.query.city));
}
}
}
Expand All @@ -144,7 +144,7 @@ export class Controller {
* it simply returns the object created by fountain.properties.js
*/
getPropertyMetadata(req, res) {
res.json(fountain_property_metadata);
doJson(res,fountain_property_metadata,'getPropertyMetadata'); //res.json(fountain_property_metadata);
l.info("controller.js: getPropertyMetadata sent "+new Date().toISOString());
}

Expand All @@ -153,7 +153,7 @@ export class Controller {
*/
getLocationMetadata(req, res) {
// let gak = locations.gak;
res.json(locations);
doJson(res,locations,'getLocationMetadata'); //res.json(locations);
l.info("controller.js: getLocationMetadata sent "+new Date().toISOString());
}

Expand All @@ -171,7 +171,7 @@ export class Controller {
}
cityCache.get(key, (err, value) => {
if (!err) {
res.json(value)
doJson(res,value,'cityCache.get '+key); //res.json(value);
l.info("controller.js: getProcessingErrors !err sent "+new Date().toISOString());
} else {
let errMsg = 'Error with cache: ' + err;
Expand All @@ -184,6 +184,22 @@ export class Controller {
}
export default new Controller();

/**
 * Send `obj` as the JSON body of `resp` without ever throwing to the caller.
 *
 * A null/undefined payload is logged as an error but still sent. If
 * `resp.json(obj)` throws, the failure is logged and, provided no headers
 * have gone out yet, an HTTP 500 with a small JSON error body is sent instead,
 * so that the client is not left waiting for a response that never comes.
 *
 * @param {Object} resp - response object; must provide `json(body)`. Its
 *   `headersSent` flag and `status(code)` method are used, when available,
 *   for the failure reply.
 * @param {*} obj - payload to send.
 * @param {string} dbg - short caller label that is included in log messages.
 * @returns {*} whatever `resp.json(obj)` returns, or `undefined` if sending failed.
 */
function doJson(resp, obj, dbg) {
  //TODO consider using https://github.com/timberio/timber-js/issues/69 or rather https://github.com/davidmarkclements/fast-safe-stringify
  try {
    if (null == obj) {
      // `== null` deliberately matches both null and undefined (e.g. a cache miss)
      const errS = 'controller.js doJson null == obj: '+dbg+' '+new Date().toISOString();
      l.error(errS);
    }
    return resp.json(obj);
  } catch (err) {
    const errS = 'controller.js doJson errors: '+err+' '+dbg+' '+new Date().toISOString();
    l.error(errS);
    console.trace(errS);
    // Only attempt a fallback reply while the response is still untouched;
    // once headers are out, a second write would itself raise an error.
    if (!resp.headersSent && typeof resp.status === 'function') {
      try {
        resp.status(500).json({error: 'Internal error: response could not be serialized'});
      } catch (sendErr) {
        l.error('controller.js doJson could not send error response: '+sendErr+' '+dbg+' '+new Date().toISOString());
      }
    }
    return undefined;
  }
}

/**
* Function to respond to request by returning the fountain as defined by the provided identifier
*/
Expand All @@ -205,10 +221,11 @@ function byId(req, res, dbg){
}
return f.properties['id_'+req.query.database].value === req.query.idval
});
res.json(fountain)
doJson(res,fountain, 'byId '+dbg); // res.json(fountain);
l.info('controller.js byId: of '+cityS+' res.json '+dbg+' '+new Date().toISOString());
}catch (e) {
l.error(`controller.js byId: Error finding fountain in preprocessed data: ${e} `+cityS+ ' '+dbg+' '+new Date().toISOString());
l.error(`controller.js byId: Error finding fountain in preprocessed data: ${e} , city: `+cityS+ ' '+dbg+' '+new Date().toISOString());
l.error(e.stack);
}

}
Expand Down Expand Up @@ -245,7 +262,7 @@ function reprocessFountainAtCoords(req, res, dbg) {
l.error(`Error collecting Wikidata data: ${e}`);
res.status(500).send(e.stack);
});

let debugAll = true;
// When both OSM and Wikidata data have been collected, continue with joint processing
Promise.all([osmPromise, wikidataPromise])

Expand All @@ -256,7 +273,7 @@ function reprocessFountainAtCoords(req, res, dbg) {
.then(r => conflate({
osm: r.osm,
wikidata: r.wikidata
},dbg))
},dbg, debugAll))

// return only the fountain that is closest to the coordinates of the query
.then(r => {
Expand All @@ -279,7 +296,7 @@ function reprocessFountainAtCoords(req, res, dbg) {
// Update cache with newly processed fountain
.then(r=>{
let closest = updateCacheWithFountain(cityCache, r[0], req.query.city);
res.json(closest);
doJson(res,closest,'after updateCacheWithFountain'); // res.json(closest);
})
.catch(e=>{
l.error(`Error collecting data: ${e}`);
Expand Down
39 changes: 25 additions & 14 deletions server/api/services/conflate.data.service.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const idwd_path_osm = fountain_property_metadata.id_wikidata.src_config.osm.src_
// This service finds matching fountains from osm and wikidata
// and merges their properties

export function conflate(ftns, dbg) {
export function conflate(ftns, dbg,debugAll) {
return new Promise((resolve, reject)=>{

let conflated = {
Expand All @@ -40,18 +40,18 @@ export function conflate(ftns, dbg) {
if(_.min([ftns.osm.length, ftns.wikidata.length])>0) {

// first conflate by wikidata identifiers (QID)
conflated.wikidata = conflateByWikidata(ftns,dbg);
conflated.wikidata = conflateByWikidata(ftns,dbg,debugAll);

// then conflate by coordinates
conflated.coord = conflateByCoordinates(ftns,dbg);
conflated.coord = conflateByCoordinates(ftns,dbg, debugAll);
}

// process remaining fountains that were not matched by either QID or coordinates
let unmatched = {};
unmatched.osm = _.map(ftns.osm, f_osm =>{
return mergeFountainProperties({osm:f_osm, wikidata:false}, 'unmatched')});
return mergeFountainProperties({osm:f_osm, wikidata:false}, 'unmatched.osm', null,debugAll,dbg)});
unmatched.wikidata = _.map(ftns.wikidata, f_wd =>{
return mergeFountainProperties({osm:false, wikidata:f_wd}, 'unmatched')});
return mergeFountainProperties({osm:false, wikidata:f_wd}, 'unmatched.wikidata',null, debugAll,dbg)});

// append the matched (conflated) and unmatched fountains to the list "conflated_fountains_all"
let conflated_fountains_all = _.concat(
Expand All @@ -70,13 +70,15 @@ export function conflate(ftns, dbg) {
* This function finds matching pairs of fountains between osm and wikidata. It returns the list of matches and removes the matched fountains from the 'ftns' argument
* @param {Object} ftns - Object (passed by reference) with two properties: 'osm' is a list of fountains returned from OSM and 'wikidata' is list from wikidata
*/
function conflateByWikidata(ftns,dbg) {
function conflateByWikidata(ftns,dbg, debugAll) {
// Holder for conflated (matched) fountains
let conflated_fountains = [];
// Holders for matched fountain indexes
let matched_idx_osm = [];
let matched_idx_wd = [];
//l.info(ftns+' ftns conflateByWikidata '+dbg);
if (debugAll) {
l.info('conflate.data.service.js conflateByWikidata: '+ftns+' ftns '+dbg+' '+new Date().toISOString());
}
// loop through OSM fountains
for(const [idx_osm, f_osm] of ftns.osm.entries()){

Expand Down Expand Up @@ -104,15 +106,15 @@ function conflateByWikidata(ftns,dbg) {
{
osm: ftns.osm[idx_osm],
wikidata: ftns.wikidata[idx_wd]
}, 'merged by wikidata id', d));
}, 'merged by wikidata id', d, debugAll,dbg));
// document the indexes of the matched fountains so the fountains can be removed from the lists
matched_idx_osm.push(idx_osm);
matched_idx_wd.push(idx_wd);
}
}

// remove matched fountains from lists
cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd);
cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd, debugAll, dbg);

return conflated_fountains;
}
Expand All @@ -123,7 +125,10 @@ function conflateByWikidata(ftns,dbg) {
* @param {[number]} matched_idx_osm - List of matched OSM IDs
* @param {[number]} matched_idx_wd - List of matched wikidata IDs
*/
function cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd) {
function cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd, debugAll, dbg) {
if (debugAll) {
l.info('conflate.data.service.js cleanFountainCollections: '+ftns+' ftns '+dbg+' '+new Date().toISOString());
}
matched_idx_osm = _.orderBy(matched_idx_osm);
for (let i = matched_idx_osm.length -1; i >= 0; i--)
ftns.osm.splice(matched_idx_osm[i],1);
Expand All @@ -139,7 +144,10 @@ function cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd) {
* Find matching fountains based on coordinates alone
* @param {Object} ftns - Object (passed by reference) with two properties: 'osm' is a list of fountains returned from OSM and 'wikidata' is list from wikidata
*/
function conflateByCoordinates(ftns,dbg) {
function conflateByCoordinates(ftns,dbg, debugAll) {
if (debugAll) {
l.info('conflate.data.service.js conflateByCoordinates: '+ftns+' ftns '+dbg+' '+new Date().toISOString());
}
// Holder for conflated fountains
let conflated_fountains = [];
// Temporary holders for matched fountain indexes
Expand Down Expand Up @@ -170,22 +178,25 @@ function conflateByCoordinates(ftns,dbg) {
{
osm: ftns.osm[idx_osm],
wikidata: ftns.wikidata[idx_wd]
}, `merged by location`, dMin));
}, `merged by location`, dMin, debugAll,dbg));
// document the indexes for removal
matched_idx_osm.push(idx_osm);
matched_idx_wd.push(idx_wd);
//todo: if matching is ambiguous, add a note for community
}
}
// remove matched fountains from lists
cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd);
cleanFountainCollections(ftns, matched_idx_osm, matched_idx_wd, debugAll, dbg);


return conflated_fountains;
}


function mergeFountainProperties(fountains, mergeNotes='', mergeDistance=null){
function mergeFountainProperties(fountains, mergeNotes='', mergeDistance=null, debugAll, dbg){
if (debugAll) {
l.info('conflate.data.service.js mergeFountainProperties: '+fountains+' ftns, '+mergeNotes+' '+dbg+' '+new Date().toISOString());
}
// combines fountain properties from osm and wikidata
// For https://github.com/water-fountains/proximap/issues/160 we keep values from both sources when possible
let mergedProperties = {};
Expand Down
15 changes: 10 additions & 5 deletions server/api/services/generateLocationData.service.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import {
* @param {string} locationName - the code name of the location for which fountains should be processed
*/
function generateLocationData(locationName){
l.info(`generateLocationData.service.js: processing all fountains from "${locationName}" `+ new Date().toISOString());
const start = new Date();
l.info(`generateLocationData.service.js: processing all fountains from "${locationName}" `+ start.toISOString());
return new Promise((resolve, reject)=>{
// get bounding box of location
if(!locations.hasOwnProperty(locationName)){
Expand All @@ -42,19 +43,23 @@ function generateLocationData(locationName){
.idsByBoundingBox(bbox.latMin, bbox.lngMin, bbox.latMax, bbox.lngMax,locationName)
.then(r=>WikidataService.byIds(r, locationName));

let debugAll = -1 != locationName.indexOf('test');

// conflate
Promise.all([osmPromise, wikidataPromise])
// get any missing wikidata fountains for #212
.then(r=>fillInMissingWikidataFountains(r[0], r[1]))
.then(r => conflate({
osm: r.osm,
wikidata: r.wikidata
},locationName))
.then(r => defaultCollectionEnhancement(r, locationName))
},locationName, debugAll))
.then(r => defaultCollectionEnhancement(r, locationName, debugAll))
.then(r => createUniqueIds(r))
.then(r => {
l.info('generateLocationData.service.js: successfully processed all (size '+r.length+
`) fountains from ${locationName} `);
const end = new Date();
const elapse = (end - start)/1000;
l.info('generateLocationData.service.js: after '+elapse.toFixed(1)+' secs successfully processed all (size '+r.length+
`) fountains from ${locationName} \nstart: `+start.toISOString()+'\nend: '+end.toISOString());
resolve({
type: 'FeatureCollection',
features: r
Expand Down
44 changes: 33 additions & 11 deletions server/api/services/processing.service.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,52 @@ import l from '../../common/logger';
import {fountain_property_metadata} from "../../../config/fountain.properties"
import {PROP_STATUS_INFO, PROP_STATUS_OK} from "../../common/constants";

export function defaultCollectionEnhancement(fountainCollection,dbg) {
export function defaultCollectionEnhancement(fountainCollection,dbg, debugAll) {
l.info('processing.service.js defaultCollectionEnhancement: '+dbg+' '+new Date().toISOString());
return new Promise((resolve, reject)=>{
fillImageGalleries(fountainCollection,dbg)
.then(r => fillOutNames(r))
fillImageGalleries(fountainCollection,dbg, debugAll)
.then(r => fillOutNames(r,dbg))
.then(r => fillWikipediaSummaries(r,dbg))
.then(r => fillArtistNames(r,dbg))
.then(r => fillOperatorInfo(r))
.then(r => fillOperatorInfo(r,dbg))
.then(r => resolve(r))
.catch(err=>reject(err))
})
}


export function fillImageGalleries(fountainCollection, city){
export function fillImageGalleries(fountainCollection, city, debugAll){
// takes a collection of fountains and returns the same collection,
// enhanced with image galleries when available or default images

l.info('processing.service.js starting fillImageGalleries: '+city+' debugAll '+debugAll+' '+new Date().toISOString());
return new Promise((resolve, reject) => {
let promises = [];
let i = 0;
let tot = fountainCollection.length;
let step = 1;
if (50 < tot) {
step = 10;
if (300 < tot) {
step = 50;
if (600 < tot) {
step = 100;
if (1000 < tot) {
step = 200;
if (2000 < tot) {
step = 500;
}
}
}
}
}
let dbgAll = debugAll;
_.forEach(fountainCollection, fountain =>{
i=i+1;
let dbg = i+'/'+tot;
promises.push(WikimediaService.fillGallery(fountain, dbg, city));
if (!debugAll) {
dbgAll = 0 ==i % step;
}
const dbg = i+'/'+tot;
promises.push(WikimediaService.fillGallery(fountain, dbg, city, dbgAll));
});

Promise.all(promises)
Expand All @@ -54,7 +74,7 @@ export function fillImageGalleries(fountainCollection, city){
export function fillArtistNames(fountainCollection,dbg){
// takes a collection of fountains and returns the same collection,
// enhanced with artist names if only QID was given
l.info('processing.service.js starting fillArtistNames: '+dbg+' '+new Date().toISOString());
return new Promise((resolve, reject) => {
let promises = [];
_.forEach(fountainCollection, fountain =>{
Expand All @@ -72,7 +92,7 @@ export function fillArtistNames(fountainCollection,dbg){
export function fillOperatorInfo(fountainCollection, dbg){
// takes a collection of fountains and returns the same collection,
// enhanced with operator information if that information is available in Wikidata

l.info('processing.service.js starting fillOperatorInfo: '+dbg+' '+new Date().toISOString());
return new Promise((resolve, reject) => {
let promises = [];
_.forEach(fountainCollection, fountain =>{
Expand All @@ -88,6 +108,7 @@ export function fillOperatorInfo(fountainCollection, dbg){

export function fillWikipediaSummaries(fountainCollection, dbg){
// takes a collection of fountains and returns the same collection, enhanced with wikipedia summaries
l.info('processing.service.js starting fillWikipediaSummaries: '+dbg+' '+new Date().toISOString());
return new Promise((resolve, reject) => {
let promises = [];
// loop through fountains
Expand Down Expand Up @@ -202,8 +223,9 @@ export function essenceOf(fountainCollection) {

}

export function fillOutNames(fountainCollection) {
export function fillOutNames(fountainCollection,dbg) {
// takes a collection of fountains and returns the same collection, with blanks in fountain names filled from other languages or from 'name' property
l.info('processing.service.js starting fillOutNames: '+dbg+' '+new Date().toISOString());
return new Promise((resolve, reject) => {
let langs = ['en','de','fr', 'it', 'tr'];
fountainCollection.forEach(f => {
Expand Down
3 changes: 3 additions & 0 deletions server/api/services/wikidata.service.js
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,11 @@ class WikidataService {
});
// l.debug(url);
// get data
let data = null;
return http.get(url)
// parse into an easier to read format
.then(r=>{
data = r.data;
return wdk.simplify.entity(
r.data.entities[qid],
{
Expand Down Expand Up @@ -199,6 +201,7 @@ class WikidataService {
// report error to log and save to data
l.error(`wikidata.service.ts fillArtistName: Error collecting artist name and url from wikidata: `+dbgHere);
l.info(`stack: ${err.stack}`);
l.info(`url: ${url}\n`);
fountain.properties.artist_name.issues.push({
data: err,
context: {
Expand Down
Loading

0 comments on commit df83f38

Please sign in to comment.