/**
* Prepare the curated data and save the result to the given folder.
*
 * Curation means copying raw data to the given folder, applying patches (CSS,
 * elements, events, IDL) when needed, and running the post-processing modules
 * that need to run on curated data to generate the `idlparsed`, `idlnames`
 * and `idlnamesparsed` folders, and the merged `events.json` file.
 *
 * The output folder gets created if it does not exist yet. If the folder is
 * not empty, its contents get deleted first.
 *
 * The `csscomplete` post-processing module is supposed to have been run when
 * specs were crawled. We don't run it after curation because it could
 * re-introduce some of the CSS properties defined in prose that the
 * `dropCSSPropertyDuplicates` function removes during curation.
 *
 * Usage:
 *   node tools/prepare-curated.js [raw data folder] [curated folder]
*/
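
// Example invocation, for illustration only ("ed" and "curated" are just the
// defaults applied at the bottom of this file):
//
//   node tools/prepare-curated.js ed curated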
const fs = require('fs').promises;
const path = require('path');
const { rimraf } = require('rimraf');
const { crawlSpecs } = require('reffy');
const {
  createFolderIfNeeded,
  loadJSON,
  copyFolder } = require('./utils');
const { applyPatches } = require('./apply-patches');
const { dropCSSPropertyDuplicates } = require('./drop-css-property-duplicates');
const { curateEvents } = require('./amend-event-data');

/**
 * Remove the given spec from the curation process, deleting the extract files
 * that its crawl result links to.
 *
 * @function
 * @param {Object} spec Spec entry from the crawl index
 * @param {String} curatedFolder Path to the folder that contains curated data
 */
async function removeFromCuration(spec, curatedFolder) {
  for (const property of ['css', 'elements', 'events', 'idl']) {
    // Only consider properties that link to an extract
    if (spec[property] &&
        (typeof spec[property] === 'string') &&
        spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
      const filename = path.join(curatedFolder, spec[property]);
      console.log(`Removing ${spec.standing} ${spec.title} from curation: del ${filename}`);
      await fs.unlink(filename);
    }
  }
}
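
// For illustration, a quick sanity check of the extract-path pattern used
// above (made-up relative paths, not taken from actual crawl results):
//   /^[^\/]+\/[^\/]+\.(json|idl)$/.test('css/display.json');  // true
//   /^[^\/]+\/[^\/]+\.(json|idl)$/.test('idl/fetch.idl');     // true
//   /^[^\/]+\/[^\/]+\.(json|idl)$/.test('index.json');        // false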

/**
 * Remove links from the given spec crawl result to extracts that no longer
 * exist once patches have been applied.
 *
 * @function
 * @param {Object} spec Spec entry from the crawl index to parse and clean
 * @param {String} curatedFolder Path to the folder that contains curated data
 */
async function cleanCrawlOutcome(spec, curatedFolder) {
  for (const property of Object.keys(spec)) {
    // Only consider properties that link to an extract
    if (spec[property] &&
        (typeof spec[property] === 'string') &&
        spec[property].match(/^[^\/]+\/[^\/]+\.(json|idl)$/)) {
      try {
        await fs.lstat(path.join(curatedFolder, spec[property]));
      }
      catch (err) {
        // Extract file is gone, drop the link from the crawl result
        delete spec[property];
      }
    }
  }
}

/**
 * Copy raw data to the curated folder and apply patches whenever needed.
 *
 * @function
 * @param {String} rawFolder Path to the folder that contains raw crawl data
 * @param {String} curatedFolder Path to the folder to fill with curated data
 */
async function prepareCurated(rawFolder, curatedFolder) {
  console.log('Make sure that curated folder exists and is empty');
  await createFolderIfNeeded(curatedFolder);
  console.log('- folder exists');
  try {
    rimraf.sync(`${curatedFolder}/*`, { glob: true });
  }
  catch {
    // Best effort: continue even if some contents could not be deleted
  }
  console.log('- folder is empty');
  console.log();

  console.log('Copy raw data to curated folder');
  await crawlSpecs({
    useCrawl: rawFolder,
    output: curatedFolder,
    quiet: true
  });
  console.log('- done');
  console.log();

  console.log('Apply patches');
  await applyPatches(rawFolder, curatedFolder, 'all');
  await curateEvents(curatedFolder);
  console.log('- patches applied');
  console.log();

  console.log('Adjust curated data and crawl index');
  const crawlIndexFile = path.join(curatedFolder, 'index.json');
  const crawlIndex = await loadJSON(crawlIndexFile);
  for (const spec of crawlIndex.results) {
    // Specs that are not in good standing get dropped from curated data
    if (spec.standing !== 'good') {
      await removeFromCuration(spec, curatedFolder);
    }
    await cleanCrawlOutcome(spec, curatedFolder);
  }
  await fs.writeFile(crawlIndexFile, JSON.stringify(crawlIndex, null, 2));
  console.log('- done');
  console.log();

  console.log('Drop duplicate CSS property definitions when possible');
  await dropCSSPropertyDuplicates(curatedFolder);
  console.log('- done');
  console.log();

  console.log('Run post-processing modules on curated data');
  await crawlSpecs({
    useCrawl: curatedFolder,
    output: curatedFolder,
    post: ['idlparsed', 'idlnames', 'events'],
    quiet: true
  });
  console.log('- done');
}
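
// For reference, the crawl index manipulated above is a JSON object along
// these lines (property names taken from the code above, values made up):
//   {
//     "results": [
//       { "title": "Some spec", "standing": "good", "css": "css/some-spec.json" }
//     ]
//   }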

/*******************************************************************************
Kick things off
*******************************************************************************/
const rawFolder = process.argv[2] ?? "ed";
const curatedFolder = process.argv[3] ?? "curated";
prepareCurated(rawFolder, curatedFolder)
  .then(() => {
    console.log();
    console.log("== The end ==");
  })
  .catch(err => {
    console.error(err);
    process.exit(1);
  });