-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpopulate_category.php
executable file
·84 lines (78 loc) · 2.51 KB
/
populate_category.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#! /usr/bin/env php
<?php
// Read the category dump file (one JSON record per line)
// and populate these tables:
// person
// ensemble
// nationality
// period
require_once("parse_category.inc");
require_once("populate_util.inc");
/**
 * Read up to $nlines lines from a category dump file and hand the
 * entries found in them to make_person() / make_ensemble().
 *
 * Each non-blank line is decoded as JSON and iterated as
 * title => body pairs. Only titles that start with 'Category:' are
 * passed to parse_person(). Parsed entries are then routed by type:
 *   - 'other'        : skipped
 *   - 'organization' : passed to make_ensemble() when the (lower-cased)
 *                      instrument is one of the recognized ensemble kinds;
 *                      skipped when the instrument is missing or unknown
 *   - anything else  : passed to make_person()
 * All entries of one line are wrapped in a single DB transaction.
 *
 * Progress and diagnostics are echoed to standard output.
 *
 * @param string $file   path of the dump file to read
 * @param int    $nlines maximum number of lines to read
 */
function main($file, $nlines) {
    // Report an unreadable file instead of passing a non-resource to fgets().
    $f = is_readable($file) ? fopen($file, 'r') : false;
    if ($f === false) {
        echo "cannot open $file\n";
        return;
    }
    try {
        for ($i = 0; $i < $nlines; $i++) {
            echo "JSON record $i\n";
            $x = fgets($f);
            // fgets() returns false at end of file (or on a read error)
            if ($x === false) {
                echo "end of file\n";
                break;
            }
            if (!trim($x)) continue;
            $y = json_decode($x);
            if (!$y) {
                echo "bad JSON: $x\n";
                continue;
            }
            DB::begin_transaction();
            foreach ($y as $title => $body) {
                //echo "$title\n";
                if (strpos($title, 'Category:') !== 0) continue;
                $p = parse_person($title, $body);
                if (!$p) continue;
                $type = empty($p->type) ? '' : strtolower($p->type);
                if ($type === 'other') {
                    // usually a category, e.g. German Folk Songs
                    // or random other stuff, like RISM
                    continue;
                }
                // ensembles can be either #fte:performer or #fte:person
                //
                if ($type === 'organization') {
                    if (empty($p->instrument)) {
                        // TODO: in some cases instrument is missing
                        // but could be inferred from name
                        continue;
                    }
                    $p->instrument = strtolower($p->instrument);
                    // collapse every "opera company ..." variant to one kind
                    if (starts_with($p->instrument, 'opera company')) {
                        $p->instrument = 'opera company';
                    }
                    echo "got organization $p->instrument\n";
                    switch ($p->instrument) {
                    case 'band':
                    case 'chamber ensemble':
                    case 'choir':
                    case 'chorus':
                    case 'early music ensemble':
                    case 'ensemble':
                    case 'mixed chorus':
                    case 'opera company':
                    case 'orchestra':
                    case 'period-instrument ensemble':
                    case 'string quartet':
                    case 'vocal ensemble':
                        make_ensemble($p);
                        break;
                    default:
                        echo "unrecognized organization type: $p->instrument\n";
                    }
                } else {
                    make_person($p);
                }
            }
            DB::commit_transaction();
        }
    } finally {
        // always release the file handle, even if a helper throws
        fclose($f);
    }
}
// Script entry point. The dump is noted as having 480 lines,
// so a limit of 1000 covers the whole file.
$dump_path = 'data/david_category_template_dump.txt';
$line_limit = 1000;
main($dump_path, $line_limit);
?>