Skip to content

Commit 29a88cc

Browse files
committed
Update wikidata script and bun run wikidata
1 parent d93ac94 commit 29a88cc

File tree

5 files changed

+66
-61
lines changed

5 files changed

+66
-61
lines changed

config/replacements.json

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,10 @@
6464
"note": "Hampshire Fire and Rescue Service (and Isle of Wight Fire and Rescue Service) have merged.",
6565
"wikidata": "Q106317911"
6666
},
67+
"Q5783997": {
68+
"note": "Consejo de Educación Secundaria",
69+
"wikidata": "Q119732446"
70+
},
6771
"Q6084130": {
6872
"note": "Isle of Wight Fire and Rescue Service (and Hampshire Fire and Rescue Service) have merged.",
6973
"wikidata": "Q106317911"
@@ -109,6 +113,10 @@
109113
"note": "Colin's Jeans - https://github.com/osmlab/name-suggestion-index/issues/3154",
110114
"wikidata": "Q18015543"
111115
},
116+
"Q50332297": {
117+
"note": "Junta de Castilla Ministry of Education - Wikidata redirect",
118+
"wikidata": "Q30297363"
119+
},
112120
"Q56599145": {
113121
"note": "Steers - https://github.com/osmlab/name-suggestion-index/issues/3154",
114122
"wikidata": "Q3056765"

data/operators/amenity/school.json

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -12302,18 +12302,7 @@
1230212302
"amenity": "school",
1230312303
"operator": "Consejería de Educación de la Junta de Castilla y León",
1230412304
"operator:type": "public",
12305-
"operator:wikidata": "Q50332297"
12306-
}
12307-
},
12308-
{
12309-
"displayName": "Consejo de Educación Secundaria",
12310-
"id": "consejodeeducacionsecundaria-6176b9",
12311-
"locationSet": {"include": ["uy"]},
12312-
"tags": {
12313-
"amenity": "school",
12314-
"operator": "Consejo de Educación Secundaria",
12315-
"operator:type": "government",
12316-
"operator:wikidata": "Q5783997"
12305+
"operator:wikidata": "Q30297363"
1231712306
}
1231812307
},
1231912308
{
@@ -14780,6 +14769,9 @@
1478014769
"displayName": "Dirección General de Educación Secundaria",
1478114770
"id": "direcciongeneraldeeducacionsecundaria-6176b9",
1478214771
"locationSet": {"include": ["uy"]},
14772+
"matchNames": [
14773+
"consejo de educación secundaria"
14774+
],
1478314775
"tags": {
1478414776
"amenity": "school",
1478514777
"operator": "Dirección General de Educación Secundaria",

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@
100100
"postpublish": "bun ./scripts/postpublish.ts",
101101
"start": "bun ./scripts/server.ts",
102102
"test": "bun test --dots --coverage ./test/*.js",
103-
"wikidata": "bun ./scripts/build_wikidata.js"
103+
"wikidata": "bun ./scripts/wikidata.ts"
104104
},
105105
"dependencies": {
106106
"diacritics": "^1.3.0",

scripts/dist.ts

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,16 +12,19 @@ import { sortObject } from '../lib/sort_object.ts';
1212
// JSON
1313
const packageJSON = await Bun.file('./package.json').json();
1414
const treesJSON = await Bun.file('./config/trees.json').json();
15+
let featureCollectionJSON;
1516
try {
16-
const featureCollectionJSON = await Bun.file('./dist/json/featureCollection.json').json();
17+
featureCollectionJSON = await Bun.file('./dist/json/featureCollection.json').json();
1718
} catch (err) {
1819
console.error(styleText('red', `Error: ${err.message} `));
1920
console.error(styleText('yellow', `Please run 'bun run build' first.`));
2021
process.exit(1);
2122
}
23+
let dissolvedJSON;
24+
let wikidataJSON;
2225
try {
23-
const dissolvedJSON = await Bun.file('./dist/json/dissolved.json').json();
24-
const wikidataJSON = await Bun.file('./dist/json/wikidata.json').json();
26+
dissolvedJSON = await Bun.file('./dist/json/dissolved.json').json();
27+
wikidataJSON = await Bun.file('./dist/json/wikidata.json').json();
2528
} catch (err) {
2629
console.error(styleText('red', `Error: ${err.message} `));
2730
console.error(styleText('yellow', `Please run 'bun run wikidata' first.`));

scripts/build_wikidata.js → scripts/wikidata.ts

Lines changed: 47 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,10 @@
1-
// External
2-
import fs from 'bun:fs';
3-
import http from 'node:http';
4-
import https from 'node:https';
1+
// set to true if you just want to test what the script will do without updating Wikidata
2+
const DRYRUN = false;
3+
4+
import { $ } from 'bun';
55
import { iso1A2Code } from '@rapideditor/country-coder';
6-
import JSON5 from 'json5';
76
import localeCompare from 'locale-compare';
87
import LocationConflation from '@rapideditor/location-conflation';
9-
import shell from 'shelljs';
108
import stringify from '@aitodotai/json-stringify-pretty-compact';
119
import { styleText } from 'bun:util';
1210
import wikibase from 'wikibase-sdk';
@@ -16,37 +14,35 @@ const withLocale = localeCompare('en-US');
1614
// Internal
1715
import { sortObject } from '../lib/sort_object.ts';
1816
import { fileTree } from '../lib/file_tree.ts';
19-
import { writeFileWithMeta } from '../lib/write_file_with_meta.ts';
2017

2118
// JSON
22-
const packageJSON = JSON5.parse(fs.readFileSync('package.json', 'utf8'));
23-
const treesJSON = JSON5.parse(fs.readFileSync('config/trees.json', 'utf8'));
19+
const packageJSON = await Bun.file('./package.json').json();
20+
const treesJSON = await Bun.file('./config/trees.json').json();
2421
const trees = treesJSON.trees;
2522

2623
// We use LocationConflation for validating and processing the locationSets
27-
const featureCollectionJSON = JSON5.parse(fs.readFileSync('dist/featureCollection.json', 'utf8'));
28-
const loco = new LocationConflation(featureCollectionJSON);
24+
let featureCollectionJSON;
25+
try {
26+
featureCollectionJSON = await Bun.file('./dist/json/featureCollection.json').json();
27+
} catch (err) {
28+
console.error(styleText('red', `Error: ${err.message} `));
29+
console.error(styleText('yellow', `Please run 'bun run build' first.`));
30+
process.exit(1);
31+
}
32+
const _loco = new LocationConflation(featureCollectionJSON);
2933

3034
const wbk = wikibase({
3135
instance: 'https://www.wikidata.org',
3236
sparqlEndpoint: 'https://query.wikidata.org/sparql'
3337
});
3438

35-
// set keepalive for all the connections - see #4948
36-
const httpAgent = new http.Agent({ keepAlive: true });
37-
const httpsAgent = new https.Agent({ keepAlive: true });
38-
const fetchOptions = {
39-
agent: (url) => ((url.protocol === 'http:') ? httpAgent : httpsAgent)
40-
};
41-
const fetchOptionsQuery = {
42-
agent: fetchOptions.agent,
43-
method: 'GET',
44-
headers: new Headers( {'User-Agent': 'name-suggestion-index/6.0 (https://github.com/osmlab/name-suggestion-index)'} )
45-
};
4639

40+
$.nothrow(); // If a shell command returns nonzero, keep going.
41+
// Start fresh
42+
await $`rm -f ./dist/json/dissolved.*`;
43+
await $`rm -f ./dist/json/warnings.*`;
44+
await $`rm -f ./dist/json/wikidata.*`;
4745

48-
// set to true if you just want to test what the script will do without updating Wikidata
49-
const DRYRUN = false;
5046

5147
console.log(styleText('blue', '-'.repeat(70)));
5248
console.log(styleText('blue', '📓 Build Wikidata cache'));
@@ -70,13 +66,10 @@ console.log(styleText('blue', '-'.repeat(70)));
7066
// }
7167
// }
7268
// }
73-
shell.config.silent = true;
74-
shell.mv('-f', './config/secrets.json', './secrets.json');
75-
shell.config.reset();
7669

7770
let _secrets;
7871
try {
79-
_secrets = JSON5.parse(fs.readFileSync('./secrets.json', 'utf8'));
72+
_secrets = await Bun.file('./secrets.json').json();
8073
} catch (err) { /* ignore */ }
8174

8275
if (_secrets && !_secrets.wikibase) {
@@ -112,26 +105,30 @@ if (_secrets && _secrets.wikibase) {
112105

113106

114107
// what to fetch
115-
let _cache = {};
116-
console.log('');
117-
console.log('🏗 ' + styleText('yellow', `Loading index files (this might take over a minute, maybe more) ...`));
118-
fileTree.read(_cache, loco);
119-
fileTree.expandTemplates(_cache, loco);
108+
const START = '🏗 ' + styleText('yellow', `Loading index files…`);
109+
const END = '👍 ' + styleText('green', `done loading`);
110+
console.log(START);
111+
console.time(END);
112+
113+
const _nsi = {};
114+
await fileTree.read(_nsi, _loco);
115+
fileTree.expandTemplates(_nsi, _loco);
116+
console.timeEnd(END);
120117

121118

122119
// Gather all QIDs referenced by any tag..
123120
console.log('');
124-
console.log('🏗 ' + styleText('yellow', `Syncing Wikidata with name-suggestion-index ...`));
125-
console.log(' ... this is done in batches, and may take around 10 minutes ...');
121+
console.log('🏗 ' + styleText('yellow', `Syncing Wikidata with name-suggestion-index`));
122+
console.log(' This is done in batches, and may take around 10 minutes');
126123
let _wikidata = {};
127124
let _qidItems = {}; // any item referenced by a qid
128125
let _qidIdItems = {}; // items where we actually want to update the NSI-identifier on wikidata
129126
let _qidMetadata = {};
130-
Object.keys(_cache.path).forEach(tkv => {
127+
Object.keys(_nsi.path).forEach(tkv => {
131128
const parts = tkv.split('/', 3); // tkv = "tree/key/value"
132129
const t = parts[0];
133130

134-
const items = _cache.path[tkv].items;
131+
const items = _nsi.path[tkv].items;
135132
if (!Array.isArray(items) || !items.length) return;
136133

137134
items.forEach(item => {
@@ -203,7 +200,7 @@ function doFetch(index) {
203200
let backoff = false;
204201
console.log(styleText(['yellow','bold'], `\nBatch ${index+1}/${_urls.length}`));
205202

206-
return fetch(currURL, fetchOptions)
203+
return fetch(currURL)
207204
.then(response => {
208205
if (!response.ok) throw new Error(response.status + ' ' + response.statusText);
209206
return response.json();
@@ -641,7 +638,7 @@ function getClaimValues(entity, prop, includeDeprecated) {
641638
// - `wikidata.json`
642639
// - `dissolved.json`
643640
//
644-
function finish() {
641+
async function finish() {
645642
const START = '🏗 ' + styleText('yellow', 'Writing output files');
646643
const END = '👍 ' + styleText('green', 'output files updated');
647644
console.log('');
@@ -677,9 +674,9 @@ function finish() {
677674

678675
// Set `DRYRUN=true` at the beginning of this script to prevent actual file writes from happening.
679676
if (!DRYRUN) {
680-
writeFileWithMeta('dist/warnings.json', stringify({ warnings: _warnings }) + '\n');
681-
writeFileWithMeta('dist/wikidata.json', stringify({ wikidata: sortObject(_wikidata) }) + '\n');
682-
writeFileWithMeta('dist/dissolved.json', stringify({ dissolved: sortObject(dissolved) }, { maxLength: 100 }) + '\n');
677+
await Bun.write('./dist/json/warnings.json', stringify({ warnings: _warnings }) + '\n');
678+
await Bun.write('./dist/json/wikidata.json', stringify({ wikidata: sortObject(_wikidata) }) + '\n');
679+
await Bun.write('./dist/json/dissolved.json', stringify({ dissolved: sortObject(dissolved) }, { maxLength: 100 }) + '\n');
683680
}
684681

685682
console.timeEnd(END);
@@ -703,7 +700,7 @@ function fetchFacebookLogo(qid, username, restriction) {
703700
if (m) userid = m[1];
704701

705702
// Can specify no redirect to fetch json and speed up this process
706-
return fetch(`${logoURL}&redirect=0`, fetchOptions)
703+
return fetch(`${logoURL}&redirect=0`)
707704
.then(response => response.json())
708705
.then(json => {
709706
if (!json) return true;
@@ -766,7 +763,12 @@ function removeOldNsiClaims() {
766763
?guid ps:P8253 ?nsiId.
767764
}`;
768765

769-
return fetch(wbk.sparqlQuery(query), fetchOptionsQuery)
766+
const opts = {
767+
method: 'GET',
768+
headers: new Headers( {'User-Agent': 'name-suggestion-index/6.0 (https://github.com/osmlab/name-suggestion-index)'} )
769+
};
770+
771+
return fetch(wbk.sparqlQuery(query), opts)
770772
.then(response => {
771773
if (!response.ok) throw new Error(response.status + ' ' + response.statusText);
772774
return response.json();
@@ -842,7 +844,7 @@ function enLabelForQID(qid) {
842844
const meta = _qidMetadata[qid];
843845
const ids = Array.from(_qidItems[qid]);
844846
for (let i = 0; i < ids.length; i++) {
845-
const item = _cache.id.get(ids[i]);
847+
const item = _nsi.id.get(ids[i]);
846848

847849
if (meta.what === 'flag') {
848850
if (looksLatin(item.tags.subject)) return `flag of ${item.tags.subject}`;

0 commit comments

Comments
 (0)