-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathphyx2owl.js
More file actions
executable file
·166 lines (152 loc) · 5.63 KB
/
phyx2owl.js
File metadata and controls
executable file
·166 lines (152 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env node
const fs = require('node:fs');
const path = require('node:path');
const phyx = require('..');
/*
* An application for converting input Phyx files to OWL ontologies in N-Quads.
*/
// Command-line interface. Positional arguments (collected by yargs in
// `argv._`) are the files or directories to convert; the options below bound
// how many specifiers a phyloreference may have and set the base IRI.
const DEFAULT_MAX_SPECIFIERS = 8;
const MAX_INTERNAL_SPECIFIERS_HELP =
  'The maximum number of internal specifiers (phylorefs with more than this number will be ignored)';
const MAX_EXTERNAL_SPECIFIERS_HELP =
  'The maximum number of external specifiers (phylorefs with more than this number will be ignored)';

const { argv } = require('yargs')
  .usage('$0 [files or directories to convert into OWL ontologies]')
  .describe('max-internal-specifiers', MAX_INTERNAL_SPECIFIERS_HELP)
  .default('max-internal-specifiers', DEFAULT_MAX_SPECIFIERS)
  .describe('max-external-specifiers', MAX_EXTERNAL_SPECIFIERS_HELP)
  .default('max-external-specifiers', DEFAULT_MAX_SPECIFIERS)
  .describe('base-iri', 'The base IRI to use for the input files')
  .help()
  .alias('h', 'help');
/**
 * Collect the paths of every matching file at or below `filePath`.
 *
 * A regular file yields a one-element list when `check` accepts it, and an
 * empty list otherwise. A directory is walked recursively and the matches of
 * all of its entries are concatenated. Anything else (a path that does not
 * exist, or an entry that `lstat` reports as neither a file nor a directory)
 * yields an empty list.
 *
 * @param {string} filePath - File or directory to examine.
 * @param {function(string): boolean} [check] - Predicate applied to each file
 *   path; by default it accepts names ending in '.json' (case-insensitive).
 * @returns {string[]} Paths of the files accepted by `check`.
 */
function getFilesInDir(
  filePath,
  check = filename => filename.toLowerCase().endsWith('.json'),
) {
  // Paths that do not exist contribute nothing.
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const stats = fs.lstatSync(filePath);

  // Directories: gather the matches from every entry, one level at a time.
  if (stats.isDirectory()) {
    return fs
      .readdirSync(filePath)
      .flatMap(entry => getFilesInDir(path.join(filePath, entry), check));
  }

  // Regular files are kept only when they satisfy the predicate; everything
  // else is skipped.
  if (stats.isFile() && check(filePath)) {
    return [filePath];
  }
  return [];
}
// Work out which files to process. Every positional command-line argument is
// either a file or a directory; directories are expanded into the JSON files
// found beneath them.
const filenames = argv._;
if (filenames.length === 0) {
  console.error('No input files provided.');
  process.exit(1);
}

const files = filenames.flatMap(filename => getFilesInDir(filename));

// Nothing usable was found: the inputs are missing, or are directories
// without any JSON files.
if (files.length === 0) {
  console.error(
    `Input files do not exist or consist of directories that do not contain JSON files: ${filenames.join(', ')}`,
  );
  process.exit(1);
}
/**
 * Convert one Phyx file into an OWL ontology serialized as N-Quads.
 *
 * Phyloreferences with more internal or external specifiers than allowed by
 * `argv.maxInternalSpecifiers` / `argv.maxExternalSpecifiers` are removed
 * before conversion, with a warning for each one skipped.
 *
 * NOTE: `toRDF()` returns a promise, so the output file is written
 * asynchronously, after this function has already returned. The return value
 * therefore only reflects the synchronous steps (reading, parsing and
 * filtering). A failure while generating or writing the N-Quads is reported
 * on stderr and marks the process as failed through `process.exitCode`.
 *
 * @param {string} filename - Path of the Phyx (JSON) file to convert.
 * @param {string} [argOutputFilename=''] - Path of the file to write. When
 *   empty, it is derived from `filename`: a trailing '.json' is replaced by
 *   '.owl', otherwise '.owl' is appended.
 * @returns {boolean} `true` if at least one phyloreference survived filtering
 *   and the conversion was started; `false` if every phyloreference was
 *   filtered out or a synchronous error (e.g. unreadable or invalid JSON)
 *   occurred.
 */
function convertFileToOWL(filename, argOutputFilename = '') {
  let outputFilename;
  if (argOutputFilename !== '') {
    outputFilename = argOutputFilename;
  } else if (filename.toLowerCase().endsWith('.json')) {
    outputFilename = `${filename.slice(0, -'.json'.length)}.owl`;
  } else {
    outputFilename = `${filename}.owl`;
  }

  try {
    // Parse the input file into JSON.
    const phyxContent = JSON.parse(fs.readFileSync(filename, 'utf8'));

    // Remove any phylorefs that have too many specifiers.
    const allPhylorefs = phyxContent.phylorefs || [];
    const phylorefCount = allPhylorefs.length;
    // Declared with `const`: a bare assignment here would create an implicit
    // global (and throw a ReferenceError in strict mode).
    const filteredPhylorefs = allPhylorefs.filter(phyloref => {
      const wrappedPhyloref = new phyx.PhylorefWrapper(phyloref);
      const internalSpecifiersCount = wrappedPhyloref.internalSpecifiers.length;
      const externalSpecifiersCount = wrappedPhyloref.externalSpecifiers.length;

      if (internalSpecifiersCount > argv.maxInternalSpecifiers) {
        console.warn(
          `Phyloreference ${wrappedPhyloref.label} was skipped, since it has ${internalSpecifiersCount} internal specifiers.`,
        );
        return false;
      }
      if (externalSpecifiersCount > argv.maxExternalSpecifiers) {
        console.warn(
          `Phyloreference ${wrappedPhyloref.label} was skipped, since it has ${externalSpecifiersCount} external specifiers.`,
        );
        return false;
      }
      return true;
    });
    phyxContent.phylorefs = filteredPhylorefs;

    // Convert the Phyx file into N-Quads and write them out once ready.
    const wrappedPhyx = new phyx.PhyxWrapper(phyxContent);
    wrappedPhyx
      .toRDF(argv.baseIri, path.dirname(filename))
      .then(nquads => {
        fs.writeFileSync(outputFilename, nquads);
      })
      .catch(err => {
        // The enclosing try/catch cannot see asynchronous failures, and
        // rethrowing here would only produce an unhandled rejection. Report
        // the failure with its file context and flag the run as failed
        // without aborting conversions that are still pending.
        console.error(
          `Could not convert ${filename} to ${outputFilename}: ${err} at ${err.stack}`,
        );
        process.exitCode = 1;
      });

    // Report on whether any phyloreferences were converted.
    if (filteredPhylorefs.length === 0) {
      console.warn(
        `No phyloreferences in ${filename} were converted to ${outputFilename}, as they were all filtered out.`,
      );
      return false;
    }
    if (phylorefCount > filteredPhylorefs.length) {
      console.warn(
        `Only ${filteredPhylorefs.length} out of ${phylorefCount} were converted from ${filename} to ${outputFilename}.`,
      );
      return true;
    }
    console.info(`Converted ${filename} to ${outputFilename}.`);
    return true;
  } catch (e) {
    console.error(
      `Could not convert ${filename} to ${outputFilename}: ${e} at ${e.stack}`,
    );
    console.error('');
  }
  return false;
}
// Convert every file, then summarize how many conversions succeeded and how
// many failed.
const successes = files.map(file => convertFileToOWL(file));
const convertedCount = successes.filter(Boolean).length;
const failedCount = successes.length - convertedCount;

if (failedCount === 0) {
  console.log(`${successes.length} files converted successfully.`);
} else {
  console.log(
    `Errors occurred; ${convertedCount} files converted successfully, ${failedCount} files failed.`,
  );
}