-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathadvanced-search.html
More file actions
414 lines (361 loc) · 19.1 KB
/
advanced-search.html
File metadata and controls
414 lines (361 loc) · 19.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
---
layout: default
title: Advanced Search - KG Registry
---
<div class="row">
<h1 class="display-6">
Advanced Search
<br/>
<small class="text-muted">
Search the KG Registry using SQL
</small>
</h1>
</div>
<div class="row mt-4">
<div class="col-12">
<div class="card">
<div class="card-header">
<h5>Query the Knowledge Graph Registry</h5>
</div>
<div class="card-body">
<p class="card-text">
Enter a SQL query to search the KG Registry database. This interface uses DuckDB to run queries directly in your browser.
</p>
<div class="form-group">
<label for="sqlQuery"><strong>SQL Query:</strong></label>
<textarea class="form-control" id="sqlQuery" rows="5" placeholder="SELECT * FROM resources LIMIT 10;">SELECT * FROM resources LIMIT 10;</textarea>
</div>
<div class="mt-3">
<div class="d-flex flex-wrap gap-2">
<button id="runQuery" class="btn btn-primary">Run Query</button>
<button id="resetQuery" class="btn btn-secondary">Reset</button>
</div>
</div>
<div class="mt-3">
<h6>Example Queries:</h6>
<div class="example-query-list">
<button class="btn btn-sm btn-outline-secondary m-1 example-query" data-query="SELECT id, name, category FROM resources ORDER BY name LIMIT 20;">Basic Resource Info</button>
<button class="btn btn-sm btn-outline-secondary m-1 example-query" data-query="SELECT r.id, r.name, COUNT(d.domain) as domain_count FROM resources r JOIN resource_domains d ON r.id = d.resource_id GROUP BY r.id, r.name ORDER BY domain_count DESC LIMIT 10;">Resources by Domain Count</button>
<button class="btn btn-sm btn-outline-secondary m-1 example-query" data-query="SELECT r.id, r.name, p.product_id, p.product_name FROM resources r JOIN resource_products p ON r.id = p.resource_id LIMIT 20;">Resources with Products</button>
<button class="btn btn-sm btn-outline-secondary m-1 example-query" data-query="SELECT domain, COUNT(*) as count FROM resource_domains GROUP BY domain ORDER BY count DESC;">Most Common Domains</button>
<button class="btn btn-sm btn-outline-secondary m-1 example-query" data-query="SELECT r.id, r.name, t.taxon FROM resources r JOIN resource_taxa t ON r.id = t.resource_id ORDER BY t.taxon LIMIT 20;">Resources with Taxa</button>
<button class="btn btn-sm btn-outline-secondary m-1 example-query" data-query="SELECT r.id, r.name FROM resources r JOIN resource_taxa t ON r.id = t.resource_id WHERE t.taxon = 'NCBITaxon:9606' ORDER BY r.name;">Resources for Human (NCBITaxon:9606)</button>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="row mt-4">
<div class="col-12">
<div class="card">
<div class="card-header">
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2">
<h5>Results</h5>
<button id="downloadResults" class="btn btn-sm btn-success" disabled>Download as CSV</button>
</div>
</div>
<div class="card-body">
<div id="loading" class="text-center d-none">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
<p>Running query...</p>
</div>
<div id="results" class="table-responsive">
<p class="text-muted">Run a query to see results</p>
</div>
</div>
</div>
</div>
</div>
<div class="row mt-4 mb-4">
<div class="col-12">
<div class="card">
<div class="card-header">
<h5>Database Schema</h5>
</div>
<div class="card-body">
<p>The KG Registry database contains the following main tables:</p>
<ul>
<li><strong>resources</strong> - Basic information about each knowledge graph resource (id, name, description, etc.)</li>
<li><strong>resource_domains</strong> - Domain classifications for resources</li>
<li><strong>resource_products</strong> - Products associated with each resource</li>
<li><strong>resource_taxa</strong> - Taxa (organisms/species) that each resource is relevant to, using NCBI Taxonomy identifiers</li>
</ul>
<p>For more information about the data schema, visit the <a href="/kg-registry/docs/parquet_backend">Parquet backend documentation</a>.</p>
</div>
</div>
</div>
</div>
<!-- Loading message -->
<div id="duckdb-loading-message" class="alert alert-info mt-3">
<p><strong>Loading DuckDB...</strong> Please wait while the database engine is being initialized.</p>
</div>
<!-- Load required dependencies -->
<script type="module">
// Import DuckDB from CDN as an ES module
import * as duckdbWasm from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.29.0/+esm';
// Make it available globally
window.duckdbWasm = duckdbWasm;
// Initialize everything once the document is loaded
document.addEventListener('DOMContentLoaded', function() {
initDuckDBAndSetup();
});
// Function to initialize DuckDB and set up event listeners
async function initDuckDBAndSetup() {
try {
// Disable the query button during initialization
document.getElementById('runQuery').disabled = true;
document.getElementById('results').innerHTML = '<div class="alert alert-info">Initializing DuckDB...</div>';
// Get the available bundles from JSDelivr
const JSDELIVR_BUNDLES = window.duckdbWasm.getJsDelivrBundles();
// Select a bundle based on browser capabilities
const bundle = await window.duckdbWasm.selectBundle(JSDELIVR_BUNDLES);
// Create a URL for the worker
const workerUrl = URL.createObjectURL(
new Blob([`importScripts("${bundle.mainWorker}");`], {
type: "text/javascript",
})
);
// Instantiate the database
const logger = new window.duckdbWasm.ConsoleLogger();
const worker = new Worker(workerUrl);
const db = new window.duckdbWasm.AsyncDuckDB(logger, worker);
await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
URL.revokeObjectURL(workerUrl);
// Create a connection to the database
const conn = await db.connect();
// Load Parquet files
const tables = ['resources', 'resource_domains', 'resource_products', 'resource_taxa'];
const parquetDir = '/kg-registry/registry/parquet';
for (const table of tables) {
const parquetFile = `${parquetDir}/${table}.parquet`;
try {
const resp = await fetch(parquetFile);
if (!resp.ok) {
throw new Error(`Failed to fetch Parquet file: ${resp.status} ${resp.statusText}`);
}
const parquetData = await resp.arrayBuffer();
const parquetFileName = `${table}.parquet`;
await db.registerFileBuffer(parquetFileName, new Uint8Array(parquetData));
await conn.query(`CREATE VIEW ${table} AS SELECT * FROM read_parquet('${parquetFileName}')`);
console.log(`Loaded ${table} Parquet file`);
} catch (error) {
console.warn(`Error loading ${table} Parquet file:`, error);
// Create a fallback empty table if Parquet file couldn't be loaded
if (table === 'resources') {
await conn.query(`
CREATE TABLE resources (id VARCHAR, name VARCHAR, category VARCHAR, description VARCHAR);
INSERT INTO resources VALUES
('example1', 'Example Resource 1', 'DataSource', 'This is a sample resource');
`);
} else if (table === 'resource_domains') {
await conn.query(`
CREATE TABLE resource_domains (resource_id VARCHAR, domain VARCHAR);
INSERT INTO resource_domains VALUES ('example1', 'sample');
`);
} else if (table === 'resource_products') {
await conn.query(`
CREATE TABLE resource_products (
resource_id VARCHAR, product_id VARCHAR, product_name VARCHAR,
product_category VARCHAR, product_description VARCHAR,
product_format VARCHAR, product_url VARCHAR
);
`);
} else if (table === 'resource_taxa') {
await conn.query(`
CREATE TABLE resource_taxa (resource_id VARCHAR, taxon VARCHAR);
INSERT INTO resource_taxa VALUES ('example1', 'NCBITaxon:1');
`);
}
}
}
// Make database available globally
window.duckdb_db = db;
window.duckdb_conn = conn;
// Mark as initialized
window.duckdbInitialized = true;
// Hide loading message and enable query button
document.getElementById('duckdb-loading-message').style.display = 'none';
document.getElementById('results').innerHTML = '<div class="alert alert-success">DuckDB initialized successfully. Ready to run queries.</div>';
document.getElementById('runQuery').disabled = false;
// Set up event listeners
setupEventListeners();
} catch (error) {
console.error('Error initializing DuckDB:', error);
// Prepare error message
let errorMessage = error.message || 'Unknown error';
let solutionHint = '<p>This could be due to:</p>' +
'<ul>' +
'<li>Browser compatibility issues with WebAssembly</li>' +
'<li>Network issues when loading required files</li>' +
'<li>Browser extensions blocking scripts or workers</li>' +
'</ul>' +
'<p>Please try with a different browser (Chrome or Firefox recommended) or disable browser extensions.</p>';
// Special message for CORS errors
if (errorMessage.includes('CORS') || errorMessage.includes('SecurityError')) {
solutionHint = '<p>This appears to be a CORS (Cross-Origin Resource Sharing) issue.</p>' +
'<ul>' +
'<li><strong>If running locally:</strong> Try starting Jekyll with <code>bundle exec jekyll serve --host=0.0.0.0</code></li>' +
'<li><strong>If using Chrome:</strong> Try launching with <code>--allow-file-access-from-files</code> flag</li>' +
'<li><strong>Alternative solution:</strong> Access this page from a proper web server instead of localhost</li>' +
'</ul>';
}
// Display error to user
document.getElementById('duckdb-loading-message').style.display = 'none';
document.getElementById('results').innerHTML =
'<div class="alert alert-danger">' +
'<h5>Failed to initialize DuckDB:</h5>' +
'<p>' + errorMessage + '</p>' +
'<hr>' +
solutionHint +
'</div>';
}
}
// Function to execute SQL query
async function runQuery(sql) {
try {
if (!window.duckdbInitialized || !window.duckdb_conn) {
throw new Error('Database connection not established');
}
document.getElementById('loading').classList.remove('d-none');
document.getElementById('results').innerHTML = '';
document.getElementById('downloadResults').disabled = true;
// Execute the query
const result = await window.duckdb_conn.query(sql);
// Debug information
console.log('Query result:', result);
console.log('Schema:', result.schema);
console.log('Fields:', result.schema.fields);
if (result.toArray().length > 0) {
console.log('Sample row:', result.toArray()[0]);
console.log('Arrow vector type:', typeof result.getChildAt(0));
console.log('Arrow column value type:', typeof result.getChildAt(0).get(0));
}
// Store the results for potential download
window.currentResults = result;
// Display the results
const resultArray = [];
// Arrow format returns a Table object with schema and batches
// Extract field names from schema
const fields = result.schema.fields.map(f => f.name);
console.log('Field names:', fields);
// Get the data in Arrow format
const rowCount = result.numRows;
console.log('Row count:', rowCount);
// Process each row
for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
const obj = {};
// Process each column in the row
for (let colIndex = 0; colIndex < fields.length; colIndex++) {
// Get column vector
const vector = result.getChildAt(colIndex);
// Get value at row index
const value = vector.get(rowIndex);
// Store value with field name
obj[fields[colIndex]] = value;
}
resultArray.push(obj);
}
console.log('Processed rows:', resultArray);
// Create table HTML
let table = '<table class="table table-striped table-hover">';
// Add headers (if there are results)
if (resultArray.length > 0) {
table += '<thead><tr>';
for (const field of fields) {
table += '<th>' + field + '</th>';
}
table += '</tr></thead>';
}
// Add rows
table += '<tbody>';
for (const row of resultArray) {
table += '<tr>';
for (const field of fields) {
const value = row[field];
table += '<td>' + (value !== null && value !== undefined ? value : '<em>null</em>') + '</td>';
}
table += '</tr>';
}
table += '</tbody></table>';
if (resultArray.length === 0) {
document.getElementById('results').innerHTML = '<div class="alert alert-info">Query executed successfully, but no results were returned.</div>';
} else {
document.getElementById('results').innerHTML = table;
document.getElementById('downloadResults').disabled = false;
}
} catch (e) {
console.error('Error running query:', e);
document.getElementById('results').innerHTML = '<div class="alert alert-danger">Error running query: ' + e.message + '</div>';
} finally {
document.getElementById('loading').classList.add('d-none');
}
}
// Function to download results as CSV
function downloadCSV() {
if (!window.currentResults) return;
const result = window.currentResults;
// Create CSV content
let csv = [];
// Get field names from schema
const fields = result.schema.fields.map(f => f.name);
// Add headers
csv.push(fields.join(','));
// Add rows - get values directly from Arrow format
const rowCount = result.numRows;
for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
const rowValues = [];
// Process each column
for (let colIndex = 0; colIndex < fields.length; colIndex++) {
// Get column vector
const vector = result.getChildAt(colIndex);
// Get value at row index
const value = vector.get(rowIndex);
// Format for CSV
rowValues.push(value !== null && value !== undefined
? '"' + String(value).replace(/"/g, '""') + '"'
: '""');
}
csv.push(rowValues.join(','));
}
// Create and download the file
const csvContent = csv.join('\n');
const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
link.setAttribute('href', url);
link.setAttribute('download', 'kg_registry_results.csv');
link.style.visibility = 'hidden';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
// Function to set up event listeners
function setupEventListeners() {
document.getElementById('runQuery').addEventListener('click', function() {
const sql = document.getElementById('sqlQuery').value;
runQuery(sql);
});
document.getElementById('resetQuery').addEventListener('click', function() {
document.getElementById('sqlQuery').value = 'SELECT * FROM resources LIMIT 10;';
document.getElementById('results').innerHTML = '<p class="text-muted">Run a query to see results</p>';
document.getElementById('downloadResults').disabled = true;
window.currentResults = null;
});
document.getElementById('downloadResults').addEventListener('click', downloadCSV);
// Example query buttons
document.querySelectorAll('.example-query').forEach(button => {
button.addEventListener('click', function() {
const query = this.getAttribute('data-query');
document.getElementById('sqlQuery').value = query;
runQuery(query);
});
});
}
// Export functions to global scope
window.runQuery = runQuery;
window.downloadCSV = downloadCSV;
window.setupEventListeners = setupEventListeners;
</script>