Skip to content

Commit 147ed13

Browse files
committed
switched algorithm to use data-field-name attribute on dragged td items
1 parent 1405828 commit 147ed13

2 files changed

Lines changed: 121 additions & 88 deletions

File tree

docs/README_user_manual.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,16 @@ Select the profile in the dropdown, then click **Delete profile**. The profile i
243243

244244
Click **Reset** to discard any drag-drop changes made in the current session and restore the table to its auto-detected state (as it appeared when the file was first loaded).
245245

246+
### Limitation — JSON files must have consistent field names across all records
247+
248+
When loading a JSON data file, DataHarmonizer builds the column header by scanning every record (row) in each table and collecting the union of all field names encountered. This works correctly when every record in a given class/table uses the same set of field names.
249+
250+
However, if some records contain a typo or variant spelling of a field name — for example because records were merged in from another source — the union will contain **both** the correct name and the mistyped one. The correctly named field will match the schema and its data will load normally. The mistyped name will appear in the Field Mapper report as an unmatched data column (visible when the **Concise view** checkbox is off).
251+
252+
This situation cannot be resolved through the Field Mapper: two distinct data columns cannot both be mapped to the same schema field, and even if they could, the per-record ambiguity (which name applies to which record) has no clean resolution at load time.
253+
254+
**Remedy:** fix the JSON file directly so that every record of a given class uses identical field names before loading it into DataHarmonizer.
255+
246256
---
247257

248258
## Clearing Data

lib/FieldMapper.js

Lines changed: 111 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,22 @@
2626
* Incoming data file might not have section headers, so don't make
2727
* code dependent on them.
2828
*
29+
* Assumption — JSON field name consistency across records:
30+
* When building the data header from a JSON file, all records (rows) of a
31+
* given class/table are scanned and their field names are unioned together.
32+
* This works correctly when all records use the same field names. However,
33+
* if some records have a typo in a field name (e.g. "sample_collector_sample_idz"
34+
* instead of "sample_collector_sample_id"), perhaps introduced by merging records
35+
* from another data file, the union of field names will contain BOTH the correct
36+
* and the mistyped name. The correct name will match the schema and be loaded;
37+
* the mistyped name will appear in the FieldMapper report as an unmatched data
38+
* field (visible when the "concise" checkbox is off). There is no practical UI
39+
* mechanism to resolve this: two distinct data fields cannot be merged into one
40+
* schema slot, and even if a user could specify such a mapping the per-record
41+
* ambiguity (correct name in some records, typo in others) has no clean
42+
* resolution at load time. The recommended remedy is to fix the source JSON
43+
* file so that all records of a class share identical field names.
44+
*
2945
* localStorage "DataHarmonizer" stores all objects via a YAML string.
3046
* - DataHarmonizer.schema = {} object holding schema name keys. Current
3147
* loaded schema name key will be added if working on field-mappings.
@@ -42,7 +58,8 @@ import $ from 'jquery';
4258
import {readBrowserDHSettings, saveBrowserDHSettings, clearDH} from './Toolbar';
4359
import YAML from 'yaml';
4460
import { updateSheetRange } from '../lib/utils/files';
45-
import { dataObjectToArray } from '../lib/utils/fields';
61+
// dataObjectToArray no longer used (JSON rows pre-converted to arrays in openJSONDataFile)
62+
// import { dataObjectToArray } from '../lib/utils/fields';
4663
import { utils as XlsxUtils } from 'xlsx/xlsx.js';
4764

4865
// A call like fm = new FieldMapper().bind(this), provides caller environment
@@ -184,7 +201,7 @@ export class FieldMapper {
184201
// as possible
185202
dh.slot_names.forEach((slot_name, index) => {
186203
let value = null;
187-
if (index in slot_to_data_col) {
204+
if (index in slot_to_data_col && slot_to_data_col[index] >= 0) {
188205
value = row[slot_to_data_col[index]];
189206
}
190207
new_row.push(value);
@@ -196,24 +213,14 @@ export class FieldMapper {
196213
// Overwrite any (empty) fields with user-defined column mapping.
197214
Object.entries(map_obj?.map || {}).forEach(([ptr, mapping]) => {
198215
const col_from = data_field_to_col[mapping.from];
199-
const col_to = dh.slot_title_to_column[mapping.to];
216+
const col_to = dh.slot_name_to_column[mapping.to];
200217
new_row[col_to] = row[col_from];
201218
});
202219
}
203220

204-
// if JSON, then examine some slot's datatype fields and overwrite with
205-
// new converted values.
206-
// Determine if this is still needed.
207-
if (this.file.ext === 'json') {
208-
new_table[row_ptr] = dataObjectToArray(new_row, dh, {
209-
serializedDateFormat: this.dateExportBehavior,
210-
dateFormat: this.dateFormat,// Probably NULL!
211-
datetimeFormat: this.datetimeFormat, // Probably NULL!
212-
timeFormat: this.timeFormat,// Probably NULL!
213-
});
214-
}
215-
else
216-
new_table.push(new_row);
221+
// JSON rows are now pre-converted to value arrays in openJSONDataFile,
222+
// so push new_row the same way as tabular data.
223+
new_table.push(new_row);
217224

218225
})
219226

@@ -244,7 +251,7 @@ export class FieldMapper {
244251
* @param {Integer} header_row which is 1 more than actual row (natural number).
245252
* @param {String} file_name name of data file user selected for loading.
246253
*/
247-
appendFieldMappingModal(dh) {
254+
appendFieldMappingModal(dh, append_html = true) {
248255

249256
const data_fields = this.data[dh.template.name].header;
250257

@@ -258,8 +265,8 @@ export class FieldMapper {
258265

259266
// FUTURE: PROTECT AGAINST DUPLICATE FIELD NAMES IN DATA FILE.
260267
// Preliminary scan for all matches via ordered slot_names array
261-
let slot_matches = new Array(dh.slot_names.length).fill(false);
262-
let data_matches = new Array(data_fields.length).fill(false);
268+
let slot_matches = new Array(dh.slot_names.length).fill(-1);
269+
let data_matches = new Array(data_fields.length).fill(-1);
263270
//let found_by_title = false;
264271
dh.slot_names.forEach((slot_name, index) => {
265272
if (slot_name in data_field_to_col) { // JSON data matches on slot.name
@@ -284,7 +291,10 @@ export class FieldMapper {
284291
this.data[dh.template.name].slot_to_data_col_matches = slot_matches;
285292
//this.data[dh.template.name].data_matches = data_matches;
286293

287-
// Display template/tab/class (i.e. this call to extend content is
294+
// When called for a perfectly-matched file, skip HTML generation.
295+
if (!append_html) return;
296+
297+
// Display template/tab/class (i.e. this call to extend content is
288298
// dedicated to that content).
289299
let html = `
290300
<tbody class="field-mapping-template">
@@ -307,18 +317,38 @@ export class FieldMapper {
307317
</tr>
308318
`;
309319

310-
let old_match_data_row = null;
320+
// last_shown_data_row tracks the last data file column index displayed.
321+
// Starting at -1 so the first pre-match check begins at data column 0.
322+
let last_shown_data_row = -1;
323+
311324
Object.entries(section.children).forEach(([section_slot_row, slot]) => {
312325

313-
if (slot_matches[slot_row] !== false && slot_matches[slot_row] >= 0) {
326+
if (slot_matches[slot_row] >= 0) {
314327
const data_row = slot_matches[slot_row];
315-
old_match_data_row = data_row;
316-
// HERE slots are always displayed by their title (for multilingual sanity?)
317-
const ordering = (slot_row != data_row) ? `<span class="reordered">${data_row}</span>` : data_row
328+
329+
// Show any unmatched data fields that appear BEFORE this slot's
330+
// match position. This ensures data columns skipped before the
331+
// first match (or between out-of-order matches) are not lost.
332+
for (let dr = last_shown_data_row + 1; dr < data_row; dr++) {
333+
if (data_matches[dr] < 0 && !done_data_row[dr]) {
334+
done_data_row[dr] = true;
335+
html += `
336+
<tr class="field-mismatch">
337+
<td>&nbsp;</td>
338+
<td class="draggable-mapping-item field-mismatch" data-field-name="${data_fields[dr]}">${dr}) ${data_fields[dr]}</td>
339+
</tr>`;
340+
last_shown_data_row = dr;
341+
}
342+
}
343+
344+
// Show the matched slot row. data-slot-name and data-field-name
345+
// attributes are used by getProfileMapping() instead of text parsing.
346+
last_shown_data_row = data_row;
347+
const ordering = (slot_row != data_row) ? `<span class="reordered">${data_row}</span>` : data_row;
318348
html += `
319349
<tr class="field-match">
320-
<td>${slot_row}) ${slot.title}</td>
321-
<td class="field-match">${ordering}) ${data_fields[data_row]}</td>
350+
<td data-slot-name="${slot.name}">${slot_row}) ${slot.title}</td>
351+
<td class="field-match" data-field-name="${data_fields[data_row]}">${ordering}) ${data_fields[data_row]}</td>
322352
</tr>`;
323353

324354
}
@@ -327,33 +357,24 @@ export class FieldMapper {
327357
// Do slot side's mismatched item.
328358
html += `
329359
<tr class="field-mismatch">
330-
<td class="field-mismatch">${slot_row}) ${slot.title}</td>
331-
<td class="draggable-mapping-item field-mismatch"></td>
360+
<td class="field-mismatch" data-slot-name="${slot.name}">${slot_row}) ${slot.title}</td>
361+
<td class="draggable-mapping-item field-mismatch" data-field-name=""></td>
332362
</tr>`;
333-
}
334363

335-
// If we don't have a match, then we have to ensure all the
336-
// mismatched field headers for both tables are provided until
337-
// the next match. (If the data table had columns whose order is
338-
// completely different, then semantically the template section
339-
// slots will have unrelated fields near them, c'est la vie.)
340-
341-
// Possibility that next data row(s) are not a match so squeeze them
342-
// in here.
343-
// Add each data_matches' entry between last match and next one.
344-
let data_row = (old_match_data_row || -1) + 1;
345-
while (data_row < data_matches.length
346-
&& (data_matches[data_row] === false
347-
&& ! done_data_row[data_row])
348-
){
349-
done_data_row[data_row] = true; // Best way to handle this case?
350-
const ordering = slot_row != data_row ? 'reordered' : ''
351-
html += `
352-
<tr class="field-mismatch">
353-
<td>&nbsp;</td>
354-
<td class="draggable-mapping-item field-mismatch">${data_row}) ${data_fields[data_row]}</td>
355-
</tr>`;
356-
data_row += 1;
364+
// Show unmatched data fields after last shown, up to the next match.
365+
let dr = last_shown_data_row + 1;
366+
while (dr < data_matches.length
367+
&& (data_matches[dr] < 0 && !done_data_row[dr])
368+
) {
369+
done_data_row[dr] = true;
370+
html += `
371+
<tr class="field-mismatch">
372+
<td>&nbsp;</td>
373+
<td class="draggable-mapping-item field-mismatch" data-field-name="${data_fields[dr]}">${dr}) ${data_fields[dr]}</td>
374+
</tr>`;
375+
last_shown_data_row = dr;
376+
dr++;
377+
}
357378
}
358379

359380
slot_row += 1;
@@ -423,43 +444,36 @@ export class FieldMapper {
423444
*/
424445
getProfileMapping() {
425446

426-
function get_label (nmstr) {
427-
const i = nmstr.indexOf(' ')+1;
428-
return (i ? nmstr.slice(i) : '')
429-
};
430-
// In each tbody[data-table] section that has a template_name, look for
431-
// any tr which has a "field-mismatch" attribute. That tr first td will
432-
// have slotname of schema/template.
433-
// Each
447+
// Read slot name and data field name from data attributes rather than
448+
// parsing the visible "N) fieldname" text, which was fragile.
434449
const schema = this.context.getSchemaRef();
435450
let mapping = {
436451
schema_version: schema.version,
437-
tables: {}
452+
tables: {}
438453
};
439454

440455
$('table#field-mapping-table tbody[data-table]').each((t_index, tbody) => {
441456

442457
// A given schema class often has multiple tbody, each for a data-table section
443458
const table_name = $(tbody).attr('data-table');
444459

445-
mapping.tables[table_name]??= {}; // init empty value if it doesn't exist.
446-
$(tbody).find('td:first-child.field-mismatch').each((index, slot_field) => {
447-
// Retrieve labels, get past column id) prefix;
448-
const data_label = get_label($(slot_field).next('td').text());
449-
if (data_label.length > 0) {// target has a mapping in it.
460+
mapping.tables[table_name] ??= {};
461+
// Only look at unmatched slot rows (left cell has data-slot-name).
462+
$(tbody).find('td:first-child.field-mismatch[data-slot-name]').each((index, slot_field) => {
463+
const data_field_name = $(slot_field).next('td').attr('data-field-name');
464+
if (data_field_name && data_field_name.length > 0) {
450465
let row_map = {
451-
'to': get_label($(slot_field).text()),
452-
'from': data_label
466+
'to': $(slot_field).attr('data-slot-name'),
467+
'from': data_field_name
453468
};
454469
const table_section = $(tbody).attr('data-section') || '';
455470
if (table_section)
456471
row_map['section'] = table_section;
457472

458-
// Adds table if it isn't present.
459-
mapping.tables[table_name].map??= []; // establishes array if not there.
473+
mapping.tables[table_name].map ??= [];
460474
mapping.tables[table_name].map.push(row_map);
461475
}
462-
})
476+
});
463477
// The .map attribute exists only if the table has one or more mappings.
464478

465479
});
@@ -544,41 +558,50 @@ export class FieldMapper {
544558
//activeClass: "ui-state-active",
545559
hoverClass: "ui-state-hover",
546560
drop: function(event, ui) {
561+
// Swap visible text labels.
547562
const source_text = ui.draggable[0].innerText;
548563
ui.draggable[0].innerText = event.target.innerText;
549564
event.target.innerText = source_text;
565+
// Swap data-field-name attributes so getProfileMapping() reads
566+
// the correct field names without text parsing.
567+
const src_field = $(ui.draggable[0]).attr('data-field-name');
568+
const tgt_field = $(event.target).attr('data-field-name');
569+
$(ui.draggable[0]).attr('data-field-name', tgt_field ?? '');
570+
$(event.target).attr('data-field-name', src_field ?? '');
550571
$(this).css("background-color", "lightskyblue");
551-
ui.draggable.css("background-color", "lightblue");
572+
ui.draggable.css("background-color", "lightblue");
552573
}
553574
});
554575
}
555576

556-
// User has dragged one field down to the row of another within a table.
557-
// Switch the labels of the selected fields.
558-
// WARNING: THIS TEXT MATCHING ALGORITHM IS VERY SENSITIVE TO spaces etc.
559-
// in field label HTML display
577+
// Apply a saved mapping profile to the current field-mapping display.
578+
// Uses data-slot-name and data-field-name attributes for reliable matching.
560579
applyFieldMapping(profile_name) {
561580
const [dh_settings, profile] = this.getProfile(profile_name);
562581

563582
Object.entries(profile.tables || {}).forEach(([table_name, table_obj]) => {
564583

565-
const mismatched_rows = $(`table#field-mapping-table tbody[data-table="${table_name}"] > tr.field-mismatch`);
566584
// Doing table by table; otherwise identical field names in different
567585
// tables lead to garbled rule implementation.
568-
569586
Object.entries(table_obj?.map || {}).forEach(([index, mapping]) => {
570-
// Find mapping.from text. mapping.from td will initially have empty
571-
// data field td. First fetch the 2nd data file field value
572-
const schema_slot_td = $(mismatched_rows).find('td:first-child')
573-
.filter(function() {return $(this).text().endsWith(') ' + mapping.to)});
574-
const schema_data_td = $(mismatched_rows).find('td:eq(1)')
575-
.filter(function() {return $(this).text().endsWith(') ' + mapping.from)});
576-
577-
// Now do the switch of values as mapping dictates:
587+
// Find the unmatched slot TD by data-slot-name attribute.
588+
const schema_slot_td = $(
589+
`table#field-mapping-table tbody[data-table="${table_name}"] > tr.field-mismatch > td:first-child[data-slot-name="${mapping.to}"]`
590+
);
591+
// Find the data field TD by data-field-name attribute.
592+
const schema_data_td = $(
593+
`table#field-mapping-table tbody[data-table="${table_name}"] > tr.field-mismatch > td.draggable-mapping-item[data-field-name="${mapping.from}"]`
594+
);
595+
596+
// Move data-field-name from the source data TD to the slot's right TD.
597+
const src_field = $(schema_data_td).attr('data-field-name');
598+
$(schema_data_td).attr('data-field-name', '');
599+
$(schema_slot_td).next('td').attr('data-field-name', src_field ?? '');
600+
601+
// Move the visible text label as well.
578602
const source_data_td_text = $(schema_data_td).text();
579-
$(schema_data_td).text(''); // Clear out old data td side.
603+
$(schema_data_td).text('');
580604
$(schema_slot_td).next('td').text(source_data_td_text);
581-
582605
});
583606
});
584607
}

0 commit comments

Comments
 (0)