2626 * Incoming data file might not have section headers, so don't make
2727 * code dependent on them.
2828 *
29+ * Assumption — JSON field name consistency across records:
30+ * When building the data header from a JSON file, all records (rows) of a
31+ * given class/table are scanned and their field names are unioned together.
32+ * This works correctly when all records use the same field names. However,
33+ * if some records have a typo in a field name (e.g. "sample_collector_sample_idz"
34+ * instead of "sample_collector_sample_id"), perhaps introduced by merging records
35+ * from another data file, the union of field names will contain BOTH the correct
36+ * and the mistyped name. The correct name will match the schema and be loaded;
37+ * the mistyped name will appear in the FieldMapper report as an unmatched data
38+ * field (visible when the "concise" checkbox is off). There is no practical UI
39+ * mechanism to resolve this: two distinct data fields cannot be merged into one
40+ * schema slot, and even if a user could specify such a mapping the per-record
41+ * ambiguity (correct name in some records, typo in others) has no clean
42+ * resolution at load time. The recommended remedy is to fix the source JSON
43+ * file so that all records of a class share identical field names.
44+ *
2945 * localStorage "DataHarmonizer" stores all objects via a YAML string.
3046 * - DataHarmonizer.schema = {} object holding schema name keys. Current
3147 * loaded schema name key will be added if working on field-mappings.
@@ -42,7 +58,8 @@ import $ from 'jquery';
4258import { readBrowserDHSettings , saveBrowserDHSettings , clearDH } from './Toolbar' ;
4359import YAML from 'yaml' ;
4460import { updateSheetRange } from '../lib/utils/files' ;
45- import { dataObjectToArray } from '../lib/utils/fields' ;
61+ // dataObjectToArray no longer used (JSON rows pre-converted to arrays in openJSONDataFile)
62+ // import { dataObjectToArray } from '../lib/utils/fields';
4663import { utils as XlsxUtils } from 'xlsx/xlsx.js' ;
4764
4865// A call like fm = new FieldMapper().bind(this), provides caller environment
@@ -184,7 +201,7 @@ export class FieldMapper {
184201 // as possible
185202 dh . slot_names . forEach ( ( slot_name , index ) => {
186203 let value = null ;
187- if ( index in slot_to_data_col ) {
204+ if ( index in slot_to_data_col && slot_to_data_col [ index ] >= 0 ) {
188205 value = row [ slot_to_data_col [ index ] ] ;
189206 }
190207 new_row . push ( value ) ;
@@ -196,24 +213,14 @@ export class FieldMapper {
196213 // Overwrite any (empty) fields with user-defined column mapping.
197214 Object . entries ( map_obj ?. map || { } ) . forEach ( ( [ ptr , mapping ] ) => {
198215 const col_from = data_field_to_col [ mapping . from ] ;
199- const col_to = dh . slot_title_to_column [ mapping . to ] ;
216+ const col_to = dh . slot_name_to_column [ mapping . to ] ;
200217 new_row [ col_to ] = row [ col_from ] ;
201218 } ) ;
202219 }
203220
204- // if JSON, then examine some slot's datatype fields and overwrite with
205- // new converted values.
206- // Determine if this is still needed.
207- if ( this . file . ext === 'json' ) {
208- new_table [ row_ptr ] = dataObjectToArray ( new_row , dh , {
209- serializedDateFormat : this . dateExportBehavior ,
210- dateFormat : this . dateFormat , // Probably NULL!
211- datetimeFormat : this . datetimeFormat , // Probably NULL!
212- timeFormat : this . timeFormat , // Probably NULL!
213- } ) ;
214- }
215- else
216- new_table . push ( new_row ) ;
221+ // JSON rows are now pre-converted to value arrays in openJSONDataFile,
222+ // so push new_row the same way as tabular data.
223+ new_table . push ( new_row ) ;
217224
218225 } )
219226
@@ -244,7 +251,7 @@ export class FieldMapper {
244251 * @param {Integer } header_row which is 1 more than actual row (natural number).
245252 * @param {String } file_name name of data file user selected for loading.
246253 */
247- appendFieldMappingModal ( dh ) {
254+ appendFieldMappingModal ( dh , append_html = true ) {
248255
249256 const data_fields = this . data [ dh . template . name ] . header ;
250257
@@ -258,8 +265,8 @@ export class FieldMapper {
258265
259266 // FUTURE: PROTECT AGAINST DUPLICATE FIELD NAMES IN DATA FILE.
260267 // Preliminary scan for all matches via ordered slot_names array
261- let slot_matches = new Array ( dh . slot_names . length ) . fill ( false ) ;
262- let data_matches = new Array ( data_fields . length ) . fill ( false ) ;
268+ let slot_matches = new Array ( dh . slot_names . length ) . fill ( - 1 ) ;
269+ let data_matches = new Array ( data_fields . length ) . fill ( - 1 ) ;
263270 //let found_by_title = false;
264271 dh . slot_names . forEach ( ( slot_name , index ) => {
265272 if ( slot_name in data_field_to_col ) { // JSON data matches on slot.name
@@ -284,7 +291,10 @@ export class FieldMapper {
284291 this . data [ dh . template . name ] . slot_to_data_col_matches = slot_matches ;
285292 //this.data[dh.template.name].data_matches = data_matches;
286293
287- // Display template/tab/class (i.e. this call to extend content is
294+ // When called for a perfectly-matched file, skip HTML generation.
295+ if ( ! append_html ) return ;
296+
297+ // Display template/tab/class (i.e. this call to extend content is
288298 // dedicated to that content).
289299 let html = `
290300 <tbody class="field-mapping-template">
@@ -307,18 +317,38 @@ export class FieldMapper {
307317 </tr>
308318 ` ;
309319
310- let old_match_data_row = null ;
320+ // last_shown_data_row tracks the last data file column index displayed.
321+ // Starting at -1 so the first pre-match check begins at data column 0.
322+ let last_shown_data_row = - 1 ;
323+
311324 Object . entries ( section . children ) . forEach ( ( [ section_slot_row , slot ] ) => {
312325
313- if ( slot_matches [ slot_row ] !== false && slot_matches [ slot_row ] >= 0 ) {
326+ if ( slot_matches [ slot_row ] >= 0 ) {
314327 const data_row = slot_matches [ slot_row ] ;
315- old_match_data_row = data_row ;
316- // HERE slots are always displayed by their title (for multilingual sanity?)
317- const ordering = ( slot_row != data_row ) ? `<span class="reordered">${ data_row } </span>` : data_row
328+
329+ // Show any unmatched data fields that appear BEFORE this slot's
330+ // match position. This ensures data columns skipped before the
331+ // first match (or between out-of-order matches) are not lost.
332+ for ( let dr = last_shown_data_row + 1 ; dr < data_row ; dr ++ ) {
333+ if ( data_matches [ dr ] < 0 && ! done_data_row [ dr ] ) {
334+ done_data_row [ dr ] = true ;
335+ html += `
336+ <tr class="field-mismatch">
337+ <td> </td>
338+ <td class="draggable-mapping-item field-mismatch" data-field-name="${ data_fields [ dr ] } ">${ dr } ) ${ data_fields [ dr ] } </td>
339+ </tr>` ;
340+ last_shown_data_row = dr ;
341+ }
342+ }
343+
344+ // Show the matched slot row. data-slot-name and data-field-name
345+ // attributes are used by getProfileMapping() instead of text parsing.
346+ last_shown_data_row = data_row ;
347+ const ordering = ( slot_row != data_row ) ? `<span class="reordered">${ data_row } </span>` : data_row ;
318348 html += `
319349 <tr class="field-match">
320- <td>${ slot_row } ) ${ slot . title } </td>
321- <td class="field-match">${ ordering } ) ${ data_fields [ data_row ] } </td>
350+ <td data-slot-name=" ${ slot . name } " >${ slot_row } ) ${ slot . title } </td>
351+ <td class="field-match" data-field-name=" ${ data_fields [ data_row ] } " >${ ordering } ) ${ data_fields [ data_row ] } </td>
322352 </tr>` ;
323353
324354 }
@@ -327,33 +357,24 @@ export class FieldMapper {
327357 // Do slot side's mismatched item.
328358 html += `
329359 <tr class="field-mismatch">
330- <td class="field-mismatch">${ slot_row } ) ${ slot . title } </td>
331- <td class="draggable-mapping-item field-mismatch"></td>
360+ <td class="field-mismatch" data-slot-name=" ${ slot . name } " >${ slot_row } ) ${ slot . title } </td>
361+ <td class="draggable-mapping-item field-mismatch" data-field-name="" ></td>
332362 </tr>` ;
333- }
334363
335- // If we don't have a match, then we have to ensure all the
336- // mismatched field headers for both tables are provided until
337- // the next match. (If the data table had columns whose order is
338- // completely different, then semantically the template section
339- // slots will have unrelated fields near them, c'est la vie.)
340-
341- // Possibility that next data row(s) are not a match so squeeze them
342- // in here.
343- // Add each data_matches' entry between last match and next one.
344- let data_row = ( old_match_data_row || - 1 ) + 1 ;
345- while ( data_row < data_matches . length
346- && ( data_matches [ data_row ] === false
347- && ! done_data_row [ data_row ] )
348- ) {
349- done_data_row [ data_row ] = true ; // Best way to handle this case?
350- const ordering = slot_row != data_row ? 'reordered' : ''
351- html += `
352- <tr class="field-mismatch">
353- <td> </td>
354- <td class="draggable-mapping-item field-mismatch">${ data_row } ) ${ data_fields [ data_row ] } </td>
355- </tr>` ;
356- data_row += 1 ;
364+ // Show unmatched data fields after last shown, up to the next match.
365+ let dr = last_shown_data_row + 1 ;
366+ while ( dr < data_matches . length
367+ && ( data_matches [ dr ] < 0 && ! done_data_row [ dr ] )
368+ ) {
369+ done_data_row [ dr ] = true ;
370+ html += `
371+ <tr class="field-mismatch">
372+ <td> </td>
373+ <td class="draggable-mapping-item field-mismatch" data-field-name="${ data_fields [ dr ] } ">${ dr } ) ${ data_fields [ dr ] } </td>
374+ </tr>` ;
375+ last_shown_data_row = dr ;
376+ dr ++ ;
377+ }
357378 }
358379
359380 slot_row += 1 ;
@@ -423,43 +444,36 @@ export class FieldMapper {
423444 */
424445 getProfileMapping ( ) {
425446
426- function get_label ( nmstr ) {
427- const i = nmstr . indexOf ( ' ' ) + 1 ;
428- return ( i ? nmstr . slice ( i ) : '' )
429- } ;
430- // In each tbody[data-table] section that has a template_name, look for
431- // any tr which has a "field-mismatch" attribute. That tr first td will
432- // have slotname of schema/template.
433- // Each
447+ // Read slot name and data field name from data attributes rather than
448+ // parsing the visible "N) fieldname" text, which was fragile.
434449 const schema = this . context . getSchemaRef ( ) ;
435450 let mapping = {
436451 schema_version : schema . version ,
437- tables : { }
452+ tables : { }
438453 } ;
439454
440455 $ ( 'table#field-mapping-table tbody[data-table]' ) . each ( ( t_index , tbody ) => {
441456
442457 // A given schema class often has multiple tbody, each for a data-table section
443458 const table_name = $ ( tbody ) . attr ( 'data-table' ) ;
444459
445- mapping . tables [ table_name ] ??= { } ; // init empty value if it doesn't exist.
446- $ ( tbody ) . find ( 'td:first-child.field-mismatch' ) . each ( ( index , slot_field ) => {
447- // Retrieve labels, get past column id) prefix;
448- const data_label = get_label ( $ ( slot_field ) . next ( 'td' ) . text ( ) ) ;
449- if ( data_label . length > 0 ) { // target has a mapping in it.
460+ mapping . tables [ table_name ] ??= { } ;
461+ // Only look at unmatched slot rows (left cell has data-slot-name).
462+ $ ( tbody ) . find ( 'td:first-child.field-mismatch[data-slot-name]' ) . each ( ( index , slot_field ) => {
463+ const data_field_name = $ ( slot_field ) . next ( 'td' ) . attr ( 'data-field-name' ) ;
464+ if ( data_field_name && data_field_name . length > 0 ) {
450465 let row_map = {
451- 'to' : get_label ( $ ( slot_field ) . text ( ) ) ,
452- 'from' : data_label
466+ 'to' : $ ( slot_field ) . attr ( 'data-slot-name' ) ,
467+ 'from' : data_field_name
453468 } ;
454469 const table_section = $ ( tbody ) . attr ( 'data-section' ) || '' ;
455470 if ( table_section )
456471 row_map [ 'section' ] = table_section ;
457472
458- // Adds table if it isn't present.
459- mapping . tables [ table_name ] . map ??= [ ] ; // establishes array if not there.
473+ mapping . tables [ table_name ] . map ??= [ ] ;
460474 mapping . tables [ table_name ] . map . push ( row_map ) ;
461475 }
462- } )
476+ } ) ;
463477 // The .map attribute exists only if 1+ mappings.
464478
465479 } ) ;
@@ -544,41 +558,50 @@ export class FieldMapper {
544558 //activeClass: "ui-state-active",
545559 hoverClass : "ui-state-hover" ,
546560 drop : function ( event , ui ) {
561+ // Swap visible text labels.
547562 const source_text = ui . draggable [ 0 ] . innerText ;
548563 ui . draggable [ 0 ] . innerText = event . target . innerText ;
549564 event . target . innerText = source_text ;
565+ // Swap data-field-name attributes so getProfileMapping() reads
566+ // the correct field names without text parsing.
567+ const src_field = $ ( ui . draggable [ 0 ] ) . attr ( 'data-field-name' ) ;
568+ const tgt_field = $ ( event . target ) . attr ( 'data-field-name' ) ;
569+ $ ( ui . draggable [ 0 ] ) . attr ( 'data-field-name' , tgt_field ?? '' ) ;
570+ $ ( event . target ) . attr ( 'data-field-name' , src_field ?? '' ) ;
550571 $ ( this ) . css ( "background-color" , "lightskyblue" ) ;
551- ui . draggable . css ( "background-color" , "lightblue" ) ;
572+ ui . draggable . css ( "background-color" , "lightblue" ) ;
552573 }
553574 } ) ;
554575 }
555576
556- // User has dragged one field down to the row of another within a table.
557- // Switch the labels of the selected fields.
558- // WARNING: THIS TEXT MATCHING ALGORITHM IS VERY SENSITIVE TO spaces etc.
559- // in field label HTML display
577+ // Apply a saved mapping profile to the current field-mapping display.
578+ // Uses data-slot-name and data-field-name attributes for reliable matching.
560579 applyFieldMapping ( profile_name ) {
561580 const [ dh_settings , profile ] = this . getProfile ( profile_name ) ;
562581
563582 Object . entries ( profile . tables || { } ) . forEach ( ( [ table_name , table_obj ] ) => {
564583
565- const mismatched_rows = $ ( `table#field-mapping-table tbody[data-table="${ table_name } "] > tr.field-mismatch` ) ;
566584 // Doing table by table; otherwise identical field names in different
567585 // tables lead to garbled rule implementation.
568-
569586 Object . entries ( table_obj ?. map || { } ) . forEach ( ( [ index , mapping ] ) => {
570- // Find mapping.from text. mapping.from td will initially have empty
571- // data field td. First fetch the 2nd data file field value
572- const schema_slot_td = $ ( mismatched_rows ) . find ( 'td:first-child' )
573- . filter ( function ( ) { return $ ( this ) . text ( ) . endsWith ( ') ' + mapping . to ) } ) ;
574- const schema_data_td = $ ( mismatched_rows ) . find ( 'td:eq(1)' )
575- . filter ( function ( ) { return $ ( this ) . text ( ) . endsWith ( ') ' + mapping . from ) } ) ;
576-
577- // Now do the switch of values as mapping dictates:
587+ // Find the unmatched slot TD by data-slot-name attribute.
588+ const schema_slot_td = $ (
589+ `table#field-mapping-table tbody[data-table="${ table_name } "] > tr.field-mismatch > td:first-child[data-slot-name="${ mapping . to } "]`
590+ ) ;
591+ // Find the data field TD by data-field-name attribute.
592+ const schema_data_td = $ (
593+ `table#field-mapping-table tbody[data-table="${ table_name } "] > tr.field-mismatch > td.draggable-mapping-item[data-field-name="${ mapping . from } "]`
594+ ) ;
595+
596+ // Move data-field-name from the source data TD to the slot's right TD.
597+ const src_field = $ ( schema_data_td ) . attr ( 'data-field-name' ) ;
598+ $ ( schema_data_td ) . attr ( 'data-field-name' , '' ) ;
599+ $ ( schema_slot_td ) . next ( 'td' ) . attr ( 'data-field-name' , src_field ?? '' ) ;
600+
601+ // Move the visible text label as well.
578602 const source_data_td_text = $ ( schema_data_td ) . text ( ) ;
579- $ ( schema_data_td ) . text ( '' ) ; // Clear out old data td side.
603+ $ ( schema_data_td ) . text ( '' ) ;
580604 $ ( schema_slot_td ) . next ( 'td' ) . text ( source_data_td_text ) ;
581-
582605 } ) ;
583606 } ) ;
584607 }
0 commit comments