@@ -8,8 +8,8 @@ import Control.Monad (filterM)
88import Data.Aeson (FromJSON , parseJSON , withObject , (.:) , (.:?) )
99import Data.Either (lefts , rights )
1010import Data.Foldable (foldl' )
11- import Data.List (elemIndices , groupBy , sortBy , sortOn ,
12- transpose )
11+ import Data.List (elemIndices , groupBy , insertBy , sortBy ,
12+ sortOn , transpose )
1313import qualified Data.Map.Strict as M
1414import qualified Data.Set as Set
1515import Data.Version (Version )
@@ -111,17 +111,40 @@ mergeJannos xs =
111111
112112reorderJannoColumns :: ([String ], [[String ]]) -> ([String ], [[String ]])
113113reorderJannoColumns (oldCols, oldRowsData) =
114- let orderedCols = sortOn getOrder oldCols
115- orderingIndices = concatMap (`elemIndices` oldCols) orderedCols
114+ let baseOrderedCols = sortOn getOrder oldCols
115+ finalCols = applyNoteWeaving baseOrderedCols
116+ orderingIndices = concatMap (`elemIndices` oldCols) finalCols
116117 orderedRowsData = map (\ row -> map (row !! ) orderingIndices) oldRowsData
117- in (orderedCols , orderedRowsData)
118+ in (finalCols , orderedRowsData)
118119 where
119120 -- https://stackoverflow.com/a/26260968/3216883
120121 getOrder :: String -> Int
121122 getOrder k = M. findWithDefault (length jannoOrder) k ordermap
122123 ordermap :: M. Map String Int
123124 ordermap = M. fromList (zip jannoOrder [0 .. ])
124125
126+ -- _Note column weaving as in trident's Janno.hs module
127+ applyNoteWeaving :: [String ] -> [String ]
128+ applyNoteWeaving cols =
129+ let noteCols = filter isNote cols
130+ nonNoteCols = filter (not . isNote) cols
131+ in weave noteCols nonNoteCols
132+ where
133+ isNote x = reverse (takeWhile (/= ' _' ) (reverse x)) == " Note"
134+ weave :: [String ] -> [String ] -> [String ]
135+ weave inserts = reverse . insertByMulti findSpot inserts . reverse
136+ -- reverse, because Note columns should be at the end of column groups (e.g. Date_*)
137+ insertByMulti :: (a -> a -> Ordering ) -> [a ] -> [a ] -> [a ]
138+ insertByMulti _ [] xs = xs
139+ insertByMulti f (i: rest) xs = insertBy f i (insertByMulti f rest xs)
140+ findSpot :: String -> String -> Ordering
141+ findSpot i x
142+ | removeSuffix i == x = LT
143+ | removeSuffix x == x = GT
144+ | otherwise = findSpot i (removeSuffix x)
145+ removeSuffix :: String -> String
146+ removeSuffix = reverse . drop 1 . dropWhile (/= ' _' ) . reverse
147+
125148jannoOrder :: [String ]
126149jannoOrder = " package_title" : " package_version" : " source_file" : jannoHeader
127150
@@ -130,44 +153,25 @@ jannoHeader = [
130153 " Poseidon_ID"
131154 , " Genetic_Sex"
132155 , " Group_Name"
133- , " Alternative_IDs"
134- , " Relation_To"
135- , " Relation_Degree"
136- , " Relation_Type"
137- , " Relation_Note"
138- , " Collection_ID"
139- , " Country"
140- , " Country_ISO"
141- , " Location"
142- , " Site"
143- , " Latitude"
144- , " Longitude"
156+ , " Individual_ID"
157+ , " Species"
158+ , " Alternative_IDs" , " Alternative_IDs_Context"
159+ , " Relation_To" , " Relation_Degree" , " Relation_Type"
160+ , " Collection_ID" , " Custodian_Institution"
161+ , " Cultural_Era" , " Cultural_Era_URL" , " Archaeological_Culture" , " Archaeological_Culture_URL"
162+ , " Country" , " Country_ISO"
163+ , " Location" , " Site" , " Latitude" , " Longitude"
145164 , " Date_Type"
146- , " Date_C14_Labnr"
147- , " Date_C14_Uncal_BP"
148- , " Date_C14_Uncal_BP_Err"
149- , " Date_BC_AD_Start"
150- , " Date_BC_AD_Median"
151- , " Date_BC_AD_Stop"
152- , " Date_Note"
153- , " MT_Haplogroup"
154- , " Y_Haplogroup"
155- , " Source_Tissue"
156- , " Nr_Libraries"
157- , " Library_Names"
158- , " Capture_Type"
159- , " UDG"
160- , " Library_Built"
161- , " Genotype_Ploidy"
165+ , " Date_C14_Labnr" , " Date_C14_Uncal_BP" , " Date_C14_Uncal_BP_Err"
166+ , " Date_BC_AD_Start" , " Date_BC_AD_Median" , " Date_BC_AD_Stop"
167+ , " Chromosomal_Anomalies"
168+ , " MT_Haplogroup" , " Y_Haplogroup"
169+ , " Source_Material"
170+ , " Nr_Libraries" , " Library_Names"
171+ , " Capture_Type" , " UDG" , " Library_Built" , " Genotype_Ploidy"
162172 , " Data_Preparation_Pipeline_URL"
163- , " Endogenous"
164- , " Nr_SNPs"
165- , " Coverage_on_Target_SNPs"
166- , " Damage"
167- , " Contamination"
168- , " Contamination_Err"
169- , " Contamination_Meas"
170- , " Contamination_Note"
173+ , " Endogenous" , " Nr_SNPs" , " Coverage_on_Target_SNPs" , " Damage"
174+ , " Contamination" , " Contamination_Err" , " Contamination_Meas"
171175 , " Genetic_Source_Accession_IDs"
172176 , " Primary_Contact"
173177 , " Publication"
0 commit comments