Skip to content
This repository was archived by the owner on Apr 16, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
- 1.0.2.0: Added VCF writing support to admixpops. Updated to newest poseidon-hs.
- 1.0.1.2: Fixed a bug in the FStatConfig parser.
- 1.0.1.0: fixed a bug in the FST estimation, added more graceful errors in case of illegal input for fstats
- 1.0.0.2: Switched to poseidon-hs v1.4.0.3 and a new compiler (GHC 9.4.7) and stackage snapshot version (LTS-21.17)
Expand Down
4 changes: 2 additions & 2 deletions poseidon-analysis-hs.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: poseidon-analysis-hs
version: 1.0.1.2
version: 1.0.2.0
synopsis: A package with analysis-tools to work with Poseidon Genotype Data
description: The tools in this package analyse Poseidon-formatted genotype databases, a modular system for storing genotype data from thousands of individuals.
license: MIT
Expand All @@ -15,7 +15,7 @@ extra-source-files: README.md,
library
exposed-modules: Poseidon.Analysis.FStatsConfig, Poseidon.Analysis.RASconfig, Poseidon.Analysis.Utils,
Poseidon.Analysis.CLI.FStats, Poseidon.Analysis.CLI.RAS,
Poseidon.Generator.CLI.AdmixPops, Poseidon.Generator.CLI.SpaceTime,
Poseidon.Generator.CLI.AdmixPops,
Poseidon.Generator.Parsers, Poseidon.Generator.Types,
Poseidon.Generator.SampleGeno, Poseidon.Generator.Utils
hs-source-dirs: src
Expand Down
17 changes: 7 additions & 10 deletions src-executables/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ import Paths_poseidon_analysis_hs (version)
import Poseidon.CLI.OptparseApplicativeParsers
import Poseidon.PoseidonVersion (showPoseidonVersion,
validPoseidonVersions)
import Poseidon.Utils (LogMode (..),
import Poseidon.Utils (ErrorLength (..),
LogMode (..),
PlinkPopNameMode (..),
PoseidonException (..),
PoseidonIO, TestMode,
Expand Down Expand Up @@ -54,17 +55,12 @@ main = do
hPutStrLn stderr renderVersion
hPutStrLn stderr ""
(Options logMode testMode errLength plinkMode subcommand) <- OP.customExecParser (OP.prefs OP.showHelpOnEmpty) optParserInfo
catch (usePoseidonLogger logMode testMode plinkMode $ runCmd subcommand) (handler logMode testMode errLength plinkMode)
catch (usePoseidonLogger logMode testMode plinkMode errLength $ runCmd subcommand) (handler logMode testMode plinkMode errLength)
where
handler :: LogMode -> TestMode -> ErrorLength -> PlinkPopNameMode -> PoseidonException -> IO ()
handler l t len pm e = do
usePoseidonLogger l t pm $ logError $ truncateErr len $ renderPoseidonException e
handler :: LogMode -> TestMode -> PlinkPopNameMode -> ErrorLength -> PoseidonException -> IO ()
handler l t pm len e = do
usePoseidonLogger l t pm len $ logError $ renderPoseidonException e
exitFailure
truncateErr :: ErrorLength -> String -> String
truncateErr CharInf s = s
truncateErr (CharCount len) s
| length s > len = take len s ++ "... (see more with --errLength)"
| otherwise = s

runCmd :: Subcommand -> PoseidonIO ()
runCmd o = case o of
Expand Down Expand Up @@ -239,6 +235,7 @@ admixPopsOptParser = AdmixPopsOptions <$> parseGenoDataSources
<*> parseIndWithAdmixtureSetFromFile
<*> parseAdmixPopsMethodSettings
<*> parseOutGenotypeFormat True
<*> parseZipOut
<*> parseOutPackagePath
<*> parseMaybeOutPackageName
<*> parseOutputPlinkPopMode
Expand Down
13 changes: 7 additions & 6 deletions src/Poseidon/Analysis/CLI/FStats.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import Control.Exception (catch, throwIO)
import Control.Foldl (FoldM (..), impurely, list,
purely)
import Control.Monad (forM, forM_, unless, when)
import Control.Monad.Catch (throwM)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.IORef (IORef, modifyIORef', newIORef,
readIORef, writeIORef)
Expand All @@ -43,13 +44,13 @@ import Pipes (cat, for, yield, (>->))
import Pipes.Group (chunksOf, foldsM, groupsBy)
import qualified Pipes.Prelude as P
import Pipes.Safe (runSafeT)
import Poseidon.ColumnTypesJanno (JannoGenotypePloidy (..))
import Poseidon.EntityTypes (IndividualInfo (..),
PoseidonEntity (..),
checkIfAllEntitiesExist,
determineRelevantPackages,
resolveUniqueEntityIndices,
underlyingEntity)
import Poseidon.Janno (JannoGenotypePloidy (..))
import Poseidon.Package (PackageReadOptions (..),
PoseidonPackage (..),
defaultPackageReadOptions,
Expand All @@ -58,7 +59,7 @@ import Poseidon.Package (PackageReadOptions (..),
getJointJanno,
readPoseidonPackageCollection)
import Poseidon.Utils (PoseidonException (..),
PoseidonIO, envInputPlinkMode,
PoseidonIO, envErrorLength,
envLogAction, logInfo,
logWithEnv)
import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..),
Expand Down Expand Up @@ -134,11 +135,11 @@ runFstats opts = do
logInfo "Computing stats:"
mapM_ (logInfo . summaryPrintFstats) statSpecs
logA <- envLogAction
inPlinkPopMode <- envInputPlinkMode
statsFold <- buildStatSpecsFold relevantPackages statSpecs
errLength <- envErrorLength
blocks <- liftIO $ catch (
runSafeT $ do
(_, eigenstratProd) <- getJointGenotypeData logA False inPlinkPopMode relevantPackages Nothing
eigenstratProd <- getJointGenotypeData logA False relevantPackages Nothing
let eigenstratProdFiltered =
eigenstratProd >->
P.filter chromFilter >->
Expand All @@ -148,7 +149,7 @@ runFstats opts = do
JackknifePerN chunkSize -> chunkEigenstratByNrSnps chunkSize eigenstratProdFiltered
let summaryStatsProd = impurely foldsM statsFold eigenstratProdInChunks
purely P.fold list (summaryStatsProd >-> printBlockInfoPipe logA)
) (throwIO . PoseidonGenotypeExceptionForward)
) (throwM . PoseidonGenotypeExceptionForward errLength)
let jackknifeEstimates = processBlocks statSpecs blocks
let nrSitesList = [sum [(vals !! i) !! 1 | BlockData _ _ _ vals <- blocks] | i <- [0..(length statSpecs - 1)]]
let hasAscertainment = or $ do
Expand Down Expand Up @@ -238,7 +239,7 @@ buildStatSpecsFold packages fStatSpecs = do
ploidyVec <- makePloidyVec . getJointJanno $ packages
entityIndicesLookup <- do
let collectedSpecs = collectStatSpecGroups fStatSpecs
entityIndices <- sequence [resolveUniqueEntityIndices [s] indInfoCollection | s <- collectedSpecs]
entityIndices <- sequence [resolveUniqueEntityIndices True [s] indInfoCollection | s <- collectedSpecs]
return . M.fromList . zip collectedSpecs $ entityIndices
blockAccum <- do
listOfInnerVectors <- forM fStatSpecs $ \(FStatSpec fType _ _) -> do
Expand Down
16 changes: 8 additions & 8 deletions src/Poseidon/Analysis/CLI/RAS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ import Pipes (cat, (>->))
import Pipes.Group (chunksOf, foldsM, groupsBy)
import qualified Pipes.Prelude as P
import Pipes.Safe (runSafeT)
import Poseidon.ColumnTypesJanno (JannoGenotypePloidy (..))
import Poseidon.EntityTypes (EntitiesList, IndividualInfo (..),
PoseidonEntity (..),
checkIfAllEntitiesExist,
determineRelevantPackages,
resolveUniqueEntityIndices,
underlyingEntity)
import Poseidon.Janno (JannoGenotypePloidy (..))
import Poseidon.Package (PackageReadOptions (..),
PoseidonPackage (..),
defaultPackageReadOptions,
Expand All @@ -44,7 +44,7 @@ import Poseidon.Package (PackageReadOptions (..),
getJointJanno,
readPoseidonPackageCollection)
import Poseidon.Utils (PoseidonException (..),
PoseidonIO, envInputPlinkMode,
PoseidonIO, envErrorLength,
envLogAction, logInfo, logWithEnv)
import SequenceFormats.Bed (filterThroughBed, readBedFile)
import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..),
Expand Down Expand Up @@ -137,10 +137,10 @@ runRAS rasOpts = do

-- run the fold and retrieve the block data needed for RAS computations and output
logA <- envLogAction
inPlinkPopMode <- envInputPlinkMode
errLength <- envErrorLength
blockData <- liftIO $ catch (
runSafeT $ do
(_, eigenstratProd) <- getJointGenotypeData logA False inPlinkPopMode relevantPackages Nothing
eigenstratProd <- getJointGenotypeData logA False relevantPackages Nothing
let eigenstratProdFiltered =
bedFilterFunc (eigenstratProd >->
P.filter (chromFilter (_rasExcludeChroms rasOpts)) >->
Expand All @@ -152,7 +152,7 @@ runRAS rasOpts = do
let summaryStatsProd = impurely foldsM rasFold eigenstratProdInChunks
logWithEnv logA . logInfo $ "performing counts"
purely P.fold list (summaryStatsProd >-> P.tee (P.map showBlockLogOutput >-> P.toHandle stderr))
) (throwIO . PoseidonGenotypeExceptionForward)
) (throwIO . PoseidonGenotypeExceptionForward errLength)

-- outputting and computing results
logInfo "collating results"
Expand Down Expand Up @@ -250,9 +250,9 @@ buildRasFold packages minFreq maxFreq maxM maybeOutgroup popLefts popRights = do
ploidyVec <- makePloidyVec . getJointJanno $ packages
outgroupI <- case maybeOutgroup of
Nothing -> return []
Just o -> resolveUniqueEntityIndices [o] indInfoCollection
leftI <- sequence [resolveUniqueEntityIndices [l] indInfoCollection | l <- popLefts]
rightI <- sequence [resolveUniqueEntityIndices [r] indInfoCollection | r <- popRights]
Just o -> resolveUniqueEntityIndices True [o] indInfoCollection
leftI <- sequence [resolveUniqueEntityIndices True [l] indInfoCollection | l <- popLefts]
rightI <- sequence [resolveUniqueEntityIndices True [r] indInfoCollection | r <- popRights]
let nL = length popLefts
nR = length popRights
let indivNames = map indInfoName (fst indInfoCollection)
Expand Down
16 changes: 9 additions & 7 deletions src/Poseidon/Analysis/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ import Data.Aeson ((.:))
import Data.Aeson.Key (toString)
import Data.Aeson.KeyMap (toList)
import Data.Aeson.Types (Object, Parser)
import qualified Data.Text as T
import qualified Data.Vector as V
import Pipes (Pipe, cat)
import qualified Pipes.Prelude as P
import Poseidon.ColumnTypesJanno (GroupName (..),
JannoGenotypePloidy (..))
import Poseidon.ColumnTypesUtils (ListColumn (..))
import Poseidon.EntityTypes (IndividualInfo (..),
SignedEntitiesList,
indInfoConformsToEntitySpecs,
isLatestInCollection,
makePacNameAndVersion)
import Poseidon.Janno (JannoGenotypePloidy (..),
JannoList (..), JannoRow (..),
JannoRows (..), getJannoList)
import Poseidon.Janno (JannoRow (..), JannoRows (..))
import Poseidon.Package (PoseidonPackage (..),
getJannoRowsFromPac)
import Poseidon.Utils (PoseidonException (..), PoseidonIO,
Expand All @@ -45,13 +47,13 @@ addGroupDefs groupDefs pacs = do -- this loops through all input packages
isLatest <- isLatestInCollection pacs pac
let newJanno = JannoRows $ do -- this loops through the janno-file
jannoRow <- getJannoRowsFromPac pac
let oldGroupNames = (getJannoList . jGroupName) jannoRow
let oldGroupNames = getListColumn . jGroupName $ jannoRow
let additionalGroupNames = do -- this loops through each new group definition and returns those group names that apply to this janno-row
(groupName, signedEntityList) <- groupDefs
let indInfo = IndividualInfo (jPoseidonID jannoRow) oldGroupNames (makePacNameAndVersion pac)
let indInfo = IndividualInfo (jPoseidonID jannoRow) (map (\(GroupName n) -> T.unpack n) oldGroupNames) (makePacNameAndVersion pac)
True <- return $ indInfoConformsToEntitySpecs indInfo isLatest signedEntityList -- this checks whether a new group-def applies to this janno-row
return groupName -- only returns if the previous row pattern-matched, i.e. if the group applies
return $ jannoRow {jGroupName = JannoList (oldGroupNames ++ additionalGroupNames)} -- returns a new janno-row with the new group definitions
return . GroupName . T.pack $ groupName -- only returns if the previous row pattern-matched, i.e. if the group applies
return $ jannoRow {jGroupName = ListColumn (oldGroupNames ++ additionalGroupNames)} -- returns a new janno-row with the new group definitions
return $ pac {posPacJanno = newJanno} -- returns a new package with the new janno

parseGroupDefsFromJSON :: Object -> Parser [GroupDef]
Expand Down
Loading
Loading