Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions amdirt/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,12 @@ def get_filename(path_string: str, orientation: str) -> Tuple[str, str]:
"""

if ";" in path_string:
fwd = Path(path_string.split(";")[0]).name
rev = Path(path_string.split(";")[1]).name
if path_string.count(";") == 1:
fwd = Path(path_string.split(";")[0]).name
rev = Path(path_string.split(";")[1]).name
else: # three files per sample
fwd = Path(path_string.split(";")[1]).name
rev = Path(path_string.split(";")[2]).name
else:
fwd = Path(path_string).name
rev = "NA"
Expand All @@ -252,13 +256,17 @@ def parse_to_mag(libraries):
get_filename, orientation="rev"
)
libraries["short_reads_2"] = libraries["short_reads_2"].replace("NA", "")
libraries["short_reads_platform"] = libraries["instrument_model"].apply(get_sequencing_platform)
libraries["longs_reads"] = ""
libraries["long_reads_platform"] = ""
col2keep = [
"archive_data_accession",
"archive_sample_accession",
"short_reads_1",
"short_reads_2",
"longs_reads",
"short_reads_platform",
"long_reads_platform",
]
libraries = libraries[col2keep].rename(
columns={
Expand Down Expand Up @@ -624,3 +632,30 @@ def is_merge_size_zero(
if samples.shape[0] != 0 and library_selected.shape[0] == 0:
return True
return False


def get_sequencing_platform(instrument_model: str) -> str:
    """
    Infer sequencing platform from the instrument model.

    Matching is done on the prefix of the model name, except for
    "Complete Genomics", which must match exactly.

    Args:
        instrument_model (str): instrument model reported on ENA
    Returns:
        str: sequencing platform, or "Unknown" when the model is not
            recognised or is not a string (e.g. a missing value)
    """
    # Missing values (None, NaN) have no ``startswith``; report them as
    # unknown instead of raising AttributeError.
    if not isinstance(instrument_model, str):
        return "Unknown"

    # Several Illumina instruments are reported without the vendor name.
    if instrument_model.startswith(("Illumina", "HiSeq", "NextSeq")):
        return "ILLUMINA"

    if instrument_model == "Complete Genomics":
        return instrument_model

    # For these platforms the returned label is the matched prefix itself.
    for prefix in ("454", "AB", "BGISEQ", "Helicos"):
        if instrument_model.startswith(prefix):
            return prefix

    return "Unknown"