forked from nf-core/eager
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkraken_parse.py
More file actions
executable file
·78 lines (60 loc) · 2.05 KB
/
kraken_parse.py
File metadata and controls
executable file
·78 lines (60 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
import argparse
import csv
def _get_args():
    '''Parse and return the command-line arguments of kraken_parse.

    OUTPUT:
        infile (str): path to the kraken report file (positional argument)
        countlim (int): minimum number of hits on a clade to report it
            (-c, 50 when not given)
        outfile (str or None): output file path (-o), None when not given

    Invalid arguments, including a non-integer -c value, make argparse
    print a usage message and exit (SystemExit).
    '''
    parser = argparse.ArgumentParser(
        prog='kraken_parse',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Parsing kraken')
    parser.add_argument('krakenReport', help="path to kraken report file")
    parser.add_argument(
        '-c',
        dest="count",
        # Let argparse do the conversion so that a bad value is reported as
        # a usage error instead of an uncaught ValueError traceback.
        type=int,
        default=50,
        help="Minimum number of hits on clade to report it. Default = 50")
    parser.add_argument(
        '-o',
        dest="output",
        default=None,
        help="Output file. Default = <basename>.kraken_parsed.csv")

    args = parser.parse_args()
    return (args.krakenReport, args.count, args.output)
def _get_basename(file_name):
    '''Return the file name stripped of its directories and extensions.

    Everything up to and including the last "/" is dropped, then everything
    from the first "." onwards, e.g. "a/b/sample.kraken.txt" -> "sample".
    '''
    # rpartition yields the whole string as its last item when no "/" exists.
    leaf = file_name.rpartition("/")[2]
    # partition yields the whole string as its first item when no "." exists.
    return leaf.partition(".")[0]
def parse_kraken(infile, countlim):
    '''
    Collect the entries of a kraken report that reach a minimum read count.

    The report is read as tab-separated text. For every row, the 2nd column
    is taken as the read count and the 5th column as the taxid. Blank lines
    are ignored.

    INPUT:
        infile (str): path to kraken report file
        countlim (int): lowest count threshold to report hit
    OUTPUT:
        resdict (dict): key=taxid (str), value=readCount (int)
    '''
    resdict = {}
    # newline='' is what the csv module recommends for files handed to
    # csv.reader, so that it can deal with the line endings itself.
    with open(infile, 'r', newline='') as f:
        for row in csv.reader(f, delimiter='\t'):
            # A blank line (e.g. an empty line at the end of the report)
            # has no count/taxid columns; skip it instead of failing with
            # an IndexError.
            if not any(field.strip() for field in row):
                continue
            reads = int(row[1])
            if reads >= countlim:
                resdict[row[4]] = reads
    return resdict
def write_output(resdict, infile, outfile):
    '''
    Write the taxid/read-count pairs to a two-column, comma-separated file.

    The header line is "TAXID,<basename of infile>"; one "taxid,count" line
    follows for every entry of resdict.

    INPUT:
        resdict (dict): key=taxid, value=readCount
        infile (str): path to kraken report file, its basename labels the
            count column
        outfile (str): path of the file to write
    '''
    with open(outfile, 'w') as handle:
        sample = _get_basename(infile)
        handle.write(f"TAXID,{sample}\n")
        handle.writelines(
            f"{taxid},{count}\n" for taxid, count in resdict.items())
if __name__ == '__main__':
    # Command-line entry point: parse the report, then write the CSV file.
    INFILE, COUNTLIM, outfile = _get_args()

    # Without -o, the output is named after the report's basename.
    outfile = outfile or _get_basename(INFILE) + ".kraken_parsed.csv"

    write_output(
        resdict=parse_kraken(infile=INFILE, countlim=COUNTLIM),
        infile=INFILE,
        outfile=outfile)