-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMerge_for_DESeq2.py
More file actions
72 lines (63 loc) · 2.53 KB
/
Merge_for_DESeq2.py
File metadata and controls
72 lines (63 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#YuEnoch 30-07-2024
#Merge_for_Deseq2.py
#Purpose: takes in all Peptide Counts for each treatment and merges all into one Table (ready to be analyzed by DESeq2), with peptides of low counts filtered out
#Changes across Experiments: please alter on parameters.txt
# 1. Location of Mutagenesis
# 2. Mutagenesis for 5 Amino Acids
# 3. Number of Treatments
import sys
import re
import mod575
import gzip
import os
import csv
from collections import defaultdict
#Opens the necessary files for input and output
experimentName = sys.argv[1]
numberOfTreatments = sys.argv[2] #Example: 15
num = int(numberOfTreatments)
treatments = sys.argv[3] #Example: C1, C2, C3, E1, E2, E3, H1, H2, H3, V1, V2, V3, N1, N2, N3
treatments = treatments.split(',')
minCount = int(sys.argv[4]) #Minimum Count for Each Peptide (across the row), rowsum filter
n_mer = int(sys.argv[5])
combinations=open('NNK' + str(n_mer) + '_combinations.txt','r') #All possible combinations for n-mer amino acids
outfile=open(experimentName+"_data.txt",'w')
outfileCSV=open(experimentName+"_data.csv",'w', newline='')
writer = csv.writer(outfileCSV)
header = ["AA_seq"] + treatments
writer.writerow(header)
noreads=0
peptides=dict() #Empty Peptides Dictionary
for line in combinations:
line=line.strip()
if line in peptides:
print('non_unique')
else:
peptides[line]=0
for i in range(num): #Creates a Peptides Dictionary for each treatment, then goes through every Peptides File and adds to that Dictionary
input1 = open(treatments[i]+'_peptide', 'r')
globals()['peptide%s' % i] = peptides.copy()
for line in input1:
row=line.strip().split() #separates into peptide, peptide count, sequence distribution
peptide=row[0]
peptide_count=row[1]
if peptide in globals()['peptide%s' % i]:
globals()['peptide%s' % i][peptide]=peptide_count
else:
print('sth is wrong with dictionary!')
input1.close()
for peptide in peptides:
sum = 0
for i in range(num):
sum += int(globals()['peptide%s' % i][peptide])
if sum<minCount: #Threshold. If total counts for that peptide is below minCount, it is not added to final result
noreads+=1
continue
else:
list = [peptide]
for i in range(num):
list.append(str(globals()['peptide%s' % i][peptide]))
print ('\t'.join(list), file=outfile)
writer.writerow(list)
outfile.close()
outfileCSV.close()