9
|
1 #!/usr/bin/env python
|
|
2 import argparse, os, shutil, sys, tempfile, subprocess
|
|
3
|
|
4
|
|
5 class sampleContainer:
|
|
6 def __init__(self):
|
|
7 self.samples = []
|
|
8 self.pairs = []
|
|
9 self.pair_index = {}
|
|
10 self.treatments = {}
|
|
11 self.treatment_types = {}
|
|
12 self.names = {}
|
|
13
|
|
14 def add_sample(self,sample):
|
|
15 if(sample in self.samples):
|
|
16 sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n")
|
|
17 sys.exit(1)
|
|
18 else:
|
|
19 self.samples.append(sample)
|
|
20 print " - Added: "+sample
|
|
21
|
|
22 def add_pair(self,pair):
|
|
23 print " - Adding pair:"
|
|
24 for sample in pair:
|
|
25 self.add_sample(sample)
|
|
26 self.pairs.append(pair)
|
|
27 pair_id = len(self.pairs)
|
|
28
|
|
29 for sample in pair:
|
|
30 self.pair_index[sample] = pair_id
|
|
31
|
|
32 def add_treatment_argument(self,treatment_argument,name=None):
|
|
33 print " - Parsing treatment"
|
|
34
|
|
35 treatment = {"factor_index":{},"sample_index":{}}
|
|
36 only_integers = True
|
|
37
|
|
38 i = 1
|
|
39 for item in treatment_argument:
|
|
40 if(i % 2):
|
|
41 factor = item
|
|
42 if(treatment['factor_index'].has_key(factor)):
|
|
43 sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n")
|
|
44 else:
|
|
45 print " - Adding factor: "+factor
|
|
46 treatment["factor_index"][factor] = []
|
|
47 if(not factor.isdigit()):
|
|
48 only_integers = False
|
|
49 else:
|
|
50 for sample in item.split(","):
|
|
51 treatment["factor_index"][factor].append(sample)
|
|
52 if(treatment["sample_index"].has_key(sample)):
|
|
53 sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n")
|
|
54 else:
|
|
55 treatment["sample_index"][sample] = factor
|
|
56 i += 1
|
|
57
|
|
58 treatment_factors = sorted(treatment["factor_index"].keys())
|
|
59
|
|
60 if(name == None):
|
|
61 treatment["name"] = "_vs_".join(treatment_factors)
|
|
62 else:
|
|
63 treatment["name"] = str(name)
|
|
64
|
|
65 if(len(treatment["sample_index"]) != len(self.samples)):
|
|
66 sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n")
|
|
67
|
|
68 if(only_integers):
|
|
69 treatment_type = "integer"
|
|
70 else:
|
|
71 treatment_type = "string"
|
|
72
|
|
73 if(self.treatments.has_key(treatment["name"])):
|
|
74 sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n")
|
|
75 else:
|
|
76 self.treatments[treatment["name"]] = treatment
|
|
77 self.treatment_types[treatment["name"]] = treatment_type
|
|
78 print " - Treatment of type: "+treatment_type+" is valid: "+treatment["name"]
|
|
79
|
|
80 def add_names_argument(self,names_argument):
|
|
81 for sample in names_argument:
|
|
82 self.add_name(sample)
|
|
83
|
|
84 def add_name(self,argument):
|
|
85 argument = argument.split(":",1)
|
|
86 self.names[argument[0]] = argument[1]
|
|
87
|
|
88 def add_unpaired_samples_argument(self,argument):
|
|
89 print " - Adding unpaired samples"
|
|
90 for sample in argument:
|
|
91 self.add_sample(sample)
|
|
92
|
|
93 def add_paired_samples_argument(self,argument):
|
|
94 print " - Adding paired samples"
|
|
95 pair = []
|
|
96 for potential_sample in argument:
|
|
97 if(potential_sample == ":"):
|
|
98 self.add_pair(pair)
|
|
99 pair = []
|
|
100 else:
|
|
101 pair.append(potential_sample)
|
|
102
|
|
103 def add_treatments_argument(self,argument):
|
|
104 print " - Adding treatments"
|
|
105 treatment_argument = []
|
|
106 for potential_treatment in argument:
|
|
107 if(potential_treatment == ":"):
|
|
108 self.add_treatment_argument(treatment_argument)
|
|
109 treatment_argument = []
|
|
110 else:
|
|
111 treatment_argument.append(potential_treatment)
|
|
112
|
|
113 def add_pairings(self):
|
|
114 samples = sorted(self.samples)
|
|
115 treatment_arguments = []
|
|
116
|
|
117 if len(self.pairs) > 0:
|
|
118 for pair_id in range(len(self.pairs)):
|
|
119 # add to treatments
|
|
120 treatment_arguments.append(str(pair_id+1))
|
|
121 treatment_arguments.append(",".join(self.pairs[pair_id]))
|
|
122
|
|
123 # remove from samples
|
|
124 for sample in self.pairs[pair_id]:
|
|
125 samples.remove(sample)
|
|
126
|
|
127 # Add remaining samples as separate factors
|
|
128 i = len(self.pairs)+1
|
|
129 for sample in samples:
|
|
130 treatment_arguments.append(str(i))
|
|
131 treatment_arguments.append(sample)
|
|
132 i += 1
|
|
133
|
|
134 treatment_arguments.append(":")
|
|
135
|
|
136 self.add_treatments_argument(treatment_arguments)
|
|
137
|
|
138 def export(self,output):
|
|
139 # Open file stream
|
|
140 if(args.output == "-"):
|
|
141 fh = sys.stdout
|
|
142 else:
|
|
143 fh = open(args.output,"w")
|
|
144
|
|
145 # Write header:
|
|
146 fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n")
|
|
147
|
|
148 #fh.write("#\ttype")
|
|
149 #for treatment in self.treatments.keys():
|
|
150 # fh.write("\t"+self.treatment_types[treatment])
|
|
151 #fh.write("\n")
|
|
152
|
|
153 # Write body:
|
|
154 for sample in self.samples:
|
|
155 fh.write(sample+"\t"+self.names[sample])
|
|
156 for treatment in self.treatments.keys():
|
|
157 fh.write("\t"+self.treatments[treatment]["sample_index"][sample])
|
|
158 fh.write("\n")
|
|
159
|
|
160 fh.close()
|
|
161
|
|
162 if __name__=="__main__":
|
|
163 parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.")
|
|
164 parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True)
|
10
|
165 parser.add_argument("-u","--unpaired-samples", nargs="*", help='Unpaired sample files.')
|
|
166 parser.add_argument("-p","--paired-samples", nargs="*", help='Paired sample files: pat_x_sample_1.txt, .')
|
9
|
167 parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True)
|
|
168 parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .')
|
|
169
|
|
170 args = parser.parse_args()
|
|
171
|
|
172 s = sampleContainer()
|
|
173 s.add_unpaired_samples_argument(args.unpaired_samples)
|
|
174 s.add_paired_samples_argument(args.paired_samples)
|
|
175 s.add_treatments_argument(args.treatments)
|
|
176 s.add_names_argument(args.sample_names)
|
|
177 s.add_pairings()
|
|
178 s.export(args.output)
|