Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Design_Matrix.py @ 9:82bff7137e81 draft
Uploaded
author | yhoogstrate |
---|---|
date | Thu, 09 Jan 2014 05:24:29 -0500 |
parents | |
children | a6ce4b7710d8 |
comparison
equal
deleted
inserted
replaced
8:8c71e4c3c054 | 9:82bff7137e81 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse, os, shutil, sys, tempfile, subprocess | |
3 | |
4 | |
5 class sampleContainer: | |
6 def __init__(self): | |
7 self.samples = [] | |
8 self.pairs = [] | |
9 self.pair_index = {} | |
10 self.treatments = {} | |
11 self.treatment_types = {} | |
12 self.names = {} | |
13 | |
14 def add_sample(self,sample): | |
15 if(sample in self.samples): | |
16 sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n") | |
17 sys.exit(1) | |
18 else: | |
19 self.samples.append(sample) | |
20 print " - Added: "+sample | |
21 | |
22 def add_pair(self,pair): | |
23 print " - Adding pair:" | |
24 for sample in pair: | |
25 self.add_sample(sample) | |
26 self.pairs.append(pair) | |
27 pair_id = len(self.pairs) | |
28 | |
29 for sample in pair: | |
30 self.pair_index[sample] = pair_id | |
31 | |
32 def add_treatment_argument(self,treatment_argument,name=None): | |
33 print " - Parsing treatment" | |
34 | |
35 treatment = {"factor_index":{},"sample_index":{}} | |
36 only_integers = True | |
37 | |
38 i = 1 | |
39 for item in treatment_argument: | |
40 if(i % 2): | |
41 factor = item | |
42 if(treatment['factor_index'].has_key(factor)): | |
43 sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n") | |
44 else: | |
45 print " - Adding factor: "+factor | |
46 treatment["factor_index"][factor] = [] | |
47 if(not factor.isdigit()): | |
48 only_integers = False | |
49 else: | |
50 for sample in item.split(","): | |
51 treatment["factor_index"][factor].append(sample) | |
52 if(treatment["sample_index"].has_key(sample)): | |
53 sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n") | |
54 else: | |
55 treatment["sample_index"][sample] = factor | |
56 i += 1 | |
57 | |
58 treatment_factors = sorted(treatment["factor_index"].keys()) | |
59 | |
60 if(name == None): | |
61 treatment["name"] = "_vs_".join(treatment_factors) | |
62 else: | |
63 treatment["name"] = str(name) | |
64 | |
65 if(len(treatment["sample_index"]) != len(self.samples)): | |
66 sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n") | |
67 | |
68 if(only_integers): | |
69 treatment_type = "integer" | |
70 else: | |
71 treatment_type = "string" | |
72 | |
73 if(self.treatments.has_key(treatment["name"])): | |
74 sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n") | |
75 else: | |
76 self.treatments[treatment["name"]] = treatment | |
77 self.treatment_types[treatment["name"]] = treatment_type | |
78 print " - Treatment of type: "+treatment_type+" is valid: "+treatment["name"] | |
79 | |
80 def add_names_argument(self,names_argument): | |
81 for sample in names_argument: | |
82 self.add_name(sample) | |
83 | |
84 def add_name(self,argument): | |
85 argument = argument.split(":",1) | |
86 self.names[argument[0]] = argument[1] | |
87 | |
88 def add_unpaired_samples_argument(self,argument): | |
89 print " - Adding unpaired samples" | |
90 for sample in argument: | |
91 self.add_sample(sample) | |
92 | |
93 def add_paired_samples_argument(self,argument): | |
94 print " - Adding paired samples" | |
95 pair = [] | |
96 for potential_sample in argument: | |
97 if(potential_sample == ":"): | |
98 self.add_pair(pair) | |
99 pair = [] | |
100 else: | |
101 pair.append(potential_sample) | |
102 | |
103 def add_treatments_argument(self,argument): | |
104 print " - Adding treatments" | |
105 treatment_argument = [] | |
106 for potential_treatment in argument: | |
107 if(potential_treatment == ":"): | |
108 self.add_treatment_argument(treatment_argument) | |
109 treatment_argument = [] | |
110 else: | |
111 treatment_argument.append(potential_treatment) | |
112 | |
113 def add_pairings(self): | |
114 samples = sorted(self.samples) | |
115 treatment_arguments = [] | |
116 | |
117 if len(self.pairs) > 0: | |
118 for pair_id in range(len(self.pairs)): | |
119 # add to treatments | |
120 treatment_arguments.append(str(pair_id+1)) | |
121 treatment_arguments.append(",".join(self.pairs[pair_id])) | |
122 | |
123 # remove from samples | |
124 for sample in self.pairs[pair_id]: | |
125 samples.remove(sample) | |
126 | |
127 # Add remaining samples as separate factors | |
128 i = len(self.pairs)+1 | |
129 for sample in samples: | |
130 treatment_arguments.append(str(i)) | |
131 treatment_arguments.append(sample) | |
132 i += 1 | |
133 | |
134 treatment_arguments.append(":") | |
135 | |
136 self.add_treatments_argument(treatment_arguments) | |
137 | |
138 def export(self,output): | |
139 # Open file stream | |
140 if(args.output == "-"): | |
141 fh = sys.stdout | |
142 else: | |
143 fh = open(args.output,"w") | |
144 | |
145 # Write header: | |
146 fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n") | |
147 | |
148 #fh.write("#\ttype") | |
149 #for treatment in self.treatments.keys(): | |
150 # fh.write("\t"+self.treatment_types[treatment]) | |
151 #fh.write("\n") | |
152 | |
153 # Write body: | |
154 for sample in self.samples: | |
155 fh.write(sample+"\t"+self.names[sample]) | |
156 for treatment in self.treatments.keys(): | |
157 fh.write("\t"+self.treatments[treatment]["sample_index"][sample]) | |
158 fh.write("\n") | |
159 | |
160 fh.close() | |
161 | |
162 if __name__=="__main__": | |
163 parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.") | |
164 parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True) | |
165 parser.add_argument("-u","--unpaired-samples", nargs="+", help='Unpaired sample files.') | |
166 parser.add_argument("-p","--paired-samples", nargs="+", help='Paired sample files: pat_x_sample_1.txt, .') | |
167 parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True) | |
168 parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .') | |
169 | |
170 args = parser.parse_args() | |
171 | |
172 s = sampleContainer() | |
173 s.add_unpaired_samples_argument(args.unpaired_samples) | |
174 s.add_paired_samples_argument(args.paired_samples) | |
175 s.add_treatments_argument(args.treatments) | |
176 s.add_names_argument(args.sample_names) | |
177 s.add_pairings() | |
178 s.export(args.output) |