annotate edgeR_Design_Matrix.py @ 23:fa476f8e1f9e draft

Uploaded
author yhoogstrate
date Thu, 09 Jan 2014 11:09:47 -0500
parents a6ce4b7710d8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
1 #!/usr/bin/env python
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
2 import argparse, os, shutil, sys, tempfile, subprocess
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
3
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
4
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
class sampleContainer:
    """Collect samples, pairings and treatments and write a design matrix.

    Samples are registered first (unpaired and/or paired), after which
    treatments are added; every treatment has to assign each registered
    sample to exactly one factor.  ``export`` writes one tab-separated row
    per sample with one column per treatment.

    Any inconsistency in the input is reported on stderr and terminates the
    program with exit status 1.

    The code only uses constructs that are valid in both Python 2 and
    Python 3 (``print(...)`` with one argument, ``key in dict``).
    """

    def __init__(self):
        self.samples = []          # sample files, in order of registration
        self.pairs = []            # every entry is the list of samples of one pair
        self.pair_index = {}       # sample -> 1-based number of its pair
        self.treatments = {}       # treatment name -> treatment dictionary
        self.treatment_types = {}  # treatment name -> "integer" or "string"
        self.names = {}            # sample -> name written to the matrix

    @staticmethod
    def _fail(message):
        """Write ``message`` to stderr in the tool's error format and exit(1)."""
        sys.stderr.write("Error:\n* " + message + "\n")
        sys.exit(1)

    @staticmethod
    def _split_groups(argument):
        """Split a flat token list into the groups separated by ":" tokens.

        ``None`` is treated as an empty list, empty groups are skipped and a
        last group that is not closed by ":" is kept rather than dropped.
        """
        groups = []
        current = []
        for token in argument or []:
            if token == ":":
                if current:
                    groups.append(current)
                current = []
            else:
                current.append(token)
        if current:
            groups.append(current)
        return groups

    def add_sample(self, sample):
        """Register ``sample``; exits when it has been registered before."""
        if sample in self.samples:
            self._fail("Sample has been added multiple times: " + sample)
        self.samples.append(sample)
        print(" - Added: " + sample)

    def add_pair(self, pair):
        """Register every sample in ``pair`` and remember them as one pair."""
        print(" - Adding pair:")
        for sample in pair:
            self.add_sample(sample)
        self.pairs.append(pair)

        # Pair numbers are 1-based: the pair just appended is number len(pairs).
        pair_id = len(self.pairs)
        for sample in pair:
            self.pair_index[sample] = pair_id

    def add_treatment_argument(self, treatment_argument, name=None):
        """Parse and register one treatment.

        ``treatment_argument`` alternates a factor and a comma-separated list
        of the samples belonging to that factor, e.g.
        ``["ctrl", "a.txt,b.txt", "treated", "c.txt,d.txt"]``.

        ``name`` overrides the default treatment name, which is the sorted
        factors joined by ``"_vs_"``.

        Exits when a factor is repeated, a sample is assigned to more than one
        factor, the treatment does not cover exactly the registered samples,
        or a treatment with the same name already exists.
        """
        print(" - Parsing treatment")

        factor_index = {}  # factor -> list of samples
        sample_index = {}  # sample -> factor
        treatment = {"factor_index": factor_index, "sample_index": sample_index}
        only_integers = True

        factor = None
        for position, item in enumerate(treatment_argument):
            if position % 2 == 0:
                # Even positions hold the factor ...
                factor = item
                if factor in factor_index:
                    self._fail("Factor has been added multiple times to treatment: " + factor)
                print(" - Adding factor: " + factor)
                factor_index[factor] = []
                if not factor.isdigit():
                    only_integers = False
            else:
                # ... odd positions hold the samples of the preceding factor.
                for sample in item.split(","):
                    if sample in sample_index:
                        self._fail("Factor has been added to treatment before: " + sample + "/" + factor + ", factors must be mutually exclusive!")
                    factor_index[factor].append(sample)
                    sample_index[sample] = factor

        if name is None:
            treatment["name"] = "_vs_".join(sorted(factor_index))
        else:
            treatment["name"] = str(name)
        treatment_name = treatment["name"]

        if len(sample_index) != len(self.samples):
            self._fail("The number of samples for treatment '" + treatment_name + "' (" + str(len(sample_index)) + ") is different from the total number of samples (" + str(len(self.samples)) + ").")

        # Equal counts are not sufficient: a sample that was never registered
        # would leave a registered sample without a factor in export().
        unknown = sorted(set(sample_index) - set(self.samples))
        if unknown:
            self._fail("Treatment '" + treatment_name + "' refers to samples that have not been added: " + ", ".join(unknown))

        treatment_type = "integer" if only_integers else "string"

        if treatment_name in self.treatments:
            self._fail("Treatment was already added: '" + treatment_name + "'")

        self.treatments[treatment_name] = treatment
        self.treatment_types[treatment_name] = treatment_type
        print(" - Treatment of type: " + treatment_type + " is valid: " + treatment_name)

    def add_names_argument(self, names_argument):
        """Register every ``sample:name`` entry; ``None`` means no entries."""
        for sample in names_argument or []:
            self.add_name(sample)

    def add_name(self, argument):
        """Register one ``sample:name`` entry, split at the first ":"."""
        sample, separator, name = argument.partition(":")
        if not separator:
            self._fail("Sample name has to be formatted as 'sample:name': " + argument)
        self.names[sample] = name

    def add_unpaired_samples_argument(self, argument):
        """Register every sample in ``argument``; ``None`` means no samples."""
        print(" - Adding unpaired samples")
        for sample in argument or []:
            self.add_sample(sample)

    def add_paired_samples_argument(self, argument):
        """Register pairs from a token list in which ":" separates the pairs."""
        print(" - Adding paired samples")
        for pair in self._split_groups(argument):
            self.add_pair(pair)

    def add_treatments_argument(self, argument):
        """Register treatments from a token list in which ":" separates them."""
        print(" - Adding treatments")
        for treatment_argument in self._split_groups(argument):
            self.add_treatment_argument(treatment_argument)

    def add_pairings(self):
        """Add the pairing of the samples as an additional treatment.

        Pair ``k`` (1-based) becomes factor ``"k"``; every sample that is not
        part of a pair gets a factor of its own, numbered onwards in sorted
        sample order.  Nothing is added when no pairs were registered.
        """
        if not self.pairs:
            return

        unpaired = sorted(self.samples)
        treatment_arguments = []

        for pair_number, pair in enumerate(self.pairs, 1):
            treatment_arguments.append(str(pair_number))
            treatment_arguments.append(",".join(pair))
            for sample in pair:
                unpaired.remove(sample)

        # Remaining samples each get a separate factor.
        for factor_number, sample in enumerate(unpaired, len(self.pairs) + 1):
            treatment_arguments.append(str(factor_number))
            treatment_arguments.append(sample)

        treatment_arguments.append(":")
        self.add_treatments_argument(treatment_arguments)

    def export(self, output):
        """Write the design matrix to the file ``output`` ("-" for stdout).

        The header is ``filename``, ``name`` and one column per treatment;
        every following row describes one sample.  A sample without a
        registered name is written with its filename as name.
        """
        # Fix the column order once so that header and rows always agree.
        columns = list(self.treatments.keys())

        lines = ["filename\tname\t" + "\t".join(columns) + "\n"]
        for sample in self.samples:
            row = [sample, self.names.get(sample, sample)]
            for column in columns:
                row.append(self.treatments[column]["sample_index"][sample])
            lines.append("\t".join(row) + "\n")

        if output == "-":
            # stdout is not ours to close.
            sys.stdout.writelines(lines)
        else:
            with open(output, "w") as fh:
                fh.writelines(lines)
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
161
82bff7137e81 Uploaded
yhoogstrate
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, fill a sampleContainer
    # and write the resulting design matrix.
    parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.")
    parser.add_argument("-o", "--output", required=True,
                        help="Output file, '-' for stdout.")
    # The optional list arguments default to an empty list, so leaving a flag
    # out behaves the same as passing it without values.
    parser.add_argument("-u", "--unpaired-samples", nargs="*", default=[],
                        help="Unpaired sample files.")
    parser.add_argument("-p", "--paired-samples", nargs="*", default=[],
                        help="Paired sample files; close every pair with a ':' token, "
                             "e.g. pat_x_sample_1.txt pat_x_sample_2.txt :")
    parser.add_argument("-t", "--treatments", nargs="+", required=True,
                        help="Treatment conditions: alternate a factor and the comma-separated "
                             "sample files of that factor and close every treatment with a ':' "
                             "token, e.g. ctrl a.txt,b.txt treated c.txt,d.txt :")
    parser.add_argument("-n", "--sample-names", nargs="+", default=[],
                        help="Sample names as FILE:NAME, e.g. pat_x_sample_1.txt:NAME")

    # `args` deliberately stays a module-level name, so code elsewhere in
    # this file that reads it as a global keeps working.
    args = parser.parse_args()

    # Samples have to be registered before the treatments: every treatment
    # is checked against the registered samples when it is added.
    s = sampleContainer()
    s.add_unpaired_samples_argument(args.unpaired_samples)
    s.add_paired_samples_argument(args.paired_samples)
    s.add_treatments_argument(args.treatments)
    s.add_names_argument(args.sample_names)
    s.add_pairings()
    s.export(args.output)