comparison edgeR_Design_Matrix.py @ 9:82bff7137e81 draft

Uploaded
author yhoogstrate
date Thu, 09 Jan 2014 05:24:29 -0500
parents
children a6ce4b7710d8
comparison
equal deleted inserted replaced
8:8c71e4c3c054 9:82bff7137e81
1 #!/usr/bin/env python
2 import argparse, os, shutil, sys, tempfile, subprocess
3
4
5 class sampleContainer:
6 def __init__(self):
7 self.samples = []
8 self.pairs = []
9 self.pair_index = {}
10 self.treatments = {}
11 self.treatment_types = {}
12 self.names = {}
13
14 def add_sample(self,sample):
15 if(sample in self.samples):
16 sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n")
17 sys.exit(1)
18 else:
19 self.samples.append(sample)
20 print " - Added: "+sample
21
22 def add_pair(self,pair):
23 print " - Adding pair:"
24 for sample in pair:
25 self.add_sample(sample)
26 self.pairs.append(pair)
27 pair_id = len(self.pairs)
28
29 for sample in pair:
30 self.pair_index[sample] = pair_id
31
32 def add_treatment_argument(self,treatment_argument,name=None):
33 print " - Parsing treatment"
34
35 treatment = {"factor_index":{},"sample_index":{}}
36 only_integers = True
37
38 i = 1
39 for item in treatment_argument:
40 if(i % 2):
41 factor = item
42 if(treatment['factor_index'].has_key(factor)):
43 sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n")
44 else:
45 print " - Adding factor: "+factor
46 treatment["factor_index"][factor] = []
47 if(not factor.isdigit()):
48 only_integers = False
49 else:
50 for sample in item.split(","):
51 treatment["factor_index"][factor].append(sample)
52 if(treatment["sample_index"].has_key(sample)):
53 sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n")
54 else:
55 treatment["sample_index"][sample] = factor
56 i += 1
57
58 treatment_factors = sorted(treatment["factor_index"].keys())
59
60 if(name == None):
61 treatment["name"] = "_vs_".join(treatment_factors)
62 else:
63 treatment["name"] = str(name)
64
65 if(len(treatment["sample_index"]) != len(self.samples)):
66 sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n")
67
68 if(only_integers):
69 treatment_type = "integer"
70 else:
71 treatment_type = "string"
72
73 if(self.treatments.has_key(treatment["name"])):
74 sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n")
75 else:
76 self.treatments[treatment["name"]] = treatment
77 self.treatment_types[treatment["name"]] = treatment_type
78 print " - Treatment of type: "+treatment_type+" is valid: "+treatment["name"]
79
80 def add_names_argument(self,names_argument):
81 for sample in names_argument:
82 self.add_name(sample)
83
84 def add_name(self,argument):
85 argument = argument.split(":",1)
86 self.names[argument[0]] = argument[1]
87
88 def add_unpaired_samples_argument(self,argument):
89 print " - Adding unpaired samples"
90 for sample in argument:
91 self.add_sample(sample)
92
93 def add_paired_samples_argument(self,argument):
94 print " - Adding paired samples"
95 pair = []
96 for potential_sample in argument:
97 if(potential_sample == ":"):
98 self.add_pair(pair)
99 pair = []
100 else:
101 pair.append(potential_sample)
102
103 def add_treatments_argument(self,argument):
104 print " - Adding treatments"
105 treatment_argument = []
106 for potential_treatment in argument:
107 if(potential_treatment == ":"):
108 self.add_treatment_argument(treatment_argument)
109 treatment_argument = []
110 else:
111 treatment_argument.append(potential_treatment)
112
113 def add_pairings(self):
114 samples = sorted(self.samples)
115 treatment_arguments = []
116
117 if len(self.pairs) > 0:
118 for pair_id in range(len(self.pairs)):
119 # add to treatments
120 treatment_arguments.append(str(pair_id+1))
121 treatment_arguments.append(",".join(self.pairs[pair_id]))
122
123 # remove from samples
124 for sample in self.pairs[pair_id]:
125 samples.remove(sample)
126
127 # Add remaining samples as separate factors
128 i = len(self.pairs)+1
129 for sample in samples:
130 treatment_arguments.append(str(i))
131 treatment_arguments.append(sample)
132 i += 1
133
134 treatment_arguments.append(":")
135
136 self.add_treatments_argument(treatment_arguments)
137
138 def export(self,output):
139 # Open file stream
140 if(args.output == "-"):
141 fh = sys.stdout
142 else:
143 fh = open(args.output,"w")
144
145 # Write header:
146 fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n")
147
148 #fh.write("#\ttype")
149 #for treatment in self.treatments.keys():
150 # fh.write("\t"+self.treatment_types[treatment])
151 #fh.write("\n")
152
153 # Write body:
154 for sample in self.samples:
155 fh.write(sample+"\t"+self.names[sample])
156 for treatment in self.treatments.keys():
157 fh.write("\t"+self.treatments[treatment]["sample_index"][sample])
158 fh.write("\n")
159
160 fh.close()
161
162 if __name__=="__main__":
163 parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.")
164 parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True)
165 parser.add_argument("-u","--unpaired-samples", nargs="+", help='Unpaired sample files.')
166 parser.add_argument("-p","--paired-samples", nargs="+", help='Paired sample files: pat_x_sample_1.txt, .')
167 parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True)
168 parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .')
169
170 args = parser.parse_args()
171
172 s = sampleContainer()
173 s.add_unpaired_samples_argument(args.unpaired_samples)
174 s.add_paired_samples_argument(args.paired_samples)
175 s.add_treatments_argument(args.treatments)
176 s.add_names_argument(args.sample_names)
177 s.add_pairings()
178 s.export(args.output)