Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Design_Matrix.py @ 9:82bff7137e81 draft
Uploaded
| author | yhoogstrate |
|---|---|
| date | Thu, 09 Jan 2014 05:24:29 -0500 |
| parents | |
| children | a6ce4b7710d8 |
comparison
equal
deleted
inserted
replaced
| 8:8c71e4c3c054 | 9:82bff7137e81 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse, os, shutil, sys, tempfile, subprocess | |
| 3 | |
| 4 | |
| 5 class sampleContainer: | |
| 6 def __init__(self): | |
| 7 self.samples = [] | |
| 8 self.pairs = [] | |
| 9 self.pair_index = {} | |
| 10 self.treatments = {} | |
| 11 self.treatment_types = {} | |
| 12 self.names = {} | |
| 13 | |
| 14 def add_sample(self,sample): | |
| 15 if(sample in self.samples): | |
| 16 sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n") | |
| 17 sys.exit(1) | |
| 18 else: | |
| 19 self.samples.append(sample) | |
| 20 print " - Added: "+sample | |
| 21 | |
| 22 def add_pair(self,pair): | |
| 23 print " - Adding pair:" | |
| 24 for sample in pair: | |
| 25 self.add_sample(sample) | |
| 26 self.pairs.append(pair) | |
| 27 pair_id = len(self.pairs) | |
| 28 | |
| 29 for sample in pair: | |
| 30 self.pair_index[sample] = pair_id | |
| 31 | |
| 32 def add_treatment_argument(self,treatment_argument,name=None): | |
| 33 print " - Parsing treatment" | |
| 34 | |
| 35 treatment = {"factor_index":{},"sample_index":{}} | |
| 36 only_integers = True | |
| 37 | |
| 38 i = 1 | |
| 39 for item in treatment_argument: | |
| 40 if(i % 2): | |
| 41 factor = item | |
| 42 if(treatment['factor_index'].has_key(factor)): | |
| 43 sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n") | |
| 44 else: | |
| 45 print " - Adding factor: "+factor | |
| 46 treatment["factor_index"][factor] = [] | |
| 47 if(not factor.isdigit()): | |
| 48 only_integers = False | |
| 49 else: | |
| 50 for sample in item.split(","): | |
| 51 treatment["factor_index"][factor].append(sample) | |
| 52 if(treatment["sample_index"].has_key(sample)): | |
| 53 sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n") | |
| 54 else: | |
| 55 treatment["sample_index"][sample] = factor | |
| 56 i += 1 | |
| 57 | |
| 58 treatment_factors = sorted(treatment["factor_index"].keys()) | |
| 59 | |
| 60 if(name == None): | |
| 61 treatment["name"] = "_vs_".join(treatment_factors) | |
| 62 else: | |
| 63 treatment["name"] = str(name) | |
| 64 | |
| 65 if(len(treatment["sample_index"]) != len(self.samples)): | |
| 66 sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n") | |
| 67 | |
| 68 if(only_integers): | |
| 69 treatment_type = "integer" | |
| 70 else: | |
| 71 treatment_type = "string" | |
| 72 | |
| 73 if(self.treatments.has_key(treatment["name"])): | |
| 74 sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n") | |
| 75 else: | |
| 76 self.treatments[treatment["name"]] = treatment | |
| 77 self.treatment_types[treatment["name"]] = treatment_type | |
| 78 print " - Treatment of type: "+treatment_type+" is valid: "+treatment["name"] | |
| 79 | |
| 80 def add_names_argument(self,names_argument): | |
| 81 for sample in names_argument: | |
| 82 self.add_name(sample) | |
| 83 | |
| 84 def add_name(self,argument): | |
| 85 argument = argument.split(":",1) | |
| 86 self.names[argument[0]] = argument[1] | |
| 87 | |
| 88 def add_unpaired_samples_argument(self,argument): | |
| 89 print " - Adding unpaired samples" | |
| 90 for sample in argument: | |
| 91 self.add_sample(sample) | |
| 92 | |
| 93 def add_paired_samples_argument(self,argument): | |
| 94 print " - Adding paired samples" | |
| 95 pair = [] | |
| 96 for potential_sample in argument: | |
| 97 if(potential_sample == ":"): | |
| 98 self.add_pair(pair) | |
| 99 pair = [] | |
| 100 else: | |
| 101 pair.append(potential_sample) | |
| 102 | |
| 103 def add_treatments_argument(self,argument): | |
| 104 print " - Adding treatments" | |
| 105 treatment_argument = [] | |
| 106 for potential_treatment in argument: | |
| 107 if(potential_treatment == ":"): | |
| 108 self.add_treatment_argument(treatment_argument) | |
| 109 treatment_argument = [] | |
| 110 else: | |
| 111 treatment_argument.append(potential_treatment) | |
| 112 | |
| 113 def add_pairings(self): | |
| 114 samples = sorted(self.samples) | |
| 115 treatment_arguments = [] | |
| 116 | |
| 117 if len(self.pairs) > 0: | |
| 118 for pair_id in range(len(self.pairs)): | |
| 119 # add to treatments | |
| 120 treatment_arguments.append(str(pair_id+1)) | |
| 121 treatment_arguments.append(",".join(self.pairs[pair_id])) | |
| 122 | |
| 123 # remove from samples | |
| 124 for sample in self.pairs[pair_id]: | |
| 125 samples.remove(sample) | |
| 126 | |
| 127 # Add remaining samples as separate factors | |
| 128 i = len(self.pairs)+1 | |
| 129 for sample in samples: | |
| 130 treatment_arguments.append(str(i)) | |
| 131 treatment_arguments.append(sample) | |
| 132 i += 1 | |
| 133 | |
| 134 treatment_arguments.append(":") | |
| 135 | |
| 136 self.add_treatments_argument(treatment_arguments) | |
| 137 | |
| 138 def export(self,output): | |
| 139 # Open file stream | |
| 140 if(args.output == "-"): | |
| 141 fh = sys.stdout | |
| 142 else: | |
| 143 fh = open(args.output,"w") | |
| 144 | |
| 145 # Write header: | |
| 146 fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n") | |
| 147 | |
| 148 #fh.write("#\ttype") | |
| 149 #for treatment in self.treatments.keys(): | |
| 150 # fh.write("\t"+self.treatment_types[treatment]) | |
| 151 #fh.write("\n") | |
| 152 | |
| 153 # Write body: | |
| 154 for sample in self.samples: | |
| 155 fh.write(sample+"\t"+self.names[sample]) | |
| 156 for treatment in self.treatments.keys(): | |
| 157 fh.write("\t"+self.treatments[treatment]["sample_index"][sample]) | |
| 158 fh.write("\n") | |
| 159 | |
| 160 fh.close() | |
| 161 | |
| 162 if __name__=="__main__": | |
| 163 parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.") | |
| 164 parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True) | |
| 165 parser.add_argument("-u","--unpaired-samples", nargs="+", help='Unpaired sample files.') | |
| 166 parser.add_argument("-p","--paired-samples", nargs="+", help='Paired sample files: pat_x_sample_1.txt, .') | |
| 167 parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True) | |
| 168 parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .') | |
| 169 | |
| 170 args = parser.parse_args() | |
| 171 | |
| 172 s = sampleContainer() | |
| 173 s.add_unpaired_samples_argument(args.unpaired_samples) | |
| 174 s.add_paired_samples_argument(args.paired_samples) | |
| 175 s.add_treatments_argument(args.treatments) | |
| 176 s.add_names_argument(args.sample_names) | |
| 177 s.add_pairings() | |
| 178 s.export(args.output) |
