Mercurial > repos > yhoogstrate > edger_with_design_matrix
view edgeR_Design_Matrix.py @ 10:a6ce4b7710d8 draft
Uploaded
author | yhoogstrate |
---|---|
date | Thu, 09 Jan 2014 05:42:44 -0500 |
parents | 82bff7137e81 |
children |
line wrap: on
line source
#!/usr/bin/env python import argparse, os, shutil, sys, tempfile, subprocess class sampleContainer: def __init__(self): self.samples = [] self.pairs = [] self.pair_index = {} self.treatments = {} self.treatment_types = {} self.names = {} def add_sample(self,sample): if(sample in self.samples): sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n") sys.exit(1) else: self.samples.append(sample) print " - Added: "+sample def add_pair(self,pair): print " - Adding pair:" for sample in pair: self.add_sample(sample) self.pairs.append(pair) pair_id = len(self.pairs) for sample in pair: self.pair_index[sample] = pair_id def add_treatment_argument(self,treatment_argument,name=None): print " - Parsing treatment" treatment = {"factor_index":{},"sample_index":{}} only_integers = True i = 1 for item in treatment_argument: if(i % 2): factor = item if(treatment['factor_index'].has_key(factor)): sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n") else: print " - Adding factor: "+factor treatment["factor_index"][factor] = [] if(not factor.isdigit()): only_integers = False else: for sample in item.split(","): treatment["factor_index"][factor].append(sample) if(treatment["sample_index"].has_key(sample)): sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n") else: treatment["sample_index"][sample] = factor i += 1 treatment_factors = sorted(treatment["factor_index"].keys()) if(name == None): treatment["name"] = "_vs_".join(treatment_factors) else: treatment["name"] = str(name) if(len(treatment["sample_index"]) != len(self.samples)): sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n") if(only_integers): treatment_type = "integer" else: treatment_type = "string" if(self.treatments.has_key(treatment["name"])): sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n") else: self.treatments[treatment["name"]] = treatment self.treatment_types[treatment["name"]] = treatment_type print " - Treatment of type: "+treatment_type+" is valid: "+treatment["name"] def add_names_argument(self,names_argument): for sample in names_argument: self.add_name(sample) def add_name(self,argument): argument = argument.split(":",1) self.names[argument[0]] = argument[1] def add_unpaired_samples_argument(self,argument): print " - Adding unpaired samples" for sample in argument: self.add_sample(sample) def add_paired_samples_argument(self,argument): print " - Adding paired samples" pair = [] for potential_sample in argument: if(potential_sample == ":"): self.add_pair(pair) pair = [] else: pair.append(potential_sample) def add_treatments_argument(self,argument): print " - Adding treatments" treatment_argument = [] for potential_treatment in argument: if(potential_treatment == ":"): self.add_treatment_argument(treatment_argument) treatment_argument = [] else: treatment_argument.append(potential_treatment) def add_pairings(self): samples = sorted(self.samples) treatment_arguments = [] if len(self.pairs) > 0: for pair_id in range(len(self.pairs)): # add to treatments treatment_arguments.append(str(pair_id+1)) treatment_arguments.append(",".join(self.pairs[pair_id])) # remove from samples for sample in self.pairs[pair_id]: samples.remove(sample) # Add remaining samples as separate factors i = len(self.pairs)+1 for sample in samples: treatment_arguments.append(str(i)) treatment_arguments.append(sample) i += 1 treatment_arguments.append(":") self.add_treatments_argument(treatment_arguments) def export(self,output): # Open file stream if(args.output == "-"): fh = sys.stdout else: fh = open(args.output,"w") # Write header: fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n") #fh.write("#\ttype") #for treatment in self.treatments.keys(): # fh.write("\t"+self.treatment_types[treatment]) #fh.write("\n") # Write body: for sample in self.samples: fh.write(sample+"\t"+self.names[sample]) for treatment in self.treatments.keys(): fh.write("\t"+self.treatments[treatment]["sample_index"][sample]) fh.write("\n") fh.close() if __name__=="__main__": parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.") parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True) parser.add_argument("-u","--unpaired-samples", nargs="*", help='Unpaired sample files.') parser.add_argument("-p","--paired-samples", nargs="*", help='Paired sample files: pat_x_sample_1.txt, .') parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True) parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .') args = parser.parse_args() s = sampleContainer() s.add_unpaired_samples_argument(args.unpaired_samples) s.add_paired_samples_argument(args.paired_samples) s.add_treatments_argument(args.treatments) s.add_names_argument(args.sample_names) s.add_pairings() s.export(args.output)