Mercurial > repos > yhoogstrate > edger_with_design_matrix
changeset 9:82bff7137e81 draft
Uploaded
author | yhoogstrate |
---|---|
date | Thu, 09 Jan 2014 05:24:29 -0500 |
parents | 8c71e4c3c054 |
children | a6ce4b7710d8 |
files | edgeR_Design_Matrix.py |
diffstat | 1 files changed, 178 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edgeR_Design_Matrix.py Thu Jan 09 05:24:29 2014 -0500 @@ -0,0 +1,178 @@ +#!/usr/bin/env python +import argparse, os, shutil, sys, tempfile, subprocess + + +class sampleContainer: + def __init__(self): + self.samples = [] + self.pairs = [] + self.pair_index = {} + self.treatments = {} + self.treatment_types = {} + self.names = {} + + def add_sample(self,sample): + if(sample in self.samples): + sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n") + sys.exit(1) + else: + self.samples.append(sample) + print " - Added: "+sample + + def add_pair(self,pair): + print " - Adding pair:" + for sample in pair: + self.add_sample(sample) + self.pairs.append(pair) + pair_id = len(self.pairs) + + for sample in pair: + self.pair_index[sample] = pair_id + + def add_treatment_argument(self,treatment_argument,name=None): + print " - Parsing treatment" + + treatment = {"factor_index":{},"sample_index":{}} + only_integers = True + + i = 1 + for item in treatment_argument: + if(i % 2): + factor = item + if(treatment['factor_index'].has_key(factor)): + sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n") + else: + print " - Adding factor: "+factor + treatment["factor_index"][factor] = [] + if(not factor.isdigit()): + only_integers = False + else: + for sample in item.split(","): + treatment["factor_index"][factor].append(sample) + if(treatment["sample_index"].has_key(sample)): + sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n") + else: + treatment["sample_index"][sample] = factor + i += 1 + + treatment_factors = sorted(treatment["factor_index"].keys()) + + if(name == None): + treatment["name"] = "_vs_".join(treatment_factors) + else: + treatment["name"] = str(name) + + if(len(treatment["sample_index"]) != len(self.samples)): + sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n") + + if(only_integers): + treatment_type = "integer" + else: + treatment_type = "string" + + if(self.treatments.has_key(treatment["name"])): + sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n") + else: + self.treatments[treatment["name"]] = treatment + self.treatment_types[treatment["name"]] = treatment_type + print " - Treatment of type: "+treatment_type+" is valid: "+treatment["name"] + + def add_names_argument(self,names_argument): + for sample in names_argument: + self.add_name(sample) + + def add_name(self,argument): + argument = argument.split(":",1) + self.names[argument[0]] = argument[1] + + def add_unpaired_samples_argument(self,argument): + print " - Adding unpaired samples" + for sample in argument: + self.add_sample(sample) + + def add_paired_samples_argument(self,argument): + print " - Adding paired samples" + pair = [] + for potential_sample in argument: + if(potential_sample == ":"): + self.add_pair(pair) + pair = [] + else: + pair.append(potential_sample) + + def add_treatments_argument(self,argument): + print " - Adding treatments" + treatment_argument = [] + for potential_treatment in argument: + if(potential_treatment == ":"): + self.add_treatment_argument(treatment_argument) + treatment_argument = [] + else: + treatment_argument.append(potential_treatment) + + def add_pairings(self): + samples = sorted(self.samples) + treatment_arguments = [] + + if len(self.pairs) > 0: + for pair_id in range(len(self.pairs)): + # add to treatments + treatment_arguments.append(str(pair_id+1)) + treatment_arguments.append(",".join(self.pairs[pair_id])) + + # remove from samples + for sample in self.pairs[pair_id]: + samples.remove(sample) + + # Add remaining samples as separate factors + i = len(self.pairs)+1 + for sample in samples: + treatment_arguments.append(str(i)) + treatment_arguments.append(sample) + i += 1 + + treatment_arguments.append(":") + + self.add_treatments_argument(treatment_arguments) + + def export(self,output): + # Open file stream + if(args.output == "-"): + fh = sys.stdout + else: + fh = open(args.output,"w") + + # Write header: + fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n") + + #fh.write("#\ttype") + #for treatment in self.treatments.keys(): + # fh.write("\t"+self.treatment_types[treatment]) + #fh.write("\n") + + # Write body: + for sample in self.samples: + fh.write(sample+"\t"+self.names[sample]) + for treatment in self.treatments.keys(): + fh.write("\t"+self.treatments[treatment]["sample_index"][sample]) + fh.write("\n") + + fh.close() + +if __name__=="__main__": + parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.") + parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True) + parser.add_argument("-u","--unpaired-samples", nargs="+", help='Unpaired sample files.') + parser.add_argument("-p","--paired-samples", nargs="+", help='Paired sample files: pat_x_sample_1.txt, .') + parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True) + parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .') + + args = parser.parse_args() + + s = sampleContainer() + s.add_unpaired_samples_argument(args.unpaired_samples) + s.add_paired_samples_argument(args.paired_samples) + s.add_treatments_argument(args.treatments) + s.add_names_argument(args.sample_names) + s.add_pairings() + s.export(args.output)