changeset 9:82bff7137e81 draft

Uploaded
author yhoogstrate
date Thu, 09 Jan 2014 05:24:29 -0500
parents 8c71e4c3c054
children a6ce4b7710d8
files edgeR_Design_Matrix.py
diffstat 1 files changed, 178 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/edgeR_Design_Matrix.py	Thu Jan 09 05:24:29 2014 -0500
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+import argparse, os, shutil, sys, tempfile, subprocess
+
+
+class sampleContainer:
+	def __init__(self):
+		self.samples = []
+		self.pairs = []
+		self.pair_index = {}
+		self.treatments = {}
+		self.treatment_types = {}
+		self.names = {}
+	
+	def add_sample(self,sample):
+		if(sample in self.samples):
+			sys.stderr.write("Error:\n* Sample has been added multiple times: "+sample+"\n")
+			sys.exit(1)
+		else:
+			self.samples.append(sample)
+			print "     - Added: "+sample
+	
+	def add_pair(self,pair):
+		print "   - Adding pair:"
+		for sample in pair:
+			self.add_sample(sample)
+		self.pairs.append(pair)
+		pair_id = len(self.pairs)
+		
+		for sample in pair:
+			self.pair_index[sample] = pair_id
+	
+	def add_treatment_argument(self,treatment_argument,name=None):
+		print " - Parsing treatment"
+		
+		treatment = {"factor_index":{},"sample_index":{}}
+		only_integers = True
+		
+		i = 1
+		for item in treatment_argument:
+			if(i % 2):
+				factor = item
+				if(treatment['factor_index'].has_key(factor)):
+					sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n")
+				else:
+					print "   - Adding factor: "+factor
+					treatment["factor_index"][factor] = []
+					if(not factor.isdigit()):
+						only_integers = False
+			else:
+				for sample in item.split(","):
+					treatment["factor_index"][factor].append(sample)
+					if(treatment["sample_index"].has_key(sample)):
+						sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n")
+					else:
+						treatment["sample_index"][sample] = factor
+			i += 1
+		
+		treatment_factors = sorted(treatment["factor_index"].keys())
+		
+		if(name == None):
+			treatment["name"] = "_vs_".join(treatment_factors)
+		else:
+			treatment["name"] = str(name)
+		
+		if(len(treatment["sample_index"]) != len(self.samples)):
+			sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n")
+		
+		if(only_integers):
+			treatment_type = "integer"
+		else:
+			treatment_type = "string"
+		
+		if(self.treatments.has_key(treatment["name"])):
+			sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n")
+		else:
+			self.treatments[treatment["name"]] = treatment
+			self.treatment_types[treatment["name"]] = treatment_type
+			print "     - Treatment of type: "+treatment_type+" is valid: "+treatment["name"]
+	
+	def add_names_argument(self,names_argument):
+		for sample in names_argument:
+			self.add_name(sample)
+	
+	def add_name(self,argument):
+		argument = argument.split(":",1)
+		self.names[argument[0]] = argument[1]
+	
+	def add_unpaired_samples_argument(self,argument):
+		print " - Adding unpaired samples"
+		for sample in argument:
+			self.add_sample(sample)
+	
+	def add_paired_samples_argument(self,argument):
+		print " - Adding paired samples"
+		pair = []
+		for potential_sample in argument:
+			if(potential_sample == ":"):
+				self.add_pair(pair)
+				pair = []
+			else:
+				pair.append(potential_sample)
+	
+	def add_treatments_argument(self,argument):
+		print " - Adding treatments"
+		treatment_argument = []
+		for potential_treatment in argument:
+			if(potential_treatment == ":"):
+				self.add_treatment_argument(treatment_argument)
+				treatment_argument = []
+			else:
+				treatment_argument.append(potential_treatment)
+	
+	def add_pairings(self):
+		samples = sorted(self.samples)
+		treatment_arguments = []
+		
+		if len(self.pairs) > 0:
+			for pair_id in range(len(self.pairs)):
+				# add to treatments
+				treatment_arguments.append(str(pair_id+1))
+				treatment_arguments.append(",".join(self.pairs[pair_id]))
+				
+				# remove from samples
+				for sample in self.pairs[pair_id]:
+					samples.remove(sample)
+		
+		# Add remaining samples as separate factors
+		i = len(self.pairs)+1
+		for sample in samples:
+			treatment_arguments.append(str(i))
+			treatment_arguments.append(sample)
+			i += 1
+		
+		treatment_arguments.append(":")
+		
+		self.add_treatments_argument(treatment_arguments)
+	
+	def export(self,output):
+		# Open file stream
+		if(args.output == "-"):
+			fh = sys.stdout
+		else:
+			fh = open(args.output,"w")
+		
+		# Write header:
+		fh.write("filename\tname\t"+"\t".join(self.treatments.keys())+"\n")
+		
+		#fh.write("#\ttype")
+		#for treatment in self.treatments.keys():
+		#	fh.write("\t"+self.treatment_types[treatment])
+		#fh.write("\n")
+		
+		# Write body:
+		for sample in self.samples:
+			fh.write(sample+"\t"+self.names[sample])
+			for treatment in self.treatments.keys():
+				fh.write("\t"+self.treatments[treatment]["sample_index"][sample])
+			fh.write("\n")
+		
+		fh.close()
+
+if __name__=="__main__":
+	parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.")
+	parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True)
+	parser.add_argument("-u","--unpaired-samples", nargs="+", help='Unpaired sample files.')
+	parser.add_argument("-p","--paired-samples", nargs="+", help='Paired sample files: pat_x_sample_1.txt, .')
+	parser.add_argument("-t","--treatments", nargs="+", help='Treatment conditions',required=True)
+	parser.add_argument("-n","--sample-names", nargs="+", help='Paired sample files: pat_x_sample_1.txt:NAME .')
+	
+	args = parser.parse_args()
+	
+	s = sampleContainer()
+	s.add_unpaired_samples_argument(args.unpaired_samples)
+	s.add_paired_samples_argument(args.paired_samples)
+	s.add_treatments_argument(args.treatments)
+	s.add_names_argument(args.sample_names)
+	s.add_pairings()
+	s.export(args.output)