Mercurial > repos > yhoogstrate > edger_with_design_matrix

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/design_matrix_creator	Tue Sep 01 04:32:16 2015 -0400
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+
+import argparse, os, shutil, sys, tempfile, subprocess
+
+__version_info__ = ('1', '0', '0')#, 'beta')
+__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3])+"-"+__version_info__[3]
+__author__ = 'Youri Hoogstrate'
+__homepage__ = 'https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools'
+__license__ = 'GNU General Public License v3 (GPLv3)'
+
+
+class sampleContainer:
+	def __init__(self):
+		self.samples = []
+		self.treatments = {}
+		self.treatment_index = []
+		self.treatment_types = {}
+
+	def do_decode(self,encoded_str):
+		return encoded_str.decode("base64").strip().replace("\t",'')
+
+	def add_samples(self,argument):
+		print " - Adding samples"
+		for sample in argument:
+			self.add_sample(self.do_decode(sample))
+
+	def add_sample(self,sample):
+		if(sample in self.samples):
+			sys.stderr.write("Error:\n* Non-unique sample: "+sample+"\n")
+			sys.exit(1)
+		else:
+			self.samples.append(sample)
+			print "     - Added: "+sample
+
+	def add_blocking(self,argument):
+		print " - Adding paired samples"
+		pair = []
+		for block in argument:
+			self.add_block(block)
+
+	def add_block(self,blocks):
+		blocks = blocks.split(":")
+		as_treatment = blocks[0]
+		blocks = blocks[1:]
+
+		used_samples = []
+		indexed_samples = {}
+
+		for i in range(len(blocks)):
+			block = blocks[i]
+			samples = self.get_samples_from_block(block)
+			indexed_samples[i+1] = []
+			for sample in samples:
+				if(sample in used_samples):
+					sys.stderr.write("Error:\n* Blocking contains multiple times the same sample: "+sample+"\n")
+					sys.exit(0)
+				else:
+					indexed_samples[i+1] = block
+				used_samples.append(sample)
+
+		for sample in self.samples:
+			if(sample not in used_samples):
+				i = i + 1
+				indexed_samples[i+1] = str(sample).encode('base64').strip()
+
+		for index in indexed_samples.keys():
+			key = str(index).encode('base64').strip()
+			as_treatment += ":"+key+":"+indexed_samples[index]
+
+		self.add_treatment(as_treatment)
+
+	def get_samples_from_block(self,decoded_block):
+		return [ self.do_decode(x) for x in decoded_block.split(",")]
+
+	def add_treatments(self,argument):
+		print " - Adding treatments"
+		for treatment in argument:
+			self.add_treatment(treatment)
+
+	def add_treatment(self,treatment_argument):
+		print " - Parsing treatment"
+
+
+		treatment_argument = treatment_argument.split(":")
+		name = self.do_decode(treatment_argument[0])
+		treatment_argument = treatment_argument[1:]
+
+
+		treatment = {"factor_index":{},"sample_index":{}}
+		only_integers = True
+
+		i = 1
+		for item in treatment_argument:
+			if(i % 2):
+				factor = self.do_decode(item)
+
+				if(treatment['factor_index'].has_key(factor)):
+					sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n")
+					sys.exit(0)
+				else:
+					print "   - Adding factor: "+factor
+					treatment["factor_index"][factor] = []
+					if(not factor.isdigit()):
+						only_integers = False
+			else:
+				for sample in item.split(","):
+					sample = self.do_decode(sample)
+
+					if(not sample in self.samples):
+						sys.stderr.write("Error:\n* Unknown sample: "+sample+"\n")
+						sys.exit(0)
+
+					treatment["factor_index"][factor].append(sample)
+					if(treatment["sample_index"].has_key(sample)):
+						sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n")
+						sys.exit(0)
+					else:
+						treatment["sample_index"][sample] = factor
+			i += 1
+
+		treatment_factors = sorted(treatment["factor_index"].keys())
+
+		if(name == None):
+			treatment["name"] = "_vs_".join(treatment_factors)
+		else:
+			treatment["name"] = str(name)
+
+		if(len(treatment["sample_index"]) != len(self.samples)):
+			sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n")
+
+		if(only_integers):
+			treatment_type = "integer"
+		else:
+			treatment_type = "string"
+
+		if(self.treatments.has_key(treatment["name"])):
+			sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n")
+		else:
+			self.treatments[treatment["name"]] = treatment
+			self.treatment_index.append(treatment["name"])
+			self.treatment_types[treatment["name"]] = treatment_type
+			print "     - Treatment \""+treatment["name"]+"\" of type \""+treatment_type+"\" is valid"
+
+	def export(self,output):
+		# Open file stream
+		if(args.output == "-"):
+			fh = sys.stdout
+		else:
+			fh = open(args.output,"w")
+
+		# Write header:
+		fh.write("sample-name\t"+"\t".join(self.treatment_index)+"\n")
+
+		# Write body:
+		for sample in self.samples:
+			fh.write(sample)
+			for treatment_id in self.treatment_index:
+				treatment = self.treatments[treatment_id]
+				fh.write("\t"+treatment["sample_index"][sample])
+			fh.write("\n")
+
+		fh.close()
+
+if __name__=="__main__":
+	parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.")
+	parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True)
+	parser.add_argument("-c","--columns-file", nargs="?", help='Use columns of [this] file as UIDs (counting from 1)')
+	parser.add_argument("-s","--sample-names", nargs="*", help='Sample names (UIDs that correspond to the columns in the expression matrix)')
+	parser.add_argument("-t","--treatments", nargs="+", help='Treatment or conditions: "name::sample:condition& (sample-names and conditions have to be provided using Base64 encoding to avoid weird characters)',required=True)
+	parser.add_argument("-b","--blocking", nargs="+", help='Description of sample blocking: "blocking_condition*&sample-1-name&sample-2-name&sample-n-name"')
+
+	args = parser.parse_args()
+
+	columns = None
+	if(args.columns_file):
+		with open(args.columns_file, "r") as f:
+			listed_columns = [None] + f.readline().strip("\n").split("\t")
+			for i in range(1,len(listed_columns)):
+				listed_columns[i] =  listed_columns[i].encode('base64').replace('\n','')
+
+	s = sampleContainer()
+
+	if(listed_columns):
+		columns = []
+		for sample in args.sample_names:
+			columns.append(listed_columns[int(sample)])
+
+
+		treatments = []
+		for treatment in args.treatments:
+			treatment = treatment.split(":")
+			for i in range(1,len(treatment)):
+				if(i%2 == 0):
+					treatment_tmp = treatment[i].split(",")
+					for j in range(len(treatment_tmp)):
+						treatment_tmp[j] = listed_columns[int(treatment_tmp[j])]
+					treatment[i] = ",".join(treatment_tmp)
+
+			treatments.append(":".join(treatment))
+
+		blockings = []
+		if(args.blocking):
+			for blocking in args.blocking:
+				blocking = blocking.split(":")
+				for i in range(1,len(blocking)):
+					block = blocking[i].split(",")
+					for j in range(len(block)):
+						block[j] = listed_columns[int(block[j])]
+					blocking[i] = ",".join(block)
+				blockings.append(":".join(blocking))
+
+		s.add_samples(columns)
+		s.add_treatments(treatments)
+		s.add_blocking(blockings)
+
+	else:
+		s.add_samples(args.sample_names)
+		s.add_treatments(args.treatments)
+		if(args.blocking):
+			s.add_blocking(args.blocking)
+
+	s.export(args.output)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/edger_dge_table_to_bedgraph	Tue Sep 01 04:32:16 2015 -0400
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+
+import re,sys,os,os.path,argparse,textwrap,datetime
+
+class GTF:
+	def __init__(self,filename,features=["exon"],symbol="gene_id"):
+		self.features = features
+		if(re.match("^[a-zA-Z0-9_\-]+$",symbol)):
+			self.symbol = symbol
+		else:
+			raise ValueError('False symbol matching symbol: '+str(symbol))
+		self.index = {}
+		self.parse(filename)
+
+	def parse(self,filename):
+		with open(filename) as infile:
+			for line in infile:
+				self.parse_line(line)
+
+	def parse_line(self,line):
+		line = line.strip().split("\t")
+		if(len(line) == 9):
+			if(line[2] in self.features):
+				gene_id = self.parse_column_9(line[8])
+				if(gene_id):
+					if(not self.index.has_key(gene_id)):
+						self.index[gene_id] = []
+					self.index[gene_id].append([line[0],line[3],line[4]])
+
+	def parse_column_9(self,line):
+		query = self.symbol+'[ =]+([^ ;]+)'
+		m = re.search(query, line)
+
+		if(m):
+			return m.group(1).strip("'").strip('"')
+		else:
+			return None
+
+	def get(self,key):
+		try:
+			return self.index[key]
+		except:
+			return False
+
+
+class EdgeR_table:
+	def __init__(self,table,gtf,columns=[3,7]):
+		self.index = {}
+		self.gtf = gtf
+		self.columns = columns
+		self.parse(table)
+
+	def parse(self,filename):
+		i = 0
+		with open(filename) as infile:
+			for line in infile:
+				if(i == 0):
+					self.parse_header(line)
+				else:
+					self.parse_line(line)
+				i += 1
+
+	def parse_header(self,line):
+		params = line.strip().split("\t")
+		if(params[1].lower().find("genes") == -1):
+			raise ValueError('False header in file - no "genes" in 2nd colum: '+line)
+		if(params[2].lower().find("logfc") == -1):
+			raise ValueError('False header in file - no "logfc" in 3rd colum: '+line)
+		if(params[3].lower().find("logcpm") == -1):
+			raise ValueError('False header in file - no "logcpm" in 4th colum: '+line)
+		if(params[4].lower().find("lr") == -1):
+			raise ValueError('False header in file - no "lr" in 5th colum: '+line)
+		if(params[5].lower().find("pvalue") == -1):
+			raise ValueError('False header in file - no "pvalue" in 6th colum: '+line)
+		if(params[6].lower().find("fdr") == -1):
+			raise ValueError('False header in file - no "fdr" in 7th colum: '+line)
+
+	def parse_line(self,line):
+		line = line.strip().split("\t")
+
+		if(len(line) == 7):
+			gene_id = line[1].strip('"').strip("'")
+			column_data = {}
+			for column in self.columns:
+				if(column in [6,7]):
+					column_data[column] = str(1.0 - float(line[column-1]))
+				else:
+					column_data[column] = line[column-1]
+
+			locations = self.gtf.get(gene_id)
+			if(not locations):
+				print "Warning: no location found for gene "+gene_id
+			else:
+				for location in locations:
+					self.insert(location,column_data)
+
+	def insert(self,location,data):
+		chrom = location[0]
+		start = location[1]
+		end = location[2]
+
+		if(not self.index.has_key(chrom)):
+			self.index[chrom] = {}
+
+		if(not self.index[chrom].has_key(start)):
+			self.index[chrom][start] = {}
+
+		if(not self.index[chrom][start].has_key(end)):
+			self.index[chrom][start][end] = []
+
+		self.index[chrom][start][end].append(data)
+
+	def export(self,filenames={3:"log_cpm.txt",7:"fdr.txt"}):
+		for column in self.columns:
+			fh = open(filenames[column],"w")
+
+			buf = False
+
+			for chrom in sorted(self.index.keys()):
+				for start in sorted(self.index[chrom].keys()):
+					for end in sorted(self.index[chrom][start].keys()):
+						fh.write(chrom+"\t"+start+"\t"+end+"\t"+self.index[chrom][start][end][0][column]+"\n")
+
+			fh.close()
+
+			os.system("sort -k1,1V -k2,2g -k3,3g '"+filenames[column]+"' > '"+filenames[column]+".sorted'")
+
+
+def remove_overlap_in_bedgraph(bedgraph_file_dirty,bedgraph_file_clean):
+	fh = open(bedgraph_file_clean,"w")
+	buf = False
+
+	with open(bedgraph_file_dirty,"r") as f:
+		for line in f:
+			cur = line.strip().split("\t")
+			cur[1] = int(cur[1])
+			cur[2] = int(cur[2])
+
+			if(not buf):
+				buf = cur
+			else:
+				if(cur[0] == buf[0] and cur[1] <= buf[2] ):
+					if(buf[1] == cur[1]):									#is subset
+						newscore = (float(buf[3])+float(cur[3]))/2
+						buf[2] = cur[2]
+						buf[3] = newscore
+					else:
+						c1 = buf[1]
+						c2 = cur[1]
+						c3 = min(buf[2],cur[2])
+						c4 = max(buf[2],cur[2])
+
+						fh.write(buf[0]+"\t"+str(c1)+"\t"+str(c2-1)+"\t"+str(buf[3])+"\n")
+
+						newscore = (float(buf[3])+float(cur[3]))/2
+						#fh.write(buf[0]+"\t"+str(c2+1)+"\t"+str(c3)+"\t"+str(newscore)+"\tp2\n")
+						#buf = [buf[0], c3+1 , c4 , cur[3]]
+
+						buf = [buf[0], c2 , c4 , cur[3]]
+
+						# find if buf is a subset -> if so, merge and send to buffer
+						# or find the overlapping region
+
+						# if current is overlapping with buffer; merge:
+						##  [       ]                 < buf
+						##       [        ]           < cur
+						##
+						##  [             ]           < buf
+						##       [    ]               < cur
+						##  111112222333333  << write 1 and 2 and keep 3 in buf
+				else:
+					fh.write(buf[0]+"\t"+str(buf[1])+"\t"+str(buf[2])+"\t"+str(buf[3])+"\n")
+					buf=cur
+
+		fh.write(buf[0]+"\t"+str(buf[1])+"\t"+str(buf[2])+"\t"+str(buf[3])+"\n")
+	fh.close()
+
+
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser()
+
+	parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,epilog="For more info please visit:\n<https://github.com/yhoogstrate/fuma>")
+
+	parser.add_argument("-t",help="CPM table to extract columns from",nargs=1,required=True)
+	parser.add_argument("-g",help="GTF file used to extract genomic location",nargs=1,required=True)
+
+	parser.add_argument("-c3",help="Output (bedgraph) for column 3 (logFC)",nargs="?",required=False)
+	parser.add_argument("-c4",help="Output (bedgraph) for column 4 (logCPM)",nargs="?",required=False)
+	parser.add_argument("-c5",help="Output (bedgraph) for column 5 (LR)",nargs="?",required=False)
+	parser.add_argument("-c6",help="Output (bedgraph) for column 6 (PValue)",nargs="?",required=False)
+	parser.add_argument("-c7",help="Output (bedgraph) for column 7 (FDR)",nargs="?",required=False)
+
+	args = parser.parse_args()
+
+	#files = {3:"VCAP_logFC.hg19.bedgraph",7:"VCAP_fdr.hg19.bedgraph"}
+	files = {}
+
+	if(args.c3):
+		files[3] = args.c3
+	if(args.c4):
+		files[4] = args.c4
+	if(args.c5):
+		files[5] = args.c5
+	if(args.c6):
+		files[6] = args.c6
+	if(args.c7):
+		files[7] = args.c7
+
+	print "Parsing GTF file"
+	g = GTF(args.g[0])
+
+	print "Parsing EdgeR table"
+	e = EdgeR_table(args.t[0],g,files.keys())
+
+	print "Exporting raw bedgraph(s)"
+	e.export(files)
+
+	print "Removing overlapping entries in bedgraph(s)"
+	for key in files.keys():
+		remove_overlap_in_bedgraph(files[key]+".sorted",files[key])
+		os.system("rm '"+files[key]+".sorted'")
--- a/design_matrix_creator	Tue Sep 01 04:25:37 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,222 +0,0 @@
-#!/usr/bin/env python
-
-import argparse, os, shutil, sys, tempfile, subprocess
-
-__version_info__ = ('1', '0', '0')#, 'beta')
-__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3])+"-"+__version_info__[3]
-__author__ = 'Youri Hoogstrate'
-__homepage__ = 'https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools'
-__license__ = 'GNU General Public License v3 (GPLv3)'
-
-
-class sampleContainer:
-	def __init__(self):
-		self.samples = []
-		self.treatments = {}
-		self.treatment_index = []
-		self.treatment_types = {}
-
-	def do_decode(self,encoded_str):
-		return encoded_str.decode("base64").strip().replace("\t",'')
-
-	def add_samples(self,argument):
-		print " - Adding samples"
-		for sample in argument:
-			self.add_sample(self.do_decode(sample))
-
-	def add_sample(self,sample):
-		if(sample in self.samples):
-			sys.stderr.write("Error:\n* Non-unique sample: "+sample+"\n")
-			sys.exit(1)
-		else:
-			self.samples.append(sample)
-			print "     - Added: "+sample
-
-	def add_blocking(self,argument):
-		print " - Adding paired samples"
-		pair = []
-		for block in argument:
-			self.add_block(block)
-
-	def add_block(self,blocks):
-		blocks = blocks.split(":")
-		as_treatment = blocks[0]
-		blocks = blocks[1:]
-
-		used_samples = []
-		indexed_samples = {}
-
-		for i in range(len(blocks)):
-			block = blocks[i]
-			samples = self.get_samples_from_block(block)
-			indexed_samples[i+1] = []
-			for sample in samples:
-				if(sample in used_samples):
-					sys.stderr.write("Error:\n* Blocking contains multiple times the same sample: "+sample+"\n")
-					sys.exit(0)
-				else:
-					indexed_samples[i+1] = block
-				used_samples.append(sample)
-
-		for sample in self.samples:
-			if(sample not in used_samples):
-				i = i + 1
-				indexed_samples[i+1] = str(sample).encode('base64').strip()
-
-		for index in indexed_samples.keys():
-			key = str(index).encode('base64').strip()
-			as_treatment += ":"+key+":"+indexed_samples[index]
-
-		self.add_treatment(as_treatment)
-
-	def get_samples_from_block(self,decoded_block):
-		return [ self.do_decode(x) for x in decoded_block.split(",")]
-
-	def add_treatments(self,argument):
-		print " - Adding treatments"
-		for treatment in argument:
-			self.add_treatment(treatment)
-
-	def add_treatment(self,treatment_argument):
-		print " - Parsing treatment"
-
-
-		treatment_argument = treatment_argument.split(":")
-		name = self.do_decode(treatment_argument[0])
-		treatment_argument = treatment_argument[1:]
-
-
-		treatment = {"factor_index":{},"sample_index":{}}
-		only_integers = True
-
-		i = 1
-		for item in treatment_argument:
-			if(i % 2):
-				factor = self.do_decode(item)
-
-				if(treatment['factor_index'].has_key(factor)):
-					sys.stderr.write("Error:\n* Factor has been added multiple times to treatment: "+factor+"\n")
-					sys.exit(0)
-				else:
-					print "   - Adding factor: "+factor
-					treatment["factor_index"][factor] = []
-					if(not factor.isdigit()):
-						only_integers = False
-			else:
-				for sample in item.split(","):
-					sample = self.do_decode(sample)
-
-					if(not sample in self.samples):
-						sys.stderr.write("Error:\n* Unknown sample: "+sample+"\n")
-						sys.exit(0)
-
-					treatment["factor_index"][factor].append(sample)
-					if(treatment["sample_index"].has_key(sample)):
-						sys.stderr.write("Error:\n* Factor has been added to treatment before: "+sample+"/"+factor+", factors must be mutually exclusive!\n")
-						sys.exit(0)
-					else:
-						treatment["sample_index"][sample] = factor
-			i += 1
-
-		treatment_factors = sorted(treatment["factor_index"].keys())
-
-		if(name == None):
-			treatment["name"] = "_vs_".join(treatment_factors)
-		else:
-			treatment["name"] = str(name)
-
-		if(len(treatment["sample_index"]) != len(self.samples)):
-			sys.stderr.write("Error:\n* The number of samples for treatment '"+treatment["name"]+"' ("+str(len(treatment["sample_index"]))+") is different from the total number of samples ("+str(len(self.samples))+").\n")
-
-		if(only_integers):
-			treatment_type = "integer"
-		else:
-			treatment_type = "string"
-
-		if(self.treatments.has_key(treatment["name"])):
-			sys.stderr.write("Error:\n* Treatment was already added: '"+treatment["name"]+"\n")
-		else:
-			self.treatments[treatment["name"]] = treatment
-			self.treatment_index.append(treatment["name"])
-			self.treatment_types[treatment["name"]] = treatment_type
-			print "     - Treatment \""+treatment["name"]+"\" of type \""+treatment_type+"\" is valid"
-
-	def export(self,output):
-		# Open file stream
-		if(args.output == "-"):
-			fh = sys.stdout
-		else:
-			fh = open(args.output,"w")
-
-		# Write header:
-		fh.write("sample-name\t"+"\t".join(self.treatment_index)+"\n")
-
-		# Write body:
-		for sample in self.samples:
-			fh.write(sample)
-			for treatment_id in self.treatment_index:
-				treatment = self.treatments[treatment_id]
-				fh.write("\t"+treatment["sample_index"][sample])
-			fh.write("\n")
-
-		fh.close()
-
-if __name__=="__main__":
-	parser = argparse.ArgumentParser(description="Create an edgeR design matrix with read-count datasets.")
-	parser.add_argument("-o","--output", help="Output file, '-' for stdout.",required=True)
-	parser.add_argument("-c","--columns-file", nargs="?", help='Use columns of [this] file as UIDs (counting from 1)')
-	parser.add_argument("-s","--sample-names", nargs="*", help='Sample names (UIDs that correspond to the columns in the expression matrix)')
-	parser.add_argument("-t","--treatments", nargs="+", help='Treatment or conditions: "name::sample:condition& (sample-names and conditions have to be provided using Base64 encoding to avoid weird characters)',required=True)
-	parser.add_argument("-b","--blocking", nargs="+", help='Description of sample blocking: "blocking_condition*&sample-1-name&sample-2-name&sample-n-name"')
-
-	args = parser.parse_args()
-
-	columns = None
-	if(args.columns_file):
-		with open(args.columns_file, "r") as f:
-			listed_columns = [None] + f.readline().strip("\n").split("\t")
-			for i in range(1,len(listed_columns)):
-				listed_columns[i] =  listed_columns[i].encode('base64').replace('\n','')
-
-	s = sampleContainer()
-
-	if(listed_columns):
-		columns = []
-		for sample in args.sample_names:
-			columns.append(listed_columns[int(sample)])
-
-
-		treatments = []
-		for treatment in args.treatments:
-			treatment = treatment.split(":")
-			for i in range(1,len(treatment)):
-				if(i%2 == 0):
-					treatment_tmp = treatment[i].split(",")
-					for j in range(len(treatment_tmp)):
-						treatment_tmp[j] = listed_columns[int(treatment_tmp[j])]
-					treatment[i] = ",".join(treatment_tmp)
-
-			treatments.append(":".join(treatment))
-
-		blockings = []
-		if(args.blocking):
-			for blocking in args.blocking:
-				blocking = blocking.split(":")
-				for i in range(1,len(blocking)):
-					block = blocking[i].split(",")
-					for j in range(len(block)):
-						block[j] = listed_columns[int(block[j])]
-					blocking[i] = ",".join(block)
-				blockings.append(":".join(blocking))
-
-		s.add_samples(columns)
-		s.add_treatments(treatments)
-		s.add_blocking(blockings)
-
-	else:
-		s.add_samples(args.sample_names)
-		s.add_treatments(args.treatments)
-		if(args.blocking):
-			s.add_blocking(args.blocking)
-
-	s.export(args.output)
--- a/edgeR_Differential_Gene_Expression.xml	Tue Sep 01 04:25:37 2015 -0400
+++ b/edgeR_Differential_Gene_Expression.xml	Tue Sep 01 04:32:16 2015 -0400
@@ -29,11 +29,6 @@
     <version_command>echo $(R --version | grep version | grep -v GNU) " , EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2&gt; /dev/null | grep -v -i "WARNING: ")</version_command>

     <command>
-        <!--
-            The following script is written in the "Cheetah" language:
-            http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html
-        -->
-
         R --vanilla --slave -f $R_script '--args
             $expression_matrix
             $design_matrix
@@ -111,48 +106,47 @@

     <configfiles>
         <configfile name="R_script">
-library(limma,quietly=TRUE) ## enable quietly to avoid unnecessaity stderr dumping
-library(edgeR,quietly=TRUE) ## enable quietly to avoid unnecessaity stderr dumping
-library(splines,quietly=TRUE) ## enable quietly to avoid unnecessaity stderr dumping
+<![CDATA[
+
+library(limma,quietly=TRUE)  ## quietly to avoid unnecessaity stderr messages
+library(edgeR,quietly=TRUE)  ## quietly to avoid unnecessaity stderr messages
+library(splines,quietly=TRUE)## quietly to avoid unnecessaity stderr messages

 ## Fetch commandline arguments
-args &lt;- commandArgs(trailingOnly = TRUE)
+args <- commandArgs(trailingOnly = TRUE)

-expression_matrix_file              = args[1]
-design_matrix_file                  = args[2]
-contrast                            = args[3]
+expression_matrix_file              <- args[1]
+design_matrix_file                  <- args[2]
+contrast                            <- args[3]

-fdr                                 = args[4]
+fdr                                 <- args[4]

-output_count_edgeR                  = args[5]
-output_cpm                          = args[6]
+output_count_edgeR                  <- args[5]
+output_cpm                          <- args[6]

-output_xpkm                         = args[7]                            ##FPKM file - yet to be implemented
+output_xpkm                         <- args[7]        ##FPKM file - to be implemented

-output_raw_counts                   = args[8]
-output_MDSplot_logFC                = args[9]
-output_MDSplot_bcv                  = args[10]
-output_BCVplot                      = args[11]
-output_MAplot                       = args[12]
-output_PValue_distribution_plot     = args[13]
-output_hierarchical_clustering_plot = args[14]
-output_heatmap_plot                 = args[15]
-output_RData_obj                    = args[16]
-output_format_images                = args[17]
+output_raw_counts                   <- args[8]
+output_MDSplot_logFC                <- args[9]
+output_MDSplot_bcv                  <- args[10]
+output_BCVplot                      <- args[11]
+output_MAplot                       <- args[12]
+output_PValue_distribution_plot     <- args[13]
+output_hierarchical_clustering_plot <- args[14]
+output_heatmap_plot                 <- args[15]
+output_RData_obj                    <- args[16]
+output_format_images                <- args[17]


-library(edgeR)
-##raw_data &lt;- read.delim(designmatrix,header=T,stringsAsFactors=T)
 ## Obtain read-counts
+expression_matrix <- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
+design_matrix <- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))

-expression_matrix &lt;- read.delim(expression_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
-design_matrix &lt;- read.delim(design_matrix_file,header=T,stringsAsFactors=F,row.names=1,check.names=FALSE,na.strings=c(""))
-
-colnames(design_matrix) &lt;- make.names(colnames(design_matrix))
+colnames(design_matrix) <- make.names(colnames(design_matrix))

 for(i in 1:ncol(design_matrix)) {
-  old &lt;- design_matrix[,i]
-  design_matrix[,i] &lt;- make.names(design_matrix[,i])
+  old <- design_matrix[,i]
+  design_matrix[,i] <- make.names(design_matrix[,i])
   if(paste(design_matrix[,i],collapse="\t") != paste(old,collapse="\t")) {
     print("Renaming of factors:")
     print(old)
@@ -160,46 +154,46 @@
     print(design_matrix[,i])
   }
   ## The following line seems to malfunction the script:
-  ##design_matrix[,i] &lt;- as.factor(design_matrix[,i])
+  ##design_matrix[,i] <- as.factor(design_matrix[,i])
 }

 ## 1) In the expression matrix, you only want to have the samples described in the design matrix
-columns &lt;- match(rownames(design_matrix),colnames(expression_matrix))
-columns &lt;- columns[!is.na(columns)]
-read_counts &lt;- expression_matrix[,columns]
+columns <- match(rownames(design_matrix),colnames(expression_matrix))
+columns <- columns[!is.na(columns)]
+read_counts <- expression_matrix[,columns]

 ## 2) In the design matrix, you only want to have samples of which you really have the counts
-columns &lt;- match(colnames(read_counts),rownames(design_matrix))
-columns &lt;- columns[!is.na(columns)]
-design_matrix &lt;- design_matrix[columns,,drop=FALSE]
+columns <- match(colnames(read_counts),rownames(design_matrix))
+columns <- columns[!is.na(columns)]
+design_matrix <- design_matrix[columns,,drop=FALSE]

 ## Filter for HTSeq predifined counts:
-exclude_HTSeq &lt;- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
-exclude_DEXSeq &lt;- c("_ambiguous","_empty","_lowaqual","_notaligned")
+exclude_HTSeq <- c("no_feature","ambiguous","too_low_aQual","not_aligned","alignment_not_unique")
+exclude_DEXSeq <- c("_ambiguous","_empty","_lowaqual","_notaligned")

-exclude &lt;- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
-exclude &lt;- exclude[is.na(exclude)==0]
+exclude <- match(c(exclude_HTSeq, exclude_DEXSeq),rownames(read_counts))
+exclude <- exclude[is.na(exclude)==0]
 if(length(exclude) != 0)  {
-  read_counts &lt;- read_counts[-exclude,]
+  read_counts <- read_counts[-exclude,]
 }


 ## sorting expression matrix with the order of the read_counts
-##order &lt;- match(colnames(read_counts) , rownames(design_matrix))
-##read_counts_ordered  &lt;- read_counts[,order2]
+##order <- match(colnames(read_counts) , rownames(design_matrix))
+##read_counts_ordered  <- read_counts[,order2]

-empty_samples &lt;- apply(read_counts,2,function(x) sum(x) == 0)
+empty_samples <- apply(read_counts,2,function(x) sum(x) == 0)
 if(sum(empty_samples) > 0) {
   write(paste("There are ",sum(empty_samples)," empty samples found:",sep=""),stderr())
   write(colnames(read_counts)[empty_samples],stderr())
 } else {

-  dge &lt;- DGEList(counts=read_counts,genes=rownames(read_counts))
+  dge <- DGEList(counts=read_counts,genes=rownames(read_counts))

-  formula &lt;- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ")
-  design_matrix_tmp &lt;- design_matrix
-  colnames(design_matrix_tmp) &lt;- make.names(colnames(design_matrix_tmp))
-  design &lt;- model.matrix(as.formula(formula),design_matrix_tmp)
+  formula <- paste(c("~0",make.names(colnames(design_matrix))),collapse = " + ")
+  design_matrix_tmp <- design_matrix
+  colnames(design_matrix_tmp) <- make.names(colnames(design_matrix_tmp))
+  design <- model.matrix(as.formula(formula),design_matrix_tmp)
   rm(design_matrix_tmp)

   # Filter prefixes
@@ -211,18 +205,18 @@

   # Do normalization
   write("Calculating normalization factors...",stdout())
-  dge &lt;- calcNormFactors(dge)
+  dge <- calcNormFactors(dge)
   write("Estimating common dispersion...",stdout())
-  dge &lt;- estimateGLMCommonDisp(dge,design)
+  dge <- estimateGLMCommonDisp(dge,design)
   write("Estimating trended dispersion...",stdout())
-  dge &lt;- estimateGLMTrendedDisp(dge,design)
+  dge <- estimateGLMTrendedDisp(dge,design)
   write("Estimating tagwise dispersion...",stdout())
-  dge &lt;- estimateGLMTagwiseDisp(dge,design)
+  dge <- estimateGLMTagwiseDisp(dge,design)


   if(output_MDSplot_logFC != "/dev/null") {
     write("Creating MDS plot (logFC method)",stdout())
-    points &lt;- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
+    points <- plotMDS.DGEList(dge,top=500,labels=rep("",nrow(dge\$samples)))# Get coordinates of unflexible plot
     dev.off()# Kill it

     if(output_format_images == "pdf") {
@@ -237,7 +231,7 @@
     }


-    diff_x &lt;- abs(max(points\$x)-min(points\$x))
+    diff_x <- abs(max(points\$x)-min(points\$x))
     diff_y &lt;-(max(points\$y)-min(points\$y))
     plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR logFC-MDS Plot on top 500 genes",type="n", xlab="Leading logFC dim 1", ylab="Leading logFC dim 2")
     points(points\$x,points\$y,pch=20)
@@ -252,7 +246,7 @@

     ## 1. First create a virtual plot to obtain the desired coordinates
     pdf("bcvmds.pdf")
-    points &lt;- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
+    points <- plotMDS.DGEList(dge,method="bcv",top=500,labels=rep("",nrow(dge\$samples)))
     dev.off()# Kill it

     ## 2. Re-plot the coordinates in a new figure with the size and settings.
@@ -267,8 +261,8 @@
       bitmap(output_MDSplot_bcv,type="png16m",height=14,width=14)
     }

-    diff_x &lt;- abs(max(points\$x)-min(points\$x))
-    diff_y &lt;-(max(points\$y)-min(points\$y))
+    diff_x <- abs(max(points\$x)-min(points\$x))
+    diff_y <- (max(points\$y)-min(points\$y))
     plot(c(min(points\$x),max(points\$x) + 0.45 * diff_x), c(min(points\$y) - 0.05 * diff_y,max(points\$y) + 0.05 * diff_y), main="edgeR BCV-MDS Plot",type="n", xlab="Leading BCV dim 1", ylab="Leading BCV dim 2")
     points(points\$x,points\$y,pch=20)
     text(points\$x, points\$y,rownames(dge\$samples),cex=1.25,col="gray",pos=4)
@@ -298,13 +292,13 @@


   write("Fitting GLM...",stdout())
-  fit &lt;- glmFit(dge,design)
+  fit <- glmFit(dge,design)

   write(paste("Performing likelihood ratio test: ",contrast,sep=""),stdout())
-  cont &lt;- c(contrast)
-  cont &lt;- makeContrasts(contrasts=cont, levels=design)
+  cont <- c(contrast)
+  cont <- makeContrasts(contrasts=cont, levels=design)

-  lrt &lt;- glmLRT(fit, contrast=cont[,1])
+  lrt <- glmLRT(fit, contrast=cont[,1])
   write(paste("Exporting to file: ",output_count_edgeR,sep=""),stdout())
   write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
   write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
@@ -313,8 +307,8 @@
   write.table(file=output_raw_counts,dge\$counts,sep="\t",row.names=TRUE,col.names=NA)

   if(output_MAplot != "/dev/null" || output_PValue_distribution_plot != "/dev/null") {
-    etable &lt;- topTags(lrt, n=nrow(dge))\$table
-    etable &lt;- etable[order(etable\$FDR), ]
+    etable <- topTags(lrt, n=nrow(dge))\$table
+    etable <- etable[order(etable\$FDR), ]

     if(output_MAplot != "/dev/null") {
       write("Creating MA plot...",stdout())
@@ -350,16 +344,16 @@
         bitmap(output_PValue_distribution_plot,type="png16m",width=14,height=14)
       }

-      expressed_genes &lt;- subset(etable, PValue &lt; 0.99)
-      h &lt;- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (&lt; 0.99)")
-      center &lt;- sum(h\$counts) / length(h\$counts)
+      expressed_genes <- subset(etable, PValue &lt; 0.99)
+      h <- hist(expressed_genes\$PValue,breaks=nrow(expressed_genes)/15,main="Binned P-Values (&lt; 0.99)")
+      center <- sum(h\$counts) / length(h\$counts)
       lines(c(0,1),c(center,center),lty=2,col="red",lwd=2)
-      k &lt;- ksmooth(h\$mid, h\$counts)
+      k <- ksmooth(h\$mid, h\$counts)
       lines(k\$x,k\$y,col="red",lwd=2)
-      rmsd &lt;- (h\$counts) - center
-      rmsd &lt;- rmsd^2
-      rmsd &lt;- sum(rmsd)
-      rmsd &lt;- sqrt(rmsd)
+      rmsd <- (h\$counts) - center
+      rmsd <- rmsd^2
+      rmsd <- sum(rmsd)
+      rmsd <- sqrt(rmsd)
       text(0,max(h\$counts),paste("e=",round(rmsd,2),sep=""),pos=4,col="blue")
       ## change e into epsilon somehow
       dev.off()
@@ -379,9 +373,9 @@
       bitmap(output_heatmap_plot,type="png16m",width=10.5)
     }

-    etable2 &lt;- topTags(lrt, n=100)\$table
-    order &lt;- rownames(etable2)
-    cpm_sub &lt;- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),]
+    etable2 <- topTags(lrt, n=100)\$table
+    order <- rownames(etable2)
+    cpm_sub <- cpm(dge,normalized.lib.sizes=TRUE,log=TRUE)[as.numeric(order),]
     heatmap(t(cpm_sub))
     dev.off()
   }
@@ -394,6 +388,7 @@

   write("Done!",stdout())
 }
+]]>
         </configfile>
     </configfiles>
--- a/edger_dge_table_to_bedgraph	Tue Sep 01 04:25:37 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,222 +0,0 @@
-#!/usr/bin/env python
-
-import re,sys,os,os.path,argparse,textwrap,datetime
-
-class GTF:
-	def __init__(self,filename,features=["exon"],symbol="gene_id"):
-		self.features = features
-		if(re.match("^[a-zA-Z0-9_\-]+$",symbol)):
-			self.symbol = symbol
-		else:
-			raise ValueError('False symbol matching symbol: '+str(symbol))
-		self.index = {}
-		self.parse(filename)
-
-	def parse(self,filename):
-		with open(filename) as infile:
-			for line in infile:
-				self.parse_line(line)
-
-	def parse_line(self,line):
-		line = line.strip().split("\t")
-		if(len(line) == 9):
-			if(line[2] in self.features):
-				gene_id = self.parse_column_9(line[8])
-				if(gene_id):
-					if(not self.index.has_key(gene_id)):
-						self.index[gene_id] = []
-					self.index[gene_id].append([line[0],line[3],line[4]])
-
-	def parse_column_9(self,line):
-		query = self.symbol+'[ =]+([^ ;]+)'
-		m = re.search(query, line)
-
-		if(m):
-			return m.group(1).strip("'").strip('"')
-		else:
-			return None
-
-	def get(self,key):
-		try:
-			return self.index[key]
-		except:
-			return False
-
-
-class EdgeR_table:
-	def __init__(self,table,gtf,columns=[3,7]):
-		self.index = {}
-		self.gtf = gtf
-		self.columns = columns
-		self.parse(table)
-
-	def parse(self,filename):
-		i = 0
-		with open(filename) as infile:
-			for line in infile:
-				if(i == 0):
-					self.parse_header(line)
-				else:
-					self.parse_line(line)
-				i += 1
-
-	def parse_header(self,line):
-		params = line.strip().split("\t")
-		if(params[1].lower().find("genes") == -1):
-			raise ValueError('False header in file - no "genes" in 2nd colum: '+line)
-		if(params[2].lower().find("logfc") == -1):
-			raise ValueError('False header in file - no "logfc" in 3rd colum: '+line)
-		if(params[3].lower().find("logcpm") == -1):
-			raise ValueError('False header in file - no "logcpm" in 4th colum: '+line)
-		if(params[4].lower().find("lr") == -1):
-			raise ValueError('False header in file - no "lr" in 5th colum: '+line)
-		if(params[5].lower().find("pvalue") == -1):
-			raise ValueError('False header in file - no "pvalue" in 6th colum: '+line)
-		if(params[6].lower().find("fdr") == -1):
-			raise ValueError('False header in file - no "fdr" in 7th colum: '+line)
-
-	def parse_line(self,line):
-		line = line.strip().split("\t")
-
-		if(len(line) == 7):
-			gene_id = line[1].strip('"').strip("'")
-			column_data = {}
-			for column in self.columns:
-				if(column in [6,7]):
-					column_data[column] = str(1.0 - float(line[column-1]))
-				else:
-					column_data[column] = line[column-1]
-
-			locations = self.gtf.get(gene_id)
-			if(not locations):
-				print "Warning: no location found for gene "+gene_id
-			else:
-				for location in locations:
-					self.insert(location,column_data)
-
-	def insert(self,location,data):
-		chrom = location[0]
-		start = location[1]
-		end = location[2]
-
-		if(not self.index.has_key(chrom)):
-			self.index[chrom] = {}
-
-		if(not self.index[chrom].has_key(start)):
-			self.index[chrom][start] = {}
-
-		if(not self.index[chrom][start].has_key(end)):
-			self.index[chrom][start][end] = []
-
-		self.index[chrom][start][end].append(data)
-
-	def export(self,filenames={3:"log_cpm.txt",7:"fdr.txt"}):
-		for column in self.columns:
-			fh = open(filenames[column],"w")
-
-			buf = False
-
-			for chrom in sorted(self.index.keys()):
-				for start in sorted(self.index[chrom].keys()):
-					for end in sorted(self.index[chrom][start].keys()):
-						fh.write(chrom+"\t"+start+"\t"+end+"\t"+self.index[chrom][start][end][0][column]+"\n")
-
-			fh.close()
-
-			os.system("sort -k1,1V -k2,2g -k3,3g '"+filenames[column]+"' > '"+filenames[column]+".sorted'")
-
-
-def remove_overlap_in_bedgraph(bedgraph_file_dirty,bedgraph_file_clean):
-	fh = open(bedgraph_file_clean,"w")
-	buf = False
-
-	with open(bedgraph_file_dirty,"r") as f:
-		for line in f:
-			cur = line.strip().split("\t")
-			cur[1] = int(cur[1])
-			cur[2] = int(cur[2])
-
-			if(not buf):
-				buf = cur
-			else:
-				if(cur[0] == buf[0] and cur[1] <= buf[2] ):
-					if(buf[1] == cur[1]):									#is subset
-						newscore = (float(buf[3])+float(cur[3]))/2
-						buf[2] = cur[2]
-						buf[3] = newscore
-					else:
-						c1 = buf[1]
-						c2 = cur[1]
-						c3 = min(buf[2],cur[2])
-						c4 = max(buf[2],cur[2])
-
-						fh.write(buf[0]+"\t"+str(c1)+"\t"+str(c2-1)+"\t"+str(buf[3])+"\n")
-
-						newscore = (float(buf[3])+float(cur[3]))/2
-						#fh.write(buf[0]+"\t"+str(c2+1)+"\t"+str(c3)+"\t"+str(newscore)+"\tp2\n")
-						#buf = [buf[0], c3+1 , c4 , cur[3]]
-
-						buf = [buf[0], c2 , c4 , cur[3]]
-
-						# find if buf is a subset -> if so, merge and send to buffer
-						# or find the overlapping region
-
-						# if current is overlapping with buffer; merge:
-						##  [       ]                 < buf
-						##       [        ]           < cur
-						##
-						##  [             ]           < buf
-						##       [    ]               < cur
-						##  111112222333333  << write 1 and 2 and keep 3 in buf
-				else:
-					fh.write(buf[0]+"\t"+str(buf[1])+"\t"+str(buf[2])+"\t"+str(buf[3])+"\n")
-					buf=cur
-
-		fh.write(buf[0]+"\t"+str(buf[1])+"\t"+str(buf[2])+"\t"+str(buf[3])+"\n")
-	fh.close()
-
-
-
-if __name__ == "__main__":
-	parser = argparse.ArgumentParser()
-
-	parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,epilog="For more info please visit:\n<https://github.com/yhoogstrate/fuma>")
-
-	parser.add_argument("-t",help="CPM table to extract columns from",nargs=1,required=True)
-	parser.add_argument("-g",help="GTF file used to extract genomic location",nargs=1,required=True)
-
-	parser.add_argument("-c3",help="Output (bedgraph) for column 3 (logFC)",nargs="?",required=False)
-	parser.add_argument("-c4",help="Output (bedgraph) for column 4 (logCPM)",nargs="?",required=False)
-	parser.add_argument("-c5",help="Output (bedgraph) for column 5 (LR)",nargs="?",required=False)
-	parser.add_argument("-c6",help="Output (bedgraph) for column 6 (PValue)",nargs="?",required=False)
-	parser.add_argument("-c7",help="Output (bedgraph) for column 7 (FDR)",nargs="?",required=False)
-
-	args = parser.parse_args()
-
-	#files = {3:"VCAP_logFC.hg19.bedgraph",7:"VCAP_fdr.hg19.bedgraph"}
-	files = {}
-
-	if(args.c3):
-		files[3] = args.c3
-	if(args.c4):
-		files[4] = args.c4
-	if(args.c5):
-		files[5] = args.c5
-	if(args.c6):
-		files[6] = args.c6
-	if(args.c7):
-		files[7] = args.c7
-
-	print "Parsing GTF file"
-	g = GTF(args.g[0])
-
-	print "Parsing EdgeR table"
-	e = EdgeR_table(args.t[0],g,files.keys())
-
-	print "Exporting raw bedgraph(s)"
-	e.export(files)
-
-	print "Removing overlapping entries in bedgraph(s)"
-	for key in files.keys():
-		remove_overlap_in_bedgraph(files[key]+".sorted",files[key])
-		os.system("rm '"+files[key]+".sorted'")
--- a/tool_dependencies.xml	Tue Sep 01 04:25:37 2015 -0400
+++ b/tool_dependencies.xml	Tue Sep 01 04:32:16 2015 -0400
@@ -7,7 +7,7 @@
     <package name="design_matrix_creator" version="1.0.0">
         <install version="1.0">
             <actions>
-                <action type="shell_command">ls -als ; echo "###############" ; ls -als $REPOSITORY_INSTALL_DIR ; pwd ; mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/design_matrix_creator $INSTALL_DIR/bin/</action>
+                <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/bin/design_matrix_creator $INSTALL_DIR/bin/</action>
                 <action type="chmod">
                     <file mode="755">$INSTALL_DIR/bin/design_matrix_creator</file>
                 </action>
@@ -22,7 +22,7 @@
     <package name="edger_dge_table_to_bedgraph" version="1.0.0">
         <install version="1.0">
             <actions>
-                <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/edger_dge_table_to_bedgraph $INSTALL_DIR/bin/</action>
+                <action type="shell_command">mkdir $INSTALL_DIR/bin ; cp $REPOSITORY_INSTALL_DIR/bin/edger_dge_table_to_bedgraph $INSTALL_DIR/bin/</action>
                 <action type="chmod">
                     <file mode="755">$INSTALL_DIR/bin/edger_dge_table_to_bedgraph</file>
                 </action>