changeset 0:42c6b266b39e draft

Uploaded
author jeremie
date Mon, 07 Jul 2014 11:06:14 -0400
parents
children af260c953c94
files cnvnator.py
diffstat 1 files changed, 130 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cnvnator.py	Mon Jul 07 11:06:14 2014 -0400
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+
+import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time
+from Bio import SeqIO
+
+parser = argparse.ArgumentParser(description='')
+
+parser.add_argument('-r', dest='inputFastaFile', help='the reference file to use', required=True)
+parser.add_argument('-b', dest='inputBamFile', help='the bam file', required=True)
+parser.add_argument('-s', dest='binSize', help='the bam file', required=False, default=100)
+parser.add_argument('-o1', dest='outputRawFile', help='the output', required=True)
+parser.add_argument('-o2', dest='outputVcfFile', help='the output', required=True)
+
+
+def execute(cmd, output=None):
+	# function to execute a cmd and report if an error occur
+	# print(cmd)
+	try: 
+		process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+		process.wait()
+		stdout,stderr = process.communicate()
+	except Exception, e: # une erreur de ma commande : stderr
+		sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e))
+		return
+	if output:
+		output = open(output, 'w')
+		output.write(stdout)
+		output.close()
+	if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes)
+		sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr))
+
+
+# def makeLink(filename, ext):
+# 	link = tempDir+"/"+ext+"."+ext
+# 	if not os.path.exists(link):
+# 		os.system("ln -s %s %s" % (filename, link))
+# 	# os.system("ln -s %s %s.%s" % (filename, filename, ext))
+# 	return link
+
+def makeTempLink(tempDir, filepath, newExt=None):
+	# build a link from filename to tempDir/filename.newExt
+	filename = os.path.split(filepath)[1]
+	link = tempDir+"/"+filename+"."+newExt
+	if not os.path.exists(link):
+		os.system("ln -s %s %s" % (filepath, link))
+	return link
+
+
+def fastaSplitter(fastaFile, outputFolder=None):
+	# split a fastaFile, create a new file per sequence in the outputFolder
+	input = open(fastaFile, "rU")
+	path = outputFolder+"/cnvnator_fastas"
+	if not os.path.exists(path):
+		os.makedirs(path)
+	num_rec=0
+	for record in SeqIO.parse(input, "fasta"):
+		num_rec += 1
+		# path = os.path.splitext(filename)[0]
+		# print(path)
+		filename2 = os.path.split(fastaFile)[1]
+		# output = open("%s/%s.%s" % (path, record.id, filename2), "w")
+		output = open("%s/%s.fa" % (path, record.id), "w")
+		# SeqIO.write requires a list, so turn our record into one
+		SeqIO.write([record], output, "fasta")
+		output.close()
+	input.close()
+	if (num_rec==0):
+		sys.stdout.write('not a fasta file ?')
+		print ("rec ="+str(num_rec))
+	return path
+
+
+def main():
+	args = parser.parse_args()
+	tempDir = tempfile.mkdtemp()
+
+	# cnvnator n'accepte que les .bam 
+	# en tant que fichier d'entree
+	# et .fasta en tant que reference
+
+	try:
+		if os.path.splitext(args.inputFastaFile)[1] != ".fasta":
+			args.inputFastaFile = makeTempLink(tempDir, args.inputFastaFile, "fasta")
+
+		if os.path.splitext(args.inputBamFile)[1] != ".bam" :
+			args.inputBamFile = makeTempLink(tempDir, args.inputBamFile, "bam")
+
+		path, filename = os.path.split(args.outputRawFile)
+		if not os.path.exists(path):
+			os.makedirs(path)
+
+		root = tempDir+"/root"
+
+		fastaDir = fastaSplitter(args.inputFastaFile, tempDir)
+
+		# preProcess
+		cmd = "cnvnator -root %s -tree %s" % (root, args.inputBamFile)
+		execute(cmd)
+
+		# his
+		cmd = "cnvnator -root %s -d %s -his %s" % (root, fastaDir, args.binSize)
+		execute(cmd)
+
+		# stat
+		cmd = "cnvnator -root %s -stat %s" % (root, args.binSize)
+		execute(cmd)
+
+		# eval
+		# optional
+
+		# partition
+		cmd = "cnvnator -root %s -partition %s" %(root, args.binSize)
+		execute(cmd)
+
+		# call
+		cmd = "cnvnator -root %s -call %s" % (root, args.binSize)
+		execute(cmd, args.outputRawFile)
+
+		# cnvnatortovcf
+		if args.outputVcfFile:
+			cmd = "cnvnator2VCF.pl %s %s" % (args.outputRawFile, fastaDir)
+			execute(cmd, args.outputVcfFile)
+
+	finally:
+		if os.path.exists(tempDir):
+			shutil.rmtree(tempDir)
+
+
+if __name__ == '__main__':
+	main()
\ No newline at end of file