changeset 14:ee3a2e0cc270 draft default tip

Uploaded
author jeremie
date Wed, 09 Jul 2014 10:17:54 -0400
parents 077a2dbd906a
children
files pindel.py
diffstat 1 files changed, 31 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/pindel.py	Wed Jul 09 10:17:45 2014 -0400
+++ b/pindel.py	Wed Jul 09 10:17:54 2014 -0400
@@ -1,14 +1,13 @@
 
-import argparse, optparse, os, shutil, subprocess, sys, tempfile, time, shlex
+import argparse, os, shutil, subprocess, sys, tempfile, time, shlex
 
 parser = argparse.ArgumentParser(description='')
 parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file')
 parser.add_argument('-b', dest='inputBamFile', required=True, help='the bam file')
-parser.add_argument('-s', dest='insert_size', type=int, default='500', required=True, help='the insert size')
-parser.add_argument('-n', dest='sampleTag', default='sample', required=False, help='the sample tag')
+parser.add_argument('-s', dest='insert_size', type=int, default=None, required=False, help='the insert size')
+parser.add_argument('-t', dest='sampleTag', default='default', required=False, help='the sample tag')
 # parser.add_argument('-o1', dest='outputRaw', help='the output raw', required=True)
 parser.add_argument('-o2', dest='outputVcfFile', help='the output vcf', required=True)
-parser.add_argument('--name', dest='name', default='name')
 parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1')
 parser.add_argument('--window_size', dest='window_size', type=int, default='5')
 parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01')
@@ -31,6 +30,7 @@
 parser.add_argument('-B', '--balance_cutoff', dest='balance_cutoff', type=int, default='0')
 parser.add_argument('-A', '--anchor_quality', dest='anchor_quality', type=int, default='0')
 parser.add_argument('-M', '--minimum_support_for_event', dest='minimum_support_for_event', type=int, default='3')
+parser.add_argument('-n', '--NM', dest='NM', type=int, default='2')
 parser.add_argument('--detect_DD', dest='detect_DD', action='store_true', default=False)
 parser.add_argument('--MAX_DD_BREAKPOINT_DISTANCE', dest='MAX_DD_BREAKPOINT_DISTANCE', type=int, default='350')
 parser.add_argument('--MAX_DISTANCE_CLUSTER_READS', dest='MAX_DISTANCE_CLUSTER_READS', type=int, default='100')
@@ -39,22 +39,8 @@
 parser.add_argument('--MIN_DD_MAP_DISTANCE', dest='MIN_DD_MAP_DISTANCE', type=int, default='8000')
 parser.add_argument('--DD_REPORT_DUPLICATION_READS', dest='DD_REPORT_DUPLICATION_READS', action='store_true', default=False)
 
+parser.add_argument('-z', '--input_SV_Calls_for_assembly', dest='input_SV_Calls_for_assembly', action='store_true', default=False)
 
-# def execute(cmd, output=""):
-# 	try: 
-# 		err = open(tempDir+"/errorLog", 'a')
-# 		if output != "":
-# 			out = open(output, 'w')
-# 		else:
-# 			out = subprocess.PIPE
-# 		process = subprocess.Popen(args=shlex.split(cmd), stdout=out, stderr=err)
-# 		process.wait()
-# 		err.close()
-# 		if out != subprocess.PIPE:
-# 			out.close()
-# 	except Exception, e:
-# 		sys.stderr.write("problem doing : %s\n" %(cmd))
-# 		sys.stderr.write('%s\n\n' % str(e))
 
 def execute(cmd, output=None):
 	import subprocess, sys, shlex
@@ -82,29 +68,14 @@
 	execute(cmd)
 
 
-def config(inputBamFile, meanInsertSize, name, tempDir):
+def config(inputBamFile, meanInsertSize, tag, tempDir):
 	configFile = tempDir+"/pindel_configFile"
-	# cmd = 'echo %s\t%s\t%s > %s' %(bam, size, name, configFile)
-	# print("commande = "+cmd)
-	# os.system(cmd)
 	fil = open(configFile, 'w')
-	fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, name))
+	fil.write("%s\t%s\t%s" %(inputBamFile, meanInsertSize, tag))
 	fil.close()
 	return configFile
 
 
-# def pindel(reference, configFile, output):
-# 	print("pindel")
-# 	cmd="pindel -f %s -i %s -o %s" %(reference, configFile, output)
-# 	print("commande = "+cmd)
-# 	os.system(cmd)
-
-# 	os.system("rm %s" %(output))
-# 	if not os.path.exists(output):
-# 		os.makedirs(output)
-# 	os.system("mv %s_* %s" %(output, output))
-
-
 def pindel(reference, configFile, args, tempDir):
 	pindelTempDir=tempDir+"/pindel" 
 	cmd = "pindel -f %s -i %s -o %s " %(reference, configFile, pindelTempDir)
@@ -161,23 +132,23 @@
 		execute("cp %s %s" %(pindelTempDir+".vcf", outputVcf))
 
 
-def getMeanInsertSize(bamFile, tempDir):
-	samFile = tempDir+"/sam"
-	histogramFile = tempDir+"/histogram"
-	outputFile = tempDir+"/output"
-	cmd = "samtools view -h -o %s %s" % (bamFile, samFile)
-	execute(cmd, )
-	cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile)
-	execute(cmd)
-	try:
-		f = open(outputFile, 'r')
-		lignes = f.readlines()
-		f.close()
-		meanInsertSize = lignes[7].split('\t')[0]
-	except:
-		sys.stderr.write("problem while opening file %s " %(outputFile))
-		return 0
-	return meanInsertSize
+# def getMeanInsertSize(bamFile, tempDir):
+# 	samFile = tempDir+"/sam"
+# 	histogramFile = tempDir+"/histogram"
+# 	outputFile = tempDir+"/output"
+# 	cmd = "samtools view -h %s -o %s" % (bamFile, samFile)
+# 	execute(cmd, )
+# 	cmd = "picard-tools CollectInsertSizeMetrics H=%s O=%s I=%s" % (histogramFile, outputFile, samFile)
+# 	execute(cmd)
+# 	try:
+# 		f = open(outputFile, 'r')
+# 		lignes = f.readlines()
+# 		f.close()
+# 		meanInsertSize = lignes[7].split('\t')[0]
+# 	except:
+# 		sys.stderr.write("problem while opening file %s " %(outputFile))
+# 		return 0
+# 	return meanInsertSize
 
 
 def __main__():
@@ -185,24 +156,16 @@
 	args = parser.parse_args()
 	tempDir = tempfile.mkdtemp()
 
-	# try:
-	# 	cmd = 'pindel'
-	# 	cmd += ' -i1 %s -i2 %s -o %s ' %(args.input1, args.input2, args.output1)
-	# except:
-	# 	sys.stdout.write('problem args : %s' %(cmd))
-	# try:
-	# 	os.system(cmd)
-	# except:
-	# 	sys.stdout.write('problem cmd')
-	# pipeline(args.reference, args.bam, args.output_raw, args.outputVcfFile, size, name, args)
-	
 	try:
 		indexBam(args.inputFastaFile, args.inputBamFile)
-		meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir))
-		configFile = config(args.inputBamFile, meanInsertSize, args.name, tempDir)
+		# if args.insert_size==None:
+		# 	meanInsertSize = int(getMeanInsertSize(args.inputBamFile, tempDir))
+		# else:
+		meanInsertSize=args.insert_size
+		configFile = config(args.inputBamFile, meanInsertSize, args.sampleTag, tempDir)
 		pindelTempDir = pindel(args.inputFastaFile, configFile, args, tempDir)
-		pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.name, pindelTempDir)
-		
+		pindel2vcf(args.outputVcfFile, args.inputFastaFile, args.sampleTag, pindelTempDir)
+
 	finally:
 		if os.path.exists(tempDir):
 			shutil.rmtree(tempDir)