view pindel.py @ 4:3e64e2294f4b draft

Uploaded
author jeremie
date Wed, 02 Jul 2014 04:44:12 -0400
parents 544c5266fc0f
children de8a643a41e3
line wrap: on
line source


import argparse, optparse, os, shutil, subprocess, sys, tempfile, time, shlex

parser = argparse.ArgumentParser(description='')
parser.add_argument('-r', dest='input_reference', required=True, help='the reference file')
parser.add_argument('-b', dest='input_bam', required=True, help='the bam file')
parser.add_argument('-s', dest='insert_size', type=int, default='500', required=True, help='the insert size')
parser.add_argument('-n', dest='sampleTag', default='sample', required=False, help='the sample tag')
parser.add_argument('-o', dest='output_vcf', help='the output vcf', required=True)
parser.add_argument('--name', dest='name', default='defaults')
parser.add_argument('--number_of_threads', dest='number_of_threads', type=int, default='1')
parser.add_argument('--window_size', dest='window_size', type=int, default='5')
parser.add_argument('--sequencing_error_rate', dest='sequencing_error_rate', type=float, default='0.01')
parser.add_argument('--sensitivity', dest='sensitivity', default='0.95', type=float)
parser.add_argument('--report_long_insertions', dest='report_long_insertions', action='store_true', default=False)
parser.add_argument('--report_duplications', dest='report_duplications', action='store_true', default=False)
parser.add_argument('--report_inversions', dest='report_inversions', action='store_true', default=False)
parser.add_argument('--report_breakpoints', dest='report_breakpoints', action='store_true', default=False)

tmp_dir = tempfile.mkdtemp()
pindel_dir=tmp_dir+"/pindel"
errorFile = tmp_dir+"/errorLog"


def execute( cmd, output="" ):
	try: 
		err = open(tmp_dir+"/errorLog", 'a')
		if output != "":
			out = open(output, 'w')
		else:
			out = subprocess.PIPE
		process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
		process.wait()
		err.close()
		if out != subprocess.PIPE:
			out.close()
	except Exception, e:
		sys.stderr.write("problem doing : %s\n" %(cmd))
		sys.stderr.write( '%s\n\n' % str(e) )


def index (args):
	cmd = "samtools index %s" % (args.input_bam)
	# print( "commande = "+cmd )
	# os.system(cmd)
	execute(cmd)

	cmd = "samtools faidx %s" % (args.input_reference)
	# print( "commande = "+cmd )
	# os.system(cmd)
	execute(cmd)


def config( args ):
	# print("config")
	config = tmp_dir+"/pindel_config"
	# cmd = 'echo %s\t%s\t%s > %s' % (bam, size, name, config)
	# print("commande = "+cmd)
	# os.system(cmd)
	fil = open(config, 'w')
	fil.write("%s\t%s\t%s" % (args.input_bam, args.insert_size, args.name) )
	fil.close()
	return config


# def pindel( reference, config, output ):
# 	print("pindel")
# 	cmd="pindel -f %s -i %s -o %s" % (reference, config, output)
# 	print("commande = "+cmd)
# 	os.system(cmd)

# 	os.system("rm %s" % (output))
# 	if not os.path.exists(output):
# 		os.makedirs(output)
# 	os.system("mv %s_* %s" %(output, output) )


def pindel( reference, config, temp, args ):
	cmd = "pindel -f %s -i %s -o %s " % (reference, config, pindel_dir)
	cmd += "--number_of_threads %d --window_size %d --sequencing_error_rate %f --sensitivity %f" % (args.number_of_threads, args.window_size, args.sequencing_error_rate, args.sensitivity)
	if args.report_long_insertions:
		cmd += ' --report_long_insertions '
	if args.report_duplications:
		cmd += ' --report_duplications '
	if args.report_inversions:
		cmd += ' --report_inversions '
	if args.report_breakpoints:
		cmd += ' --report_breakpoints '
	print("commande = "+cmd)
	execute( cmd )





def move( avant, apres ):
	if os.path.exists(avant):
		execute("mv %s %s" %(avant, apres))


def pindel2vcf( args ):
	date = str(time.strftime('%d/%m/%y',time.localtime()))
	name = "test"
	cmd = "pindel2vcf -P %s -r %s -R %s -d %s" % (pindel_dir, args.input_reference, name, date )
	print ("commande = " + cmd)
	execute(cmd)
	if os.path.exists(pindel_dir+".vcf"):
		execute("cp %s %s" %(pindel_dir+".vcf", args.output_vcf))


def __main__():
	
	args = parser.parse_args()

	# try:
	# 	cmd = 'pindel'
	# 	cmd += ' -i1 %s -i2 %s -o %s ' % (args.input1, args.input2, args.output1)
	# except:
	# 	sys.stdout.write( 'problem args : %s' % (cmd) )
	# try:
	# 	os.system(cmd)
	# except:
	# 	sys.stdout.write( 'problem cmd' )
	# pipeline(args.reference, args.bam, args.output_raw, args.output_vcf, size, name, args)

	reference = args.input_reference
	bam = args.input_bam
	temp = args.output_vcf+"_temp/temp"
	output_vcf = args.output_vcf
	
	try:
		index( args )
		conf = config( args )

		pindel( reference, conf, temp, args )
		pindel2vcf(args)
		

	finally:
		if os.path.exists( tmp_dir ):
			shutil.rmtree( tmp_dir )

if __name__=="__main__":
	__main__()