# HG changeset patch # User jeremie # Date 1404745574 14400 # Node ID 42c6b266b39e0d5ec11b290889f32af1d32288a6 Uploaded diff -r 000000000000 -r 42c6b266b39e cnvnator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cnvnator.py Mon Jul 07 11:06:14 2014 -0400 @@ -0,0 +1,130 @@ +#!/usr/bin/python + +import argparse, optparse, os, shutil, subprocess, sys, tempfile, shlex, time +from Bio import SeqIO + +parser = argparse.ArgumentParser(description='') + +parser.add_argument('-r', dest='inputFastaFile', help='the reference file to use', required=True) +parser.add_argument('-b', dest='inputBamFile', help='the bam file', required=True) +parser.add_argument('-s', dest='binSize', help='the bam file', required=False, default=100) +parser.add_argument('-o1', dest='outputRawFile', help='the output', required=True) +parser.add_argument('-o2', dest='outputVcfFile', help='the output', required=True) + + +def execute(cmd, output=None): + # function to execute a cmd and report if an error occur + # print(cmd) + try: + process = subprocess.Popen(args=shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process.wait() + stdout,stderr = process.communicate() + except Exception, e: # une erreur de ma commande : stderr + sys.stderr.write("problem doing : %s\n%s\n" %(cmd, e)) + return + if output: + output = open(output, 'w') + output.write(stdout) + output.close() + if stderr != '': # une erreur interne au programme : stdout (sinon, souvent des warning arrete les programmes) + sys.stdout.write("warning or error while doing : %s\n-----\n%s-----\n\n" %(cmd, stderr)) + + +# def makeLink(filename, ext): +# link = tempDir+"/"+ext+"."+ext +# if not os.path.exists(link): +# os.system("ln -s %s %s" % (filename, link)) +# # os.system("ln -s %s %s.%s" % (filename, filename, ext)) +# return link + +def makeTempLink(tempDir, filepath, newExt=None): + # build a link from filename to tempDir/filename.newExt + filename = os.path.split(filepath)[1] + link = tempDir+"/"+filename+"."+newExt + if not os.path.exists(link): + os.system("ln -s %s %s" % (filepath, link)) + return link + + +def fastaSplitter(fastaFile, outputFolder=None): + # split a fastaFile, create a new file per sequence in the outputFolder + input = open(fastaFile, "rU") + path = outputFolder+"/cnvnator_fastas" + if not os.path.exists(path): + os.makedirs(path) + num_rec=0 + for record in SeqIO.parse(input, "fasta"): + num_rec += 1 + # path = os.path.splitext(filename)[0] + # print(path) + filename2 = os.path.split(fastaFile)[1] + # output = open("%s/%s.%s" % (path, record.id, filename2), "w") + output = open("%s/%s.fa" % (path, record.id), "w") + # SeqIO.write requires a list, so turn our record into one + SeqIO.write([record], output, "fasta") + output.close() + input.close() + if (num_rec==0): + sys.stdout.write('not a fasta file ?') + print ("rec ="+str(num_rec)) + return path + + +def main(): + args = parser.parse_args() + tempDir = tempfile.mkdtemp() + + # cnvnator n'accepte que les .bam + # en tant que fichier d'entree + # et .fasta en tant que reference + + try: + if os.path.splitext(args.inputFastaFile)[1] != ".fasta": + args.inputFastaFile = makeTempLink(tempDir, args.inputFastaFile, "fasta") + + if os.path.splitext(args.inputBamFile)[1] != ".bam" : + args.inputBamFile = makeTempLink(tempDir, args.inputBamFile, "bam") + + path, filename = os.path.split(args.outputRawFile) + if not os.path.exists(path): + os.makedirs(path) + + root = tempDir+"/root" + + fastaDir = fastaSplitter(args.inputFastaFile, tempDir) + + # preProcess + cmd = "cnvnator -root %s -tree %s" % (root, args.inputBamFile) + execute(cmd) + + # his + cmd = "cnvnator -root %s -d %s -his %s" % (root, fastaDir, args.binSize) + execute(cmd) + + # stat + cmd = "cnvnator -root %s -stat %s" % (root, args.binSize) + execute(cmd) + + # eval + # optional + + # partition + cmd = "cnvnator -root %s -partition %s" %(root, args.binSize) + execute(cmd) + + # call + cmd = "cnvnator -root %s -call %s" % (root, args.binSize) + execute(cmd, args.outputRawFile) + + # cnvnatortovcf + if args.outputVcfFile: + cmd = "cnvnator2VCF.pl %s %s" % (args.outputRawFile, fastaDir) + execute(cmd, args.outputVcfFile) + + finally: + if os.path.exists(tempDir): + shutil.rmtree(tempDir) + + +if __name__ == '__main__': + main() \ No newline at end of file