# HG changeset patch # User jingchunzhu # Date 1439535881 25200 # Node ID a38cc72edd75c031869c552a56dee830f5249fb7 # Parent 23d98125d20c11d477f3192f15e07c36b46b4c4f xena header diff -r 23d98125d20c -r a38cc72edd75 parseSnpEffVcf.py --- a/parseSnpEffVcf.py Thu Aug 13 23:26:33 2015 -0700 +++ b/parseSnpEffVcf.py Fri Aug 14 00:04:41 2015 -0700 @@ -436,7 +436,13 @@ myVcf = vcf(sys.stdin) - fout =open(args.ID,'w') + tmpOutput = "file_"+args.ID + fout =open(tmpOutput, 'w') + + if args.ID[-4:]==".vcf": + sampleID = args.ID[:-4] + else: + sampleID = args.ID for row in myVcf.read(): #total =total+1 @@ -460,7 +466,7 @@ AA_Change = row.effectPerGene[gene]["Amino_Acid_Change"] if AA_Change !="" and AA_Change[:2]!="p.": AA_Change="p."+AA_Change - fout.write(string.join([args.ID, row.chr, str(row.start), + fout.write(string.join([sampleID, row.chr, str(row.start), str(row.end), row.reference, row.alt, gene,row.effectPerGene[gene]["effect"], str(row.DNA_AF), str(row.RNA_AF),AA_Change @@ -470,7 +476,7 @@ gene ="" AA_Change="" effect ="" - fout.write(string.join([args.ID, row.chr, str(row.start), + fout.write(string.join([sampleID, row.chr, str(row.start), str(row.end), row.reference, row.alt, gene,effect, str(row.DNA_AF), str(row.RNA_AF),AA_Change @@ -478,10 +484,10 @@ ],"\t")+"\n") fout.close() - os.system("cat "+args.ID+" >> "+args.output) - os.system("rm -f "+args.ID) + os.system("cat "+tmpOutput+" >> "+args.output) + os.system("rm -f "+tmpOutput) if __name__ == '__main__': - main() + main() diff -r 23d98125d20c -r a38cc72edd75 vcfToXena.xml --- a/vcfToXena.xml Thu Aug 13 23:26:33 2015 -0700 +++ b/vcfToXena.xml Fri Aug 14 00:04:41 2015 -0700 @@ -4,7 +4,8 @@ java -Xmx4G -jar $__tool_directory__/snpEff/snpEff.jar -c $__tool_directory__/snpEff/snpEff.config -i vcf -upDownStreamLen 5000 $genome $input > $__tool_directory__/tmp ; - cat $__tool_directory__/tmp | python $__tool_directory__/parseSnpEffVcf.py $input.name $snpeff_output + python $__tool_directory__/xenaHeader.py $snpeff_output; + cat $__tool_directory__/tmp | python $__tool_directory__/parseSnpEffVcf.py $input.name $snpeff_output diff -r 23d98125d20c -r a38cc72edd75 xenaHeader.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xenaHeader.py Fri Aug 14 00:04:41 2015 -0700 @@ -0,0 +1,14 @@ +import argparse +import string + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("output", type=str, help="outputfile") + args = parser.parse_args() + + fout= open(args.output,'w') + fout.write("#"+string.join(["sample","chr","start","end","reference","alt","gene","effect","DNA_VAF","RNA_VAF","Amino_Acid_Change"],"\t")+"\n") + fout.close() + +if __name__ == '__main__': + main()