changeset 28:f51a95bdc38e

Add breakpoints.bam output derived from breakpoints.genome.psl
author Jim Johnson <jj@umn.edu>
date Tue, 03 Sep 2013 06:41:19 -0500
parents d57fcac025e2
children 103d61bfa1f3
files defuse.xml defuse_results_to_vcf.py
diffstat 2 files changed, 17 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/defuse.xml	Wed Aug 14 16:44:18 2013 -0500
+++ b/defuse.xml	Tue Sep 03 06:41:19 2013 -0500
@@ -75,6 +75,7 @@
       <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
     </when> <!-- full -->
   </conditional>  <!-- defuse_param -->
+  <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/>
   <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 
          help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 
                but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
@@ -94,6 +95,9 @@
   <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
     <filter>do_get_reads == True</filter>
   </data>
+  <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam">
+    <filter>breakpoints_bam == True</filter>
+  </data>
   <!--
    expression_plot
    circos plot
@@ -582,6 +586,17 @@
 ## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
+#if $breakpoints_bam:
+if [ -e output_dir/results.filtered.tsv ] ${amp}${amp}  [ -e output_dir/breakpoints.genome.psl ]
+then
+  awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp}
+  psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp}
+  samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp}
+  samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp}
+  ## samtools index breakpoints.bam
+  cp breakpoints.bam $fusions_bam
+fi
+#end if
 ## create html with links for output_dir
 #if $defuse_out.__str__ != 'None':
 if [ -e $defuse_out ]
--- a/defuse_results_to_vcf.py	Wed Aug 14 16:44:18 2013 -0500
+++ b/defuse_results_to_vcf.py	Tue Sep 03 06:41:19 2013 -0500
@@ -258,8 +258,8 @@
       info1 = [svtype,'MATEID=%s' % mate_id2] + info
       info2 = [svtype,'MATEID=%s' % mate_id1] + info
       qual = int(float(fields[columns.index('probability')]) * 255) if columns.index('probability') else '.'
-      vcf1 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome1,genomic_break_pos1, mate_id1, ref1, alt1, qual, filt, ';'.join(info1) )
-      vcf2 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome2,genomic_break_pos2, mate_id2, ref2, alt2, qual, filt, ';'.join(info2) )
+      vcf1 = '%s\t%d\t%s\t%s\t%s\t%s\t%d\t%s'% (gene_chromosome1,genomic_break_pos1, mate_id1, ref1, alt1, qual, filt, ';'.join(info1) )
+      vcf2 = '%s\t%d\t%s\t%s\t%s\t%s\t%d\t%s'% (gene_chromosome2,genomic_break_pos2, mate_id2, ref2, alt2, qual, filt, ';'.join(info2) )
       add_vcf_line(gene_chromosome1,genomic_break_pos1,mate_id1,vcf1)
       add_vcf_line(gene_chromosome2,genomic_break_pos2,mate_id2,vcf2)
     write_vcf()