Mercurial > repos > jjohnson > defuse
changeset 28:f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 03 Sep 2013 06:41:19 -0500 |
parents | d57fcac025e2 |
children | 103d61bfa1f3 |
files | defuse.xml defuse_results_to_vcf.py |
diffstat | 2 files changed, 17 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/defuse.xml Wed Aug 14 16:44:18 2013 -0500
+++ b/defuse.xml Tue Sep 03 06:41:19 2013 -0500
@@ -75,6 +75,7 @@
 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
 </when> <!-- full -->
 </conditional> <!-- defuse_param -->
+ <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/>
 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
@@ -94,6 +95,9 @@
 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
 <filter>do_get_reads == True</filter>
 </data>
+ <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam">
+ <filter>breakpoints_bam == True</filter>
+ </data>
 <!--
 expression_plot
 circos plot
@@ -582,6 +586,17 @@
 ## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
+#if $breakpoints_bam:
+if [ -e output_dir/results.filtered.tsv ] ${amp}${amp} [ -e output_dir/breakpoints.genome.psl ]
+then
+ awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp}
+ psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp}
+ samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp}
+ samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp}
+ ## samtools index breakpoints.bam
+ cp breakpoints.bam $fusions_bam
+fi
+#end if
 ## create html with links for output_dir
 #if $defuse_out.__str__ != 'None':
 if [ -e $defuse_out ]
```
```diff
--- a/defuse_results_to_vcf.py Wed Aug 14 16:44:18 2013 -0500
+++ b/defuse_results_to_vcf.py Tue Sep 03 06:41:19 2013 -0500
@@ -258,8 +258,8 @@
 info1 = [svtype,'MATEID=%s' % mate_id2] + info
 info2 = [svtype,'MATEID=%s' % mate_id1] + info
 qual = int(float(fields[columns.index('probability')]) * 255) if columns.index('probability') else '.'
- vcf1 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome1,genomic_break_pos1, mate_id1, ref1, alt1, qual, filt, ';'.join(info1) )
- vcf2 = '%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s'% (gene_chromosome2,genomic_break_pos2, mate_id2, ref2, alt2, qual, filt, ';'.join(info2) )
+ vcf1 = '%s\t%d\t%s\t%s\t%s\t%s\t%d\t%s'% (gene_chromosome1,genomic_break_pos1, mate_id1, ref1, alt1, qual, filt, ';'.join(info1) )
+ vcf2 = '%s\t%d\t%s\t%s\t%s\t%s\t%d\t%s'% (gene_chromosome2,genomic_break_pos2, mate_id2, ref2, alt2, qual, filt, ';'.join(info2) )
 add_vcf_line(gene_chromosome1,genomic_break_pos1,mate_id1,vcf1)
 add_vcf_line(gene_chromosome2,genomic_break_pos2,mate_id2,vcf2)
 write_vcf()
```