comparison defuse.xml @ 45:aedaa66483f1 draft

Uploaded
author jjohnson
date Wed, 18 Oct 2017 16:55:57 -0400
parents 225750bf3770
children e500b50b72fd
comparison
equal deleted inserted replaced
44:225750bf3770 45:aedaa66483f1
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
7 <expand macro="defuse_requirement" /> 7 <expand macro="defuse_requirement" />
8 <expand macro="mapping_requirements" />
9 <expand macro="r_requirements" />
10 </requirements> 8 </requirements>
11 <command interpreter="command"> /bin/bash $shscript </command> 9 <command><![CDATA[
10 #if $defuse_out.__str__ != 'None':
11 ## ln to output_dir in from_work_dir
12 mkdir -p $defuse_out.dataset.extra_files_path &&
13 ln -s $defuse_out.dataset.extra_files_path output_dir &&
14 #else
15 mkdir -p output_dir &&
16 #end if
17 ## Put executable paths in config file; write it to ./defuse.cfg, the path the cp and defuse_run.pl steps below read (NOTE(review): assumes config_sub.sh writes its result to the path given as its second argument - confirm)
18 $__tool_directory__/config_sub.sh $defuse_config defuse.cfg &&
19 ## copy config to output
20 cp defuse.cfg $config_txt &&
21 ## make a data_dir and ln -s the input fastq
22 mkdir -p data_dir &&
23 ln -s "$left_pairendreads" data_dir/reads_1.fastq &&
24 ln -s "$right_pairendreads" data_dir/reads_2.fastq &&
25 ## run
26 perl defuse_run.pl --name "$library_name" --config defuse.cfg -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS &&
27 grep -v cluster_id output_dir/results.filtered.tsv | awk '{print $1}' > cluster_id_list &&
28 get_fusion_fastq.pl --list cluster_id_list --output output_dir --fastq1 results.fusions_1.fq --fastq2 results.fusions_2.fq &&
29 cp output_dir/results.* . &&
30 cp `find output_dir -name defuse.log` $defuse_log
31 #if $defuse_out.__str__ != 'None':
32 && $__tool_directory__/make_html.sh $defuse_out $defuse_out.dataset.extra_files_path
33 #end if
34 ]]></command>
12 <inputs> 35 <inputs>
13 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> 36 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/>
14 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> 37 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
15 <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column"> 38 <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column">
16 <validator type="length" min="1"/> 39 <validator type="length" min="1"/>
59 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> 82 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
60 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> 83 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
61 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> 84 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
62 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> 85 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
63 </param> 86 </param>
87 <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries">
88 <option value="no" selected="true">no</option> <!-- default choice; the option attribute is "selected", not "select" -->
89 <option value="yes">yes</option>
90 </param>
64 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> 91 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
65 <help>Position density when calculating covariance</help> 92 <help>Position density when calculating covariance</help>
66 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> 93 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
94 </param>
95 <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments">
96 <help>Maximum number of alignments for a read pair, Pairs with more alignments are filtered, default is 10</help>
97 <validator type="in_range" message="Choose a value between 1 and 100" min="1" max="100"/> <!-- message now states the same bounds as min/max -->
67 </param> 98 </param>
68 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> 99 <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
69 <option value="">Use Default</option> 100 <option value="">Use Default</option>
70 <option value="no">no</option> 101 <option value="no">no</option>
71 <option value="yes">yes</option> 102 <option value="yes">yes</option>
74 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> 105 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
75 --> 106 -->
76 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> 107 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
77 </when> <!-- full --> 108 </when> <!-- full -->
78 </conditional> <!-- defuse_param --> 109 </conditional> <!-- defuse_param -->
79 <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/>
80 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 110 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files"
81 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 111 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run,
82 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> 112 but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
113 <param name="breakpoints_bam" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/>
83 <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> 114 <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
84 </inputs> 115 </inputs>
85 <stdio>
86 <exit_code range="1:" level="fatal" description="Error Running Defuse" />
87 </stdio>
88 <outputs> 116 <outputs>
89 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> 117 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
90 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> 118 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" />
91 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> 119 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)">
92 <filter>keep_output == True</filter> 120 <filter>keep_output == True</filter>
93 </data> 121 </data>
94 <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> 122 <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" from_work_dir="results.classify.tsv"/>
95 <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> 123 <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" from_work_dir="results.filtered.tsv"/>
96 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> 124 <data format="fastqsanger" name="results_fusions1_fq" label="${tool.name} on ${on_string}: fusions_1.fq" from_work_dir="results.fusions_1.fq" />
97 <filter>do_get_reads == True</filter> 125 <data format="fastqsanger" name="results_fusions2_fq" label="${tool.name} on ${on_string}: fusions_2.fq" from_work_dir="results.fusions_2.fq" />
98 </data>
99 <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam">
100 <filter>breakpoints_bam == True</filter>
101 </data>
102 <!-- 126 <!--
103 expression_plot 127 expression_plot
104 circos plot 128 circos plot
105 --> 129 -->
106 </outputs> 130 </outputs>
107 <configfiles> 131 <configfiles>
108 <configfile name="defuse_config"> 132 <configfile name="defuse_config">
109 #import re 133 #import re
110 #set $ds = chr(36)
111 #if $refGenomeSource.genomeSource == "history": 134 #if $refGenomeSource.genomeSource == "history":
112 #set config_file = $refGenomeSource.config.__str__ 135 #set config_file = $refGenomeSource.config.__str__
113 #else 136 #else
114 #set config_file = $refGenomeSource.index.value 137 #set config_file = $refGenomeSource.index.value
115 #end if 138 #end if
321 #try 344 #try
322 $ref_dict['bowtie_quals'] 345 $ref_dict['bowtie_quals']
323 #except 346 #except
324 --phred33-quals 347 --phred33-quals
325 #end try 348 #end try
349 bowtie_params = #slurp
350 #try
351 $ref_dict['bowtie_params']
352 #except
353 --chunkmbs 200
354 #end try
326 max_insert_size = #slurp 355 max_insert_size = #slurp
327 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": 356 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "":
328 $defuse_param.max_insert_size 357 $defuse_param.max_insert_size
329 #else 358 #else
330 #try 359 #try
479 0.50 508 0.50
480 #end try 509 #end try
481 #end if 510 #end if
482 positive_controls = \$(data_directory)/controls.txt 511 positive_controls = \$(data_directory)/controls.txt
483 512
513 # Use multiple exon transcripts for stats calculations (yes/no)
514 # should be enabled for very small libraries
515 multi_exon_transcripts_stats = #slurp
516 #if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != ""
517 $defuse_param.multi_exon_transcripts_stats
518 #else
519 #try
520 $ref_dict['multi_exon_transcripts_stats']
521 #except
522 no
523 #end try
524 #end if
525
484 # Position density when calculating covariance 526 # Position density when calculating covariance
485 covariance_sampling_density = #slurp 527 covariance_sampling_density = #slurp
486 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" 528 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != ""
487 $defuse_param.covariance_sampling_density 529 $defuse_param.covariance_sampling_density
488 #else 530 #else
490 $ref_dict['covariance_sampling_density'] 532 $ref_dict['covariance_sampling_density']
491 #except 533 #except
492 0.01 534 0.01
493 #end try 535 #end try
494 #end if 536 #end if
537
538 # Maximum number of alignments for a read pair
539 # Pairs with more alignments are filtered
540 max_paired_alignments = #slurp
541 #if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != ""
542 $defuse_param.max_paired_alignments
543 #else
544 #try
545 $ref_dict['max_paired_alignments']
546 #except
547 10
548 #end try
549 #end if
550
495 # Number of reads for each job in split 551 # Number of reads for each job in split
496 reads_per_job = #slurp 552 reads_per_job = #slurp
497 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" 553 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != ""
498 $defuse_param.reads_per_job 554 $defuse_param.reads_per_job
499 #else 555 #else
510 566
511 # Remove temp files 567 # Remove temp files
512 remove_job_files = yes 568 remove_job_files = yes
513 remove_job_temp_files = yes 569 remove_job_temp_files = yes
514 570
571 qsub_params = ""
572
515 #end raw 573 #end raw
516 574
517
518 </configfile>
519 <configfile name="shscript">
520 #!/bin/bash
521 ## define some things for cheetah proccessing
522 #set $ds = chr(36)
523 #set $amp = chr(38)
524 #set $gt = chr(62)
525 #set $lt = chr(60)
526 #set $echo_cmd = 'echo'
527 ## Find the defuse.pl in the galaxy tool path
528 #import Cheetah.FileUtils
529 ## declare a bash function for converting a results tsv into html with links to the get_reads output files
530 results2html() {
531 rlts=${ds}1
532 rslt_name=`basename ${ds}rlts`
533 html=${ds}2
534 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse '${ds}rslt_name'${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} ${ds}html
535 echo '${lt}h2${gt}Defuse '${ds}rslt_name'${lt}/h2${gt}${lt}table${gt}' ${gt}${gt} ${ds}html
536 if [ -z "${ds}3" ]
537 then
538 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\
539 ${ds}1 ~ /[1-9][0-9]*/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html
540 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html
541 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html
542 else
543 export _EFP=${ds}3
544 mkdir -p ${ds}_EFP
545 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\
546 ${ds}1 ~ /[1-9][0-9]*/{fn="cluster_"${ds}1"_reads.txt"; \
547 printf("${lt}tr${gt}${lt}td${gt}${lt}a href=\"%s\"${gt}%s${lt}/a${gt}${lt}/td${gt}",fn, ${ds}1);for (i = 2; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html
548 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html
549 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html
550 for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`;
551 do fn=cluster_${ds}{i}_reads.txt;
552 pn=${ds}_EFP/${ds}fn;
553 perl \${DEFUSE_PATH}/scripts/get_reads.pl -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn;
554 done
555 fi
556 }
557 ## substitute pathnames into config file
558 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
559 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
560 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
561 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
562 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
563 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
564 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi
565 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi
566 if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
567 if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
568
569
570 ## copy config to output
571 cp $defuse_config $config_txt
572 ## make a data_dir and ln -s the input fastq
573 mkdir -p data_dir
574 ## ln -s "$left_pairendreads" data_dir/reads_1.fastq
575 ## ln -s "$right_pairendreads" data_dir/reads_2.fastq
576 cp "$left_pairendreads" data_dir/reads_1.fastq
577 cp "$right_pairendreads" data_dir/reads_2.fastq
578 ## ln to output_dir in from_work_dir
579 #if $defuse_out.__str__ != 'None':
580 mkdir -p $defuse_out.dataset.extra_files_path
581 ln -s $defuse_out.dataset.extra_files_path output_dir
582 #else
583 mkdir -p output_dir
584 #end if
585 ## run defuse.pl
586 perl \${DEFUSE_PATH}/scripts/defuse.pl -name "$library_name" -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS
587 ## copy primary results to output datasets
588 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
589 ## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
590 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
591 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
592 #if $breakpoints_bam:
593 if [ -e output_dir/results.filtered.tsv ] ${amp}${amp} [ -e output_dir/breakpoints.genome.psl ]
594 then
595 awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp}
596 psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp}
597 samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp}
598 samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp}
599 ## samtools index breakpoints.bam
600 cp breakpoints.bam $fusions_bam
601 fi
602 #end if
603 ## create html with links for output_dir
604 #if $defuse_out.__str__ != 'None':
605 if [ -e $defuse_out ]
606 then
607 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out
608 echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt} $defuse_out
609 pushd $defuse_out.dataset.extra_files_path
610 for f in `find -L . -maxdepth 1 -type f`;
611 do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt} $defuse_out;
612 done
613 popd
614 echo '${lt}/ul${gt}' ${gt}${gt} $defuse_out
615 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} $defuse_out
616 fi
617 #end if
618 ## run get_reads.pl on each cluster
619 #if $fusion_reads.__str__ != 'None':
620 if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ]
621 then
622 mkdir -p $fusion_reads.dataset.extra_files_path
623 results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.dataset.extra_files_path
624 fi
625 #end if
626 </configfile> 575 </configfile>
627 </configfiles> 576 </configfiles>
628 577
629 <tests> 578 <tests>
630 </tests> 579 </tests>