Mercurial > repos > badger > trinityrnaseq
changeset 14:961b5bf3fcc8 draft default tip
remove other tools
author | Eric Badger <badger@msi.umn.edu> |
---|---|
date | Tue, 08 Apr 2014 13:16:06 -0500 |
parents | 6425d1da3746 |
children | |
files | RSEM_abundance_estimation.xml tool_dependencies.xml transcriptsToOrfs.xml trinityrnaseq.xml trinityrnaseq_norm.xml |
diffstat | 5 files changed, 5 insertions(+), 397 deletions(-) [+] |
line wrap: on
line diff
--- a/RSEM_abundance_estimation.xml Fri Apr 04 15:30:44 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ -<tool id="RSEM_abundance_estimation" name="RSEM abundance estimation" version="0.0.2"> - <description>run RSEM to estimate transcript abundances</description> - <requirements> - <requirement type="package" version="2013_08_14">trinityrnaseq</requirement> - <requirement type="package" version="1.1.17">rsem</requirement> - </requirements> - <command> - \$TRINITY_HOME/util/RSEM_util/run_RSEM_align_n_estimate.pl --transcripts $transcripts - ## Inputs. - #if str($read_type.paired_or_single) == "single": - #if $read_type.single_reads.extension.startswith( "fastq"): - --seqType fq - #else - --seqType fa - #end if - --single $read_type.single_reads - #else - #if $read_type.left_reads.extension.startswith( "fastq"): - --seqType fq - #else - --seqType fa - #end if - --left $read_type.left_reads - --right $read_type.right_reads - #end if - #if $transcript.source == "other": - --no_group_by_component - --gene_trans_map $transcript.gene_trans_map - #end if - </command> - <inputs> - <param name="transcripts" type="data" format="fasta" label="transcripts_fasta" help="Fasta sequences for which reads are aligned." /> - <conditional name="read_type"> - <param name="paired_or_single" type="select" label="Paired or Single-end data?"> - <option value="paired">Paired</option> - <option value="single">Single</option> - </param> - <when value="paired"> - <param name="left_reads" type="data" format="fasta,fastq" label="left reads" help="" /> - <param name="right_reads" type="data" format="fasta,fastq" label="right reads" help="" /> - <param name="ss_lib_type" type="select" label="strand-specific library type"> - <option value="RF">RF</option> - <option value="FR">FR</option> - </param> - </when> - <when value="single"> - <param name="single_reads" type="data" format="fasta,fastq" label="single reads" help="" /> - <param name="ss_lib_type" type="select" label="strand-specific library type"> - <option value="F">F</option> - <option value="R">R</option> - </param> - </when> - </conditional> - <conditional name="transcript"> - <param name="source" type="select" label="Transcripts Source"> - <option value="trinity">Trinity</option> - <option value="other">NOT trinity</option> - </param> - <when value="trinity"/> - <when value="other"> - <param name="gene_trans_map" type="data" format="tabular" optional="true" label="Map of gene ids to transcript (isoform) ids" > - <help> - Each line of should be of the form: gene_id transcript_id ( with the two fields separated by a tab character ) - </help> - </param> - </when> - </conditional> - </inputs> - <stdio> - <exit_code range="1:" level="fatal" description="Error Running RSEM" /> - </stdio> - <outputs> - <data format="text" name="transcript_counts" label="${tool.name} on ${on_string}: Isoform Counts" from_work_dir="RSEM.isoforms.results"/> - <data format="text" name="gene_counts" label="${tool.name} on ${on_string}: Gene counts" from_work_dir="RSEM.genes.results"/> - </outputs> - <tests> - <test> - <param name="target" value="trinity/Trinity.fasta" /> - <param name="aligner" value="bowtie" /> - <param name="paired_or_single" value="single" /> - <param name="library_type" value="None" /> - <param name="input" value="trinity/reads.left.fq" /> - </test> - </tests> - <help> - .. _Trinity: http://trinityrnaseq.sourceforge.net - - $TRINITY_HOME/util/RSEM_util/run_RSEM_align_n_estimate.pl --transcripts Trinity.fasta \ - --seqType fq --left left.reads.fq --right right.reads.fq - </help> -</tool>
--- a/tool_dependencies.xml Fri Apr 04 15:30:44 2014 -0500 +++ b/tool_dependencies.xml Tue Apr 08 13:16:06 2014 -0500 @@ -9,11 +9,5 @@ <package name="bowtie" version="1.0.0"> <repository changeset_revision="e682af6a72cd" name="package_bowtie_1_0_0" owner="jjohnson" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> - <package name="hmmer" version="3.0"> - <repository changeset_revision="a44693e3a2c4" name="package_hmmer_3_0" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="rsem" version="1.1.17"> - <repository changeset_revision="7d060ea51c6f" name="package_rsem_1_1_17" owner="jjohnson" toolshed="http://testtoolshed.g2.bx.psu.edu" /> - </package> </tool_dependency>
--- a/transcriptsToOrfs.xml Fri Apr 04 15:30:44 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -<tool id="transcriptsToOrfs" name="transcriptsToOrfs" version="0.0.2"> - <description>Trinity Transcripts to Candidate Peptides</description> - <requirements> - <requirement type="package" version="2013_08_14">trinityrnaseq</requirement> - <requirement type="package" version="3.0">hmmer</requirement> - </requirements> - <command> - \$TRINITY_HOME/trinity-plugins/transdecoder/transcripts_to_best_scoring_ORFs.pl - -t $transcripts - #if $min_prot_length: - -m $min_prot_length - #end if - #if $retain_long_orfs: - --retain_long_orfs $retain_long_orfs - #end if - #if $training_count: - -T $training_count - #end if - #if str($strand_specificity) == 'SS': - -S - #end if - #if $genetic_code.__str__ != '': - -G $genetic_code - #end if - #if $search.use_pfam == 'yes': - --search_pfam "${ filter( lambda x: str( x[0] ) == str( $search.pfam_db ), $__app__.tool_data_tables[ 'pfam_databases' ].get_fields() )[0][-1] }" - --CPU $search.CPU - #end if - </command> - <inputs> - <param format="fasta" name="transcripts" type="data" label="Transcripts sequences in fastA format" help="" /> - <param name="min_prot_length" type="integer" value="" optional="true" label="Minimum peptide length (in amino acids)" help="default: 100"> - <validator type="in_range" message="Minimum peptide length should be at least 50" min="50" /> - </param> - <param name="retain_long_orfs" type="integer" value="" optional="true" label="Retain all ORFs found that are of minimum length in nucleotides" help="default: 900" > - <validator type="in_range" message="ORF length should be at least 50" min="50" /> - </param> - <param name="training_count" type="integer" value="" optional="true" label="Number of top longest ORFs to train Markov Model (hexamer stats)" help="default: 500" > - <validator type="in_range" message="ORF count should be at least 50" min="50" /> - </param> - <param name="strand_specificity" type="select" label="Strand specificity type"> - <option value="DS">NOT strand specific, examine both strands</option> - <option value="SS">Strand specific, examine only top strand</option> - </param> - <param name="genetic_code" type="select" label="Genetic Code"> - <option value="">use default(universal)</option> - <option value="universal">universal</option> - <option value="Euplotes">Euplotes</option> - <option value="Tetrahymena">Tetrahymena</option> - <option value="Candida">Candida</option> - <option value="Acetabularia">Acetabularia</option> - </param> - <conditional name="search"> - <param name="use_pfam" type="select" label="Search PFAM database"> - <option value="no">NO</option> - <option value="yes">YES</option> - </param> - <when value="no"/> - <when value="yes"> - <param name="pfam_db" type="select" label="Pfam database"> - <options from_data_table="pfam_databases" /> - </param> - <param name="CPU" type="integer" value="2" min="1" label="CPU" help="Number of CPUs to use by hmmscan" /> - </when> - </conditional> - </inputs> - <stdio> - <exit_code range="1:" level="fatal" description="Failed" /> - <regex match="Error" - source="stderr" - level="fatal" - description="Failed" /> - </stdio> - <outputs> - <data format="txt" name="trinity_pep_pfam" label="${tool.name} on ${on_string}: Pfam matches to Candidate Peptide Sequences" from_work_dir="longest_orfs.pep.pfam.dat"> - <filter>search['use_pfam'] == 'yes'</filter> - </data> - <data format="gff3" name="trinity_pep_gff3" label="${tool.name} on ${on_string} Candidate Peptide Features" from_work_dir="best_candidates.eclipsed_orfs_removed.gff3" /> - <data format="bed" name="trinity_pep_bed" label="${tool.name} on ${on_string} Candidate Peptide Coordinates" from_work_dir="best_candidates.eclipsed_orfs_removed.bed" /> - <data format="fasta" name="trinity_pep_cds" label="${tool.name} on ${on_string}: Candidate Peptide CDS Sequences" from_work_dir="best_candidates.eclipsed_orfs_removed.cds"/> - <data format="fasta" name="trinity_pep_seqs" label="${tool.name} on ${on_string}: Candidate Peptide Sequences" from_work_dir="best_candidates.eclipsed_orfs_removed.pep"/> - </outputs> - <tests> - <test> - <param name="transcripts" ftype="fasta" value="TrinitySingle.fasta"/> - <param name="min_prot_length" value="100"/> - <param name="use_pfam" value="no"/> - <output name="trinity_pep_seqs"> - <assert_contents> - <has_text text="WAAKAWLITARSLYPADF" /> - </assert_contents> - </output> - <output name="trinity_pep_cds"> - <assert_contents> - <has_text text="TGGGCAGCCAAGGCATGGCTGATCACGGCCCGCA" /> - </assert_contents> - </output> - <output name="trinity_pep_bed"> - <assert_contents> - <has_text text="comp10_c0_seq1" /> - </assert_contents> - </output> - <output name="trinity_pep_gff3"> - <assert_contents> - <has_text text="comp10_c0_seq1" /> - </assert_contents> - </output> - </test> - </tests> - <help> - ** transcriptsToOrfs ** - Trinity_ is a de novo transcript assembler that uses RNA-seq data as input. - This tool searches for open reading frames in the assembled transcripts. - - .. _Trinity: http://trinityrnaseq.sourceforge.net - </help> -</tool>
--- a/trinityrnaseq.xml Fri Apr 04 15:30:44 2014 -0500 +++ b/trinityrnaseq.xml Tue Apr 08 13:16:06 2014 -0500 @@ -1,4 +1,4 @@ -<tool id="trinityrnaseq" name="Trinity" version="0.0.3"> +<tool id="trinityrnaseq-itasca" name="Trinity - Itasca BETA" version="0.0.3"> <!-- Written by Jeremy Goecks, now maintained here by bhaas --> <description>De novo assembly of RNA-Seq data Using Trinity</description> <requirements> @@ -51,10 +51,10 @@ </command> <inputs> <param name="computenode" type="select" label="Compute node" help="Select the size of node needed"> - <option value="22G">22GB - 8 cores</option> - <option value="62G">62GB - 16 cores</option> - <option value="126G">126GB - 16 cores</option> - <option value="256G">254GB - 16 cores</option> + <option value="21G">21GB - 8 cores</option> + <option value="58G">58GB - 16 cores</option> + <option value="125G">125GB - 16 cores</option> + <option value="253G">253GB - 16 cores</option> </param> <conditional name="inputs">
--- a/trinityrnaseq_norm.xml Fri Apr 04 15:30:44 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,178 +0,0 @@ -<tool id="trinityrnaseq_norm" name="Trinity read normalization" version="0.0.2"> - <!-- Written by Jeremy Goecks, modified by Josh Bowden for normalization proceedure, now maintained here by bhaas --> - <description>Pre-process RNA-seq data to reduce coverage of highly covered areas</description> - <requirements> - <requirement type="package" version="2013_08_14">trinityrnaseq</requirement> - </requirements> - <command> - ## symlink input in work_dir - #if str($inputs.paired_or_single) == "paired": - ln -s $inputs.left_input left_reads && - ln -s $inputs.right_input right_reads && - #else: - ln -s $inputs.input single_reads && - #end if - \${TRINITY_HOME}/util/normalize_by_kmer_coverage.pl --JM $JM --max_cov $max_cov - ## Inputs. - #if str($inputs.paired_or_single) == "paired": - --left left_reads --right right_reads - #if $inputs.left_input.ext == 'fa': - --seqType fa - #else: - --seqType fq - #end if - $inputs.pe_reads_unordered - #if str($inputs.library_type) != "None": - --SS_lib_type $inputs.library_type - #end if - $inputs.pairs_together - $inputs.parallel_stats - #else: - --single single_reads - #if str($inputs.input.ext) == 'fa': - --seqType fa - #else: - --seqType fq - #end if - #if str($inputs.library_type) != "None": - --SS_lib_type $inputs.library_type - #end if - #end if - #if $kmer_size: - --KMER_SIZE $kmer_size - #end if - #if $max_pct_stdev: - --max_pct_stdev $max_pct_stdev - #end if - ## direct stdio to output - | tee $trinity_coverage_normalization_log && - #if str($inputs.paired_or_single) == "paired": - cp left_reads.normalized* $output_left && - cp right_reads.normalized* $output_right - #else: - cp single_reads.normalized* $output_single - #end if - </command> - <inputs> - <param name="JM" type="select" label="JM" help="Amount of memory to allocate to Jellyfish for Kmer catalog construction"> - <option value="1G">1G</option> - <option value="10G">10G</option> - <option value="20G">20G</option> - <option value="50G">50G</option> - <option value="100G">100G</option> - </param> - - <param name="max_cov" type="select" label="max_cov" help="Read coverage in terms of maximum covarge to keep"> - <option value="30">30</option> - <option value="40">40</option> - <option value="50">50</option> - <option value="60">60</option> - <option value="70">70</option> - <option value="100">100</option> - </param> - - <conditional name="inputs"> - <param name="paired_or_single" type="select" label="Paired or Single-end data?"> - <option value="paired">Paired</option> - <option value="single">Single</option> - </param> - <when value="paired"> - <param format="fasta,fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/> - <param format="fasta,fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/> - <param name="library_type" type="select" label="Strand-specific Library Type"> - <option value="None">None</option> - <option value="FR">FR</option> - <option value="RF">RF</option> - </param> - <param name="pe_reads_unordered" type="boolean" truevalue="--PE_reads_unordered" falsevalue="" checked="false" label="set if the input paired-end reads are not identically ordered"/> - <param name="pairs_together" type="boolean" truevalue="--pairs_together" falsevalue="" checked="false" label="process paired reads by averaging stats between pairs and retaining linking info"/> - <param name="parallel_stats" type="boolean" truevalue="--PARALLEL_STATS" falsevalue="" checked="false" label="generate read stats in parallel for paired reads" help="(Figure 2X Inchworm memory requirement)"/> - </when> - <when value="single"> - <param format="fasta,fastq" name="input" type="data" label="Single-end reads" help=""/> - <param name="library_type" type="select" label="Strand-specific Library Type"> - <option value="None">None</option> - <option value="F">F</option> - <option value="R">R</option> - </param> - </when> - </conditional> - <param name="kmer_size" type="integer" value="" optional="true" label="KMER SIZE" help="default: 25"> - <validator type="in_range" message="kmer size between 3 and 200" min="3" max="200"/> - </param> - <param name="max_pct_stdev" type="integer" value="" optional="true" label="maximum pct of mean for stdev of kmer coverage across read" help="default: 100"> - <validator type="in_range" message="kmer size between 10 and 100" min="10" max="100"/> - </param> - - </inputs> - <stdio> - <exit_code range="1:" level="fatal" description="Failed" /> - <regex match="Error" - source="stdout" - level="fatal" - description="Failed" /> - </stdio> - <outputs> - <!-- I have not found a way to do condional outputs so all potential output files are specified and some will be empty --> - <data format="txt" name="trinity_coverage_normalization_log" label="${tool.name} on ${on_string}: log" /> - <data format_source="left_input" name="output_left" label="${tool.name} on ${on_string}: Normalized left reads"> - <filter>inputs['paired_or_single'] == "paired"</filter> - </data> - <data format_source="right_input" name="output_right" label="${tool.name} on ${on_string}: Normalized right reads"> - <filter>inputs['paired_or_single'] == "paired"</filter> - </data> - <data format_source="input" name="output_single" label="${tool.name} on ${on_string}: Normalized reads"> - <filter>inputs['paired_or_single'] == "single"</filter> - </data> - </outputs> - <tests> - <test> - <param name="JM" value="1G"/> - <param name="max_cov" value="30"/> - <param name="paired_or_single" value="single"/> - <param name="input" ftype="fastq" value="reads.left.fq"/> - <param name="library_type" value="None"/> - <output name="trinity_coverage_normalization_log"> - <assert_contents> - <has_text text="Normalization complete." /> - </assert_contents> - </output> - <output name="output_single"> - <assert_contents> - <has_text text="ACTGCATCCTGGAAAGAATCAATGGTGGCCGGAAAGTGTTTTTCAAATACAAGAGTGACAATGTGCCCTGTTGTTT" /> - </assert_contents> - </output> - </test> - <test> - <param name="JM" value="1G"/> - <param name="max_cov" value="30"/> - <param name="paired_or_single" value="paired"/> - <param name="left_input" ftype="fastq" value="reads.left.fq"/> - <param name="right_input" ftype="fastq" value="reads.right.fq"/> - <param name="library_type" value="None"/> - <param name="pe_reads_unordered" value="False"/> - <param name="pairs_together" value="False"/> - <param name="parallel_stats" value="False"/> - <output name="trinity_coverage_normalization_log"> - <assert_contents> - <has_text text="Normalization complete." /> - </assert_contents> - </output> - <output name="output_left"> - <assert_contents> - <has_text text="CTGGGCTGCAGCTAAGTTCTCTGCATCCTCCTTCTTGCTTGTGGCTGGGAAGAAGACAATGTTGTCGATGGTCTGG" /> - </assert_contents> - </output> - <output name="output_right"> - <assert_contents> - <has_text text="CTCAAATGGTTAATTCTCAGGCTGCAAATATTCGTTCAGGATGGAAGAACATTTTCTCAGTATTCCATCTAGCTGC" /> - </assert_contents> - </output> - </test> - </tests> - <help> - Runs script Trinity_ script util/normalize_by_kmer_coverage.pl which reduces data sizes with minimal impact on recovered transcripts when used by Trinity.pl. - - .. _Trinity: http://trinityrnaseq.sourceforge.net - </help> -</tool>