# HG changeset patch # User fubar # Date 1393976408 18000 # Node ID 9d72d0221c7ffd0cafa0019de7a61ab0e2c40eaf # Parent 1e40b977aa58eaaa1b019add52a9986cd4b78c86 Changed over to a separate STAR package dependency diff -r 1e40b977aa58 -r 9d72d0221c7f prepStar.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepStar.sh Tue Mar 04 18:40:08 2014 -0500 @@ -0,0 +1,21 @@ +# automate rnastar index generation +# from existing fasta genomes using limited ram - cut genomeChrBinNbits down to 13, genomeSAindexNbases 12 genomeSAsparseD 1 (each passed to STAR as a --option) + +INDEXROOT=/mnt/galaxyIndices/genomes +# fix that, the list of genomes below and SOURCE below to suit your needs. This is for GVL august 2013 +for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2 +do + SOURCE=${INDEXROOT}/${genome}/seq/ + TARGET=${INDEXROOT}/${genome}/rnastar + mkdir -p "$TARGET" + cd "$TARGET" || exit 1 +/mnt/galaxy/tools/rnastar/2.3.0e/fubar/rgrnastar_203e/9425706f4e97/bin/STAR --runMode genomeGenerate --genomeDir "$TARGET" --genomeFastaFiles "${SOURCE}/${genome}.fa" --runThreadN 2 --genomeLoad NoSharedMemory --genomeChrBinNbits 13 --genomeSAindexNbases 12 --genomeSAsparseD 1 +done + +echo "#add these lines TABS not spaces please to rnastar_indexes.loc in tool-data" > rnastar_indexes.loc +for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2 +do + SOURCE=${INDEXROOT}/${genome}/seq/ + TARGET=${INDEXROOT}/${genome}/rnastar + printf '%s\t%s\t%s\t%s\n' "${genome}" "${genome}" "${genome}" "${TARGET}" >> rnastar_indexes.loc +done diff -r 1e40b977aa58 -r 9d72d0221c7f rg_rnaStar.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rg_rnaStar.xml Tue Mar 04 18:40:08 2014 -0500 @@ -0,0 +1,287 @@ + + Gapped-read mapper for RNA-seq data + + rnastar + samtools + + + ## + ## Run STAR. + ## + + STAR + ## Can adjust this as appropriate for the system. 
+ --genomeLoad NoSharedMemory + + --genomeDir ${refGenomeSource.index.fields.path} + --readFilesIn $input1 + #if $singlePaired.sPaired == "paired" + $singlePaired.input2 + #end if + --runThreadN 4 + #if $params.settingsType == "full": + --chimSegmentMin $params.chim_segment_min + --chimScoreMin $params.chim_score_min + #end if + + ## may or may not need to generate SAM tags and handle non-canonicals for Cufflinks tools. + ${outSAMstrandField} ${outFilterIntronMotifs} ${outSAMattributes} + + ; + + ## + ## BAM conversion. + ## + + ## Convert aligned reads. + samtools view -Shb Aligned.out.sam | samtools sort - Aligned.out + + ## Convert chimeric reads. + #if $params.settingsType == "full" and $params.chim_segment_min > 0: + ; samtools view -Shb Chimeric.out.sam | samtools sort - Chimeric.out + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (params['settingsType'] == 'full' and params['chim_segment_min'] > 0) + + + + + + + + + + + + + + + + (params['settingsType'] == 'full' and params['chim_segment_min'] > 0) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** +Runs the rna star gapped aligner. Suited to paired or single end rna-seq. + +8.2: SAM alignments + +The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field. +The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for +multi-mapping reads. This scheme is same as the one used by Tophat and is compatible with Cufflinks. + +For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG +column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from +the alignments of equal quality. 
+ +8.2.1: Standard SAM attributes +With default --outSAMattributes Standard option the following SAM attributes will be generated: + +Column 12: NH: number of loci a read (pair) maps to +Column 13: IH: alignment index for all alignments of a read +Column 14: aS: alignment score +Column 15: nM: number of mismatches (does not include indels) + +8.2.2: Extra SAM attributes +If --outSAMattributes All option is used, the following additional attributes will be output: + +Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR): +0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. + +If splice junctions database is used, and a junction is annotated, 20 is added to its motif value. +Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based) + +Note that samtools 0.1.18 or later has to be used with these extra attributes. + + +8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff + +If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will +need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS +strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined +strand (i.e. containing only non-canonical junctions) will be suppressed. + +If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need +to run Cufflinks with the --library-type option. For example, cufflinks with +--library-type fr-firststrand should be used for the “standard” dUTP protocol. +This option has to be used only for Cufflinks runs and not for STAR runs. 
+ +It is recommended to remove the non-canonical junctions for Cufflinks runs using – + +--outFilterIntronMotifs RemoveNoncanonical +filter out alignments that contain non-canonical junctions + +OR + +--outFilterIntronMotifs RemoveNoncanonicalUnannotated +filter out alignments that contain non-canonical unannotated junctions +when using an annotated splice junctions database. The annotated non- +canonical junctions will be kept. + + +**Attributions** + +Note that each component has its own license. Good luck with figuring out your obligations. + +rna_star - see the web site at rna_star_ + +For details, please see the rna_starMS_ +"STAR: ultrafast universal RNA-seq aligner" +A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635 + +Galaxy_ (that's what you are using right now!) for gluing everything together + +Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper + +Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies +and odds and ends of other code and documentation comprising this tool were +written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts + +.. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml +.. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/ +.. _rna_star: http://code.google.com/p/rna-star/ +.. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full +.. 
_Galaxy: http://getgalaxy.org + + + diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/prepStar.sh --- a/rgrnastar/prepStar.sh Sat Aug 31 03:06:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -# automate rnastar index generation -# from existing fasta genomes using limited ram - cut genomeChrbinNbits down to 13, genomeSAindexNbases 12 genomeSAsparseD 1 - -INDEXROOT=/mnt/galaxyIndices/genomes -# fix that, the list of genomes below and SOURCE below to suit your needs. This is for GVL august 2013 -for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2 -do - SOURCE=${INDEXROOT}/${genome}/seq/ - TARGET=${INDEXROOT}/${genome}/rnastar - mkdir -p $TARGET - cd $TARGET - /mnt/galaxy/tools/rnastar/2.3.0e/fubar/rgrnastar_203e/9425706f4e97/bin/STAR --runMode genomeGenerate --genomeDir $TARGET --genomeFastaFiles ${SOURCE}/${genome}.fa --runThreadN 2 genomeLoad=NoSharedMemory -done - -echo "#add these lines TABS not spaces please to rnastar_indexes.loc in tool-data" > rnastar_indexes.loc -for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2 -do - SOURCE=${INDEXROOT}/${genome}/seq/ - TARGET=${INDEXROOT}/${genome}/rnastar - echo ${genome}'\t'${genome}'\t'${genome}'\t'/mnt/galaxyIndices/genomes/${genome}/rnastar >> rnastar_indexes.loc -done diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/rg_rnaStar.xml --- a/rgrnastar/rg_rnaStar.xml Sat Aug 31 03:06:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,287 +0,0 @@ - - Gapped-read mapper for RNA-seq data - - rnastar - samtools - - - ## - ## Run STAR. - ## - - STAR - ## Can adjust this as appropriate for the system. 
- --genomeLoad NoSharedMemory - - --genomeDir ${refGenomeSource.index.fields.path} - --readFilesIn $input1 - #if $singlePaired.sPaired == "paired" - $singlePaired.input2 - #end if - --runThreadN 4 - #if $params.settingsType == "full": - --chimSegmentMin $params.chim_segment_min - --chimScoreMin $params.chim_score_min - #end if - - ## may or may not need to generate SAM tags and handle non-canonicals for Cufflinks tools. - ${outSAMstrandField} ${outFilterIntronMotifs} ${outSAMattributes} - - ; - - ## - ## BAM conversion. - ## - - ## Convert aligned reads. - samtools view -Shb Aligned.out.sam | samtools sort - Aligned.out - - ## Convert chimeric reads. - #if $params.settingsType == "full" and $params.chim_segment_min > 0: - ; samtools view -Shb Chimeric.out.sam | samtools sort - Chimeric.out - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (params['settingsType'] == 'full' and params['chim_segment_min'] > 0) - - - - - - - - - - - - - - - - (params['settingsType'] == 'full' and params['chim_segment_min'] > 0) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** -Runs the rna star gapped aligner. Suited to paired or single end rna-seq. - -8.2: SAM alignments - -The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field. -The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for -multi-mapping reads. This scheme is same as the one used by Tophat and is compatible with Cufflinks. - -For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG -column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from -the alignments of equal quality. 
- -8.2.1: Standard SAM attributes -With default --outSAMattributes Standard option the following SAM attributes will be generated: - -Column 12: NH: number of loci a read (pair) maps to -Column 13: IH: alignment index for all alignments of a read -Column 14: aS: alignment score -Column 15: nM: number of mismatches (does not include indels) - -8.2.2: Extra SAM attrbiutes -If --outSAMattributes All option is used, the following additional attributes will be output: - -Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR): -0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. - -If splice junctions database is used, and a junction is annotated, 20 is added to its motif value. -Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based) - -Note, that samtools 0.1.18 or later have to be used with these extra attributes. - - -8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff - -If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will -need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS -strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined -strand (i.e. containing only non-canonical junctions) will be suppressed. - -If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need -to run Cufflinks with the library option --library-type options. For example, cufflinks with -library-type fr-firststrand should be used for the “standard” dUTP protocol. -This option has to be used only for Cufflinks runs and not for STAR runs. 
- -It is recommended to remove the non-canonical junctions for Cufflinks runs using – - ---outFilterIntronMotifs RemoveNoncanonical -filter out alignments that contain non-canonical junctions - -OR - ---outFilterIntronMotifs RemoveNoncanonicalUnannotated -filter out alignments that contain non-canonical unannotated junctions -when using annotated splice junctions database. The annotated non- -canonical junctions will be kept. - - -**Attributions** - -Note that each component has its own license. Good luck with figuring out your obligations. - -rna_star - see the web site at rna_star_ - -For details, please see the rna_starMS_ -"STAR: ultrafast universal RNA-seq aligner" -A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635 - -Galaxy_ (that's what you are using right now!) for gluing everything together - -Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper - -Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies -and odds and ends of other code and documentation comprising this tool was -written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts - -.. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml -.. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/ -.. _rna_star: http://code.google.com/p/rna-star/ -.. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full -.. _Galaxy: http://getgalaxy.org - - - diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/tool-data/rnastar_indices.loc.sample --- a/rgrnastar/tool-data/rnastar_indices.loc.sample Sat Aug 31 03:06:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of rna-star indexed sequences data files. 
You will -#need to create these data files and then create a bowtie_indices.loc -#file similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The bowtie2_indices.loc -#file has this format (longer white space characters are TAB characters): -# -# -# -#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar - diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/tool_data_table_conf.xml.sample --- a/rgrnastar/tool_data_table_conf.xml.sample Sat Aug 31 03:06:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ - - - - value, dbkey, name, path - -
-
diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/tool_dependencies.xml --- a/rgrnastar/tool_dependencies.xml Sat Aug 31 03:06:03 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ - - - - - - https://rna-star.googlecode.com/files/STAR_2.3.0e.tgz - make - $INSTALL_DIR - $INSTALL_DIR/bin - - STAR - $INSTALL_DIR/bin - - - $INSTALL_DIR/bin - - - - -Installs the STAR binary for rnastar - see https://code.google.com/p/rna-star/ - - - - diff -r 1e40b977aa58 -r 9d72d0221c7f tool-data/rnastar_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rnastar_indices.loc.sample Tue Mar 04 18:40:08 2014 -0500 @@ -0,0 +1,11 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of rna-star indexed sequences data files. You will +#need to create these data files and then create a rnastar_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The rnastar_indices.loc +#file has this format (longer white space characters are TAB characters): +# +# +# +#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar + diff -r 1e40b977aa58 -r 9d72d0221c7f tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Mar 04 18:40:08 2014 -0500 @@ -0,0 +1,7 @@ + + + + value, dbkey, name, path + + 
+
diff -r 1e40b977aa58 -r 9d72d0221c7f tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Mar 04 18:40:08 2014 -0500 @@ -0,0 +1,15 @@ + + + + + + + + + + +Installs the STAR wrapper and dependency packages samtools and star - see https://code.google.com/p/rna-star/ +STAR is a very fast mapper for rna-seq giving junctions if the indexes are constructed with a junction library + + +