# HG changeset patch
# User fubar
# Date 1393976408 18000
# Node ID 9d72d0221c7ffd0cafa0019de7a61ab0e2c40eaf
# Parent 1e40b977aa58eaaa1b019add52a9986cd4b78c86
Changed over to a separate STAR package dependency
diff -r 1e40b977aa58 -r 9d72d0221c7f prepStar.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/prepStar.sh Tue Mar 04 18:40:08 2014 -0500
@@ -0,0 +1,21 @@
+# Automate rnastar index generation from existing fasta genomes using limited RAM:
+# genomeChrBinNbits is cut down to 13, with genomeSAindexNbases 12 and genomeSAsparseD 1.
+
+INDEXROOT=/mnt/galaxyIndices/genomes
+# Adjust INDEXROOT, the list of genomes below and SOURCE below to suit your needs. This is for GVL august 2013
+for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2
+do
+ SOURCE=${INDEXROOT}/${genome}/seq/
+ TARGET=${INDEXROOT}/${genome}/rnastar
+ mkdir -p "$TARGET"
+ cd "$TARGET" || exit 1
+/mnt/galaxy/tools/rnastar/2.3.0e/fubar/rgrnastar_203e/9425706f4e97/bin/STAR --runMode genomeGenerate --genomeDir "$TARGET" --genomeFastaFiles "${SOURCE}/${genome}.fa" --runThreadN 2 --genomeLoad NoSharedMemory --genomeChrBinNbits 13 --genomeSAindexNbases 12 --genomeSAsparseD 1
+done
+
+echo "#add these lines TABS not spaces please to rnastar_indexes.loc in tool-data" > rnastar_indexes.loc
+for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2
+do
+ # printf writes real TAB separators; echo '\t' prints a literal backslash-t under bash
+ TARGET=${INDEXROOT}/${genome}/rnastar
+ printf '%s\t%s\t%s\t%s\n' "$genome" "$genome" "$genome" "$TARGET" >> rnastar_indexes.loc
+done
diff -r 1e40b977aa58 -r 9d72d0221c7f rg_rnaStar.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rg_rnaStar.xml Tue Mar 04 18:40:08 2014 -0500
@@ -0,0 +1,287 @@
+
+ Gapped-read mapper for RNA-seq data
+
+ rnastar
+ samtools
+
+
+ ##
+ ## Run STAR.
+ ##
+
+ STAR
+ ## Can adjust this as appropriate for the system.
+ --genomeLoad NoSharedMemory
+
+ --genomeDir ${refGenomeSource.index.fields.path}
+ --readFilesIn $input1
+ #if $singlePaired.sPaired == "paired"
+ $singlePaired.input2
+ #end if
+ --runThreadN 4
+ #if $params.settingsType == "full":
+ --chimSegmentMin $params.chim_segment_min
+ --chimScoreMin $params.chim_score_min
+ #end if
+
+ ## may or may not need to generate SAM tags and handle non-canonicals for Cufflinks tools.
+ ${outSAMstrandField} ${outFilterIntronMotifs} ${outSAMattributes}
+
+ ;
+
+ ##
+ ## BAM conversion.
+ ##
+
+ ## Convert aligned reads.
+ samtools view -Shb Aligned.out.sam | samtools sort - Aligned.out
+
+ ## Convert chimeric reads.
+ #if $params.settingsType == "full" and $params.chim_segment_min > 0:
+ ; samtools view -Shb Chimeric.out.sam | samtools sort - Chimeric.out
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (params['settingsType'] == 'full' and params['chim_segment_min'] > 0)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (params['settingsType'] == 'full' and params['chim_segment_min'] > 0)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+Runs the rna star gapped aligner. Suited to paired or single end rna-seq.
+
+8.2: SAM alignments
+
+The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field.
+The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for
+multi-mapping reads. This scheme is the same as the one used by Tophat and is compatible with Cufflinks.
+
+For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG
+column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from
+the alignments of equal quality.
+
+8.2.1: Standard SAM attributes
+With the default --outSAMattributes Standard option, the following SAM attributes will be generated:
+
+Column 12: NH: number of loci a read (pair) maps to
+Column 13: IH: alignment index for all alignments of a read
+Column 14: aS: alignment score
+Column 15: nM: number of mismatches (does not include indels)
+
+8.2.2: Extra SAM attributes
+If the --outSAMattributes All option is used, the following additional attributes will be output:
+
+Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR):
+0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT.
+
+If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.
+Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based)
+
+Note that samtools 0.1.18 or later has to be used with these extra attributes.
+
+
+8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff
+
+If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will
+need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS
+strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined
+strand (i.e. containing only non-canonical junctions) will be suppressed.
+
+If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need
+to run Cufflinks with the --library-type option. For example, cufflinks with
+library-type fr-firststrand should be used for the “standard” dUTP protocol.
+This option has to be used only for Cufflinks runs and not for STAR runs.
+
+It is recommended to remove the non-canonical junctions for Cufflinks runs using –
+
+--outFilterIntronMotifs RemoveNoncanonical
+filter out alignments that contain non-canonical junctions
+
+OR
+
+--outFilterIntronMotifs RemoveNoncanonicalUnannotated
+filter out alignments that contain non-canonical unannotated junctions
+when using annotated splice junctions database. The annotated non-
+canonical junctions will be kept.
+
+
+**Attributions**
+
+Note that each component has its own license. Good luck with figuring out your obligations.
+
+rna_star - see the web site at rna_star_
+
+For details, please see the rna_starMS_
+"STAR: ultrafast universal RNA-seq aligner"
+A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
+
+Galaxy_ (that's what you are using right now!) for gluing everything together
+
+Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper
+
+Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies
+and odds and ends of other code and documentation comprising this tool were
+written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts
+
+.. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml
+.. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/
+.. _rna_star: http://code.google.com/p/rna-star/
+.. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full
+.. _Galaxy: http://getgalaxy.org
+
+
+
diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/prepStar.sh
--- a/rgrnastar/prepStar.sh Sat Aug 31 03:06:03 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-# automate rnastar index generation
-# from existing fasta genomes using limited ram - cut genomeChrbinNbits down to 13, genomeSAindexNbases 12 genomeSAsparseD 1
-
-INDEXROOT=/mnt/galaxyIndices/genomes
-# fix that, the list of genomes below and SOURCE below to suit your needs. This is for GVL august 2013
-for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2
-do
- SOURCE=${INDEXROOT}/${genome}/seq/
- TARGET=${INDEXROOT}/${genome}/rnastar
- mkdir -p $TARGET
- cd $TARGET
- /mnt/galaxy/tools/rnastar/2.3.0e/fubar/rgrnastar_203e/9425706f4e97/bin/STAR --runMode genomeGenerate --genomeDir $TARGET --genomeFastaFiles ${SOURCE}/${genome}.fa --runThreadN 2 genomeLoad=NoSharedMemory
-done
-
-echo "#add these lines TABS not spaces please to rnastar_indexes.loc in tool-data" > rnastar_indexes.loc
-for genome in Arabidopsis_thaliana_TAIR9 ce10 danRer7 dm3 hg19 hg_g1k_v37 mm9 phiX rn5 sacCer2 sacCer3 xenTro2
-do
- SOURCE=${INDEXROOT}/${genome}/seq/
- TARGET=${INDEXROOT}/${genome}/rnastar
- echo ${genome}'\t'${genome}'\t'${genome}'\t'/mnt/galaxyIndices/genomes/${genome}/rnastar >> rnastar_indexes.loc
-done
diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/rg_rnaStar.xml
--- a/rgrnastar/rg_rnaStar.xml Sat Aug 31 03:06:03 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,287 +0,0 @@
-
- Gapped-read mapper for RNA-seq data
-
- rnastar
- samtools
-
-
- ##
- ## Run STAR.
- ##
-
- STAR
- ## Can adjust this as appropriate for the system.
- --genomeLoad NoSharedMemory
-
- --genomeDir ${refGenomeSource.index.fields.path}
- --readFilesIn $input1
- #if $singlePaired.sPaired == "paired"
- $singlePaired.input2
- #end if
- --runThreadN 4
- #if $params.settingsType == "full":
- --chimSegmentMin $params.chim_segment_min
- --chimScoreMin $params.chim_score_min
- #end if
-
- ## may or may not need to generate SAM tags and handle non-canonicals for Cufflinks tools.
- ${outSAMstrandField} ${outFilterIntronMotifs} ${outSAMattributes}
-
- ;
-
- ##
- ## BAM conversion.
- ##
-
- ## Convert aligned reads.
- samtools view -Shb Aligned.out.sam | samtools sort - Aligned.out
-
- ## Convert chimeric reads.
- #if $params.settingsType == "full" and $params.chim_segment_min > 0:
- ; samtools view -Shb Chimeric.out.sam | samtools sort - Chimeric.out
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (params['settingsType'] == 'full' and params['chim_segment_min'] > 0)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (params['settingsType'] == 'full' and params['chim_segment_min'] > 0)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-Runs the rna star gapped aligner. Suited to paired or single end rna-seq.
-
-8.2: SAM alignments
-
-The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field.
-The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for
-multi-mapping reads. This scheme is same as the one used by Tophat and is compatible with Cufflinks.
-
-For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG
-column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from
-the alignments of equal quality.
-
-8.2.1: Standard SAM attributes
-With default --outSAMattributes Standard option the following SAM attributes will be generated:
-
-Column 12: NH: number of loci a read (pair) maps to
-Column 13: IH: alignment index for all alignments of a read
-Column 14: aS: alignment score
-Column 15: nM: number of mismatches (does not include indels)
-
-8.2.2: Extra SAM attrbiutes
-If --outSAMattributes All option is used, the following additional attributes will be output:
-
-Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR):
-0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT.
-
-If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.
-Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based)
-
-Note, that samtools 0.1.18 or later have to be used with these extra attributes.
-
-
-8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff
-
-If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will
-need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS
-strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined
-strand (i.e. containing only non-canonical junctions) will be suppressed.
-
-If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need
-to run Cufflinks with the library option --library-type options. For example, cufflinks with
-library-type fr-firststrand should be used for the “standard” dUTP protocol.
-This option has to be used only for Cufflinks runs and not for STAR runs.
-
-It is recommended to remove the non-canonical junctions for Cufflinks runs using –
-
---outFilterIntronMotifs RemoveNoncanonical
-filter out alignments that contain non-canonical junctions
-
-OR
-
---outFilterIntronMotifs RemoveNoncanonicalUnannotated
-filter out alignments that contain non-canonical unannotated junctions
-when using annotated splice junctions database. The annotated non-
-canonical junctions will be kept.
-
-
-**Attributions**
-
-Note that each component has its own license. Good luck with figuring out your obligations.
-
-rna_star - see the web site at rna_star_
-
-For details, please see the rna_starMS_
-"STAR: ultrafast universal RNA-seq aligner"
-A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
-
-Galaxy_ (that's what you are using right now!) for gluing everything together
-
-Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper
-
-Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies
-and odds and ends of other code and documentation comprising this tool was
-written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts
-
-.. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml
-.. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/
-.. _rna_star: http://code.google.com/p/rna-star/
-.. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full
-.. _Galaxy: http://getgalaxy.org
-
-
-
diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/tool-data/rnastar_indices.loc.sample
--- a/rgrnastar/tool-data/rnastar_indices.loc.sample Sat Aug 31 03:06:03 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of rna-star indexed sequences data files. You will
-#need to create these data files and then create a bowtie_indices.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The bowtie2_indices.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#
-#
-#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar
-
diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/tool_data_table_conf.xml.sample
--- a/rgrnastar/tool_data_table_conf.xml.sample Sat Aug 31 03:06:03 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-
-
-
- value, dbkey, name, path
-
-
-
diff -r 1e40b977aa58 -r 9d72d0221c7f rgrnastar/tool_dependencies.xml
--- a/rgrnastar/tool_dependencies.xml Sat Aug 31 03:06:03 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-
-
-
-
-
- https://rna-star.googlecode.com/files/STAR_2.3.0e.tgz
- make
- $INSTALL_DIR
- $INSTALL_DIR/bin
-
- STAR
- $INSTALL_DIR/bin
-
-
- $INSTALL_DIR/bin
-
-
-
-
-Installs the STAR binary for rnastar - see https://code.google.com/p/rna-star/
-
-
-
-
diff -r 1e40b977aa58 -r 9d72d0221c7f tool-data/rnastar_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_indices.loc.sample Tue Mar 04 18:40:08 2014 -0500
@@ -0,0 +1,11 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequence data files. You will
+#need to create these data files and then create a rnastar_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#
+#
+#hg19 hg19 hg19 full /mnt/galaxyIndices/genomes/hg19/rnastar
+
diff -r 1e40b977aa58 -r 9d72d0221c7f tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Mar 04 18:40:08 2014 -0500
@@ -0,0 +1,7 @@
+
+
+
+ value, dbkey, name, path
+
+
+
diff -r 1e40b977aa58 -r 9d72d0221c7f tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Mar 04 18:40:08 2014 -0500
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+Installs the STAR wrapper and dependency packages samtools and star - see https://code.google.com/p/rna-star/
+STAR is a very fast mapper for rna-seq giving junctions if the indexes are constructed with a junction library
+
+
+