Mercurial > repos > mingchen0919 > aurora_star

--- a/star.Rmd	Sat Mar 03 12:21:18 2018 -0500
+++ b/star.Rmd	Sun Mar 04 00:59:59 2018 -0500
@@ -41,3 +41,45 @@
 tags$code(tags$pre(readChar(index_genome_sh, file.info(index_genome_sh)$size )))
 ```

+# Running mapping jobs
+
+```{bash}
+cd ${X_d}
+
+cat >temp.sh <<EOF
+STAR \\
+  --genomeDir ${X_d}/genomeDir \\
+  --readFilesIn \\
+  ${X_F} \\
+  ${X_R} \\
+  > /dev/null 2>&1
+EOF
+
+grep -v None temp.sh > mapping.sh
+
+# run mapping
+sh mapping.sh
+```
+
+
+```{r}
+# display the STAR mapping job script (mapping.sh)
+mapping_sh = paste0(opt$X_d, '/mapping.sh')
+tags$code(tags$pre(readChar(mapping_sh, file.info(mapping_sh)$size )))
+```
+
+
+# SAM to sorted BAM
+
+```{bash}
+cd ${X_d}
+samtools view \
+  -b Aligned.out.sam \
+  -h \
+  -o Aligned.out.bam
+
+samtools sort \
+  -o Aligned.out.sorted.bam \
+  Aligned.out.bam
+```
+
--- a/star.sh	Sat Mar 03 12:21:18 2018 -0500
+++ b/star.sh	Sun Mar 04 00:59:59 2018 -0500
@@ -8,4 +8,7 @@

     -A '$genomeFastaFiles'
     -B '$sjdbGTFfile'
-    -C '$sjdbOverhang'
\ No newline at end of file
+    -C '$sjdbOverhang'
+    -F '$first_reads'
+    -R '$second_reads'
+    -S '$sorted_bam'
\ No newline at end of file
--- a/star.xml	Sat Mar 03 12:21:18 2018 -0500
+++ b/star.xml	Sun Mar 04 00:59:59 2018 -0500
@@ -6,6 +6,7 @@
         <requirement type="package" version="1.20.0">r-getopt</requirement>
         <requirement type="package" version="1.6">r-rmarkdown</requirement>
         <requirement type="package" version="2.5.4a">star</requirement>
+        <requirement type="package" version="1.7">samtools</requirement>
     </requirements>
     <stdio>
         <regex match="XXX" source="stderr" level="warning"
@@ -21,7 +22,10 @@

     -A '$genomeFastaFiles'
     -B '$sjdbGTFfile'
-    -C '$sjdbOverhang']]></command>
+    -C '$sjdbOverhang'
+    -F '$first_reads'
+    -R '$second_reads'
+    -S '$sorted_bam']]></command>
     <inputs>
         <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
                label="Display analysis code in report?"/>
@@ -33,10 +37,13 @@
         <param type="integer" name="sjdbOverhang" argument="--sjdbOverhang" label="sjdbOverhang"
                help="the length of the genomic sequence around the annotated junction to be used in constructing the splice junctions database. Ideally, this length should be equal to the ReadLength-1, where ReadLength is the length of the reads. For instance, for Illumina 2x100b paired-end reads, the ideal value is 100-1=99. In case of reads of varying length, the ideal value is max(ReadLength)-1. In most cases, a generic value of 100 will work as well as the ideal value."
                optional="False" value="100" min="1"/>
+        <param type="data" name="first_reads" label="First reads" optional="False" format="fastq,fastqsanger"/>
+        <param type="data" name="second_reads" label="Second reads" optional="True" format="fastq,fastqsanger"/>
     </inputs>
     <outputs>
-        <data format="html" name="report" label="tool report"/>
+        <data name="report" format="html" label="${tool.name} report" hidden="false"/>
         <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+        <data name="sorted_bam" format="bam" hidden="false"/>
     </outputs>
     <citations>
         <citation type="bibtex"><![CDATA[
@@ -58,5 +65,15 @@
             publisher={CRC Press}
             }
         ]]></citation>
+        <citation type="bibtex"><![CDATA[@article{dobin2013star,
+  title={STAR: ultrafast universal RNA-seq aligner},
+  author={Dobin, Alexander and Davis, Carrie A and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R},
+  journal={Bioinformatics},
+  volume={29},
+  number={1},
+  pages={15--21},
+  year={2013},
+  publisher={Oxford University Press}
+}]]></citation>
     </citations>
 </tool>
--- a/star_render.R	Sat Mar 03 12:21:18 2018 -0500
+++ b/star_render.R	Sun Mar 04 00:59:59 2018 -0500
@@ -23,12 +23,12 @@
 spec_matrix = as.matrix(
   data.frame(stringsAsFactors=FALSE,
               long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_A", "X_B",
-                             "X_C"),
-             short_flags = c("e", "o", "d", "s", "t", "A", "B", "C"),
-     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
+                             "X_C", "X_F", "X_R", "X_S"),
+             short_flags = c("e", "o", "d", "s", "t", "A", "B", "C", "F", "R", "S"),
+     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
          data_type_flags = c("character", "character", "character", "character",
                              "character", "character", "character",
-                             "character")
+                             "character", "character", "character", "character")
   )
 )
 opt = getopt(spec_matrix)