Mercurial > repos > mingchen0919 > aurora_star
changeset 3:75a55316ff32 draft
add samtools
author | mingchen0919 |
---|---|
date | Sun, 04 Mar 2018 00:59:59 -0500 |
parents | b5a576fc2a7c |
children | 5fabc85515ff |
files | star.Rmd star.sh star.xml star_render.R |
diffstat | 4 files changed, 69 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/star.Rmd Sat Mar 03 12:21:18 2018 -0500 +++ b/star.Rmd Sun Mar 04 00:59:59 2018 -0500 @@ -41,3 +41,45 @@ tags$code(tags$pre(readChar(index_genome_sh, file.info(index_genome_sh)$size ))) ``` +# Running mapping jobs + +```{bash} +cd ${X_d} + +cat >temp.sh <<EOF +STAR \\ + --genomeDir ${X_d}/genomeDir \\ + --readFilesIn \\ + ${X_F} \\ + ${X_R} \\ + > /dev/null 2>&1 +EOF + +grep -v None temp.sh > mapping.sh + +# run mapping +sh mapping.sh +``` + + +```{r} +# display skewer job script +mapping_sh = paste0(opt$X_d, '/mapping.sh') +tags$code(tags$pre(readChar(mapping_sh, file.info(mapping_sh)$size ))) +``` + + +# SAM to sorted BAM + +```{bash} +cd ${X_d} +samtools view \ + -b Aligned.out.sam \ + -h \ + -o Aligned.out.bam + +samtools sort \ + -o Aligned.out.sorted.bam \ + Aligned.out.bam +``` +
--- a/star.sh Sat Mar 03 12:21:18 2018 -0500 +++ b/star.sh Sun Mar 04 00:59:59 2018 -0500 @@ -8,4 +8,7 @@ -A '$genomeFastaFiles' -B '$sjdbGTFfile' - -C '$sjdbOverhang' \ No newline at end of file + -C '$sjdbOverhang' + -F '$first_reads' + -R '$second_reads' + -S '$sorted_bam' \ No newline at end of file
--- a/star.xml Sat Mar 03 12:21:18 2018 -0500 +++ b/star.xml Sun Mar 04 00:59:59 2018 -0500 @@ -6,6 +6,7 @@ <requirement type="package" version="1.20.0">r-getopt</requirement> <requirement type="package" version="1.6">r-rmarkdown</requirement> <requirement type="package" version="2.5.4a">star</requirement> + <requirement type="package" version="1.7">samtools</requirement> </requirements> <stdio> <regex match="XXX" source="stderr" level="warning" @@ -21,7 +22,10 @@ -A '$genomeFastaFiles' -B '$sjdbGTFfile' - -C '$sjdbOverhang']]></command> + -C '$sjdbOverhang' + -F '$first_reads' + -R '$second_reads' + -S '$sorted_bam']]></command> <inputs> <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?"/> @@ -33,10 +37,13 @@ <param type="integer" name="sjdbOverhang" argument="--sjdbOverhang" label="sjdbOverhang" help="the length of the genomic sequence around the annotated junction to be used in constructing the splice junctions database. Ideally, this length should be equal to the ReadLength-1, where ReadLength is the length of the reads. For instance, for Illumina 2x100b paired-end reads, the ideal value is 100-1=99. In case of reads of varying length, the ideal value is max(ReadLength)-1. In most cases, a generic value of 100 will work as well as the ideal value." optional="False" value="100" min="1"/> + <param type="data" name="first_reads" label="First reads" optional="False" format="fastq,fastqsanger"/> + <param type="data" name="second_reads" label="Second reads" optional="True" format="fastq,fastqsanger"/> </inputs> <outputs> - <data format="html" name="report" label="tool report"/> + <data name="report" format="html" label="${tool.name} report" hidden="false"/> <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> + <data name="sorted_bam" format="bam" hidden="false"/> </outputs> <citations> <citation type="bibtex"><![CDATA[ @@ -58,5 +65,15 @@ publisher={CRC Press} } ]]></citation> + <citation type="bibtex"><![CDATA[@article{dobin2013star, + title={STAR: ultrafast universal RNA-seq aligner}, + author={Dobin, Alexander and Davis, Carrie A and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R}, + journal={Bioinformatics}, + volume={29}, + number={1}, + pages={15--21}, + year={2013}, + publisher={Oxford University Press} +}]]></citation> </citations> </tool>
--- a/star_render.R Sat Mar 03 12:21:18 2018 -0500 +++ b/star_render.R Sun Mar 04 00:59:59 2018 -0500 @@ -23,12 +23,12 @@ spec_matrix = as.matrix( data.frame(stringsAsFactors=FALSE, long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_A", "X_B", - "X_C"), - short_flags = c("e", "o", "d", "s", "t", "A", "B", "C"), - argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + "X_C", "X_F", "X_R", "X_S"), + short_flags = c("e", "o", "d", "s", "t", "A", "B", "C", "F", "R", "S"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), data_type_flags = c("character", "character", "character", "character", "character", "character", "character", - "character") + "character", "character", "character", "character") ) ) opt = getopt(spec_matrix)