Mercurial > repos > mingchen0919 > aurora_htseq_site
changeset 1:9be3a8754fb3 draft
initial
author | mingchen0919 |
---|---|
date | Wed, 07 Mar 2018 08:51:53 -0500 |
parents | 0bfeaf18a871 |
children | fad3864136c9 |
files | htseq_count.Rmd htseq_count.sh htseq_count.xml htseq_count_index.Rmd htseq_count_render.R htseq_count_site.Rmd htseq_count_site.sh htseq_count_site.xml htseq_count_site_index.Rmd |
diffstat | 9 files changed, 216 insertions(+), 215 deletions(-) [+] |
line wrap: on
line diff
--- a/htseq_count.Rmd Wed Mar 07 08:45:02 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ ---- -title: 'htseq-count analysis' -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -## Run htseq-count - -```{bash} -cd ${X_d} - -cat >htseq-count.sh <<EOF - htseq-count \\ - $(echo ${X_A} | sed 's/,/ /g') \\ - ${X_G} \\ - -f ${X_f} \\ - -r ${X_r} \\ - -s ${X_S} \\ - -a ${X_a} \\ - -t ${X_T} \\ - -i ${X_i} \\ - -m ${X_m} > counts.txt - - grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c} -EOF -``` - - -```{r} -# display htseq-count job script -htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh') -tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size ))) -``` - -```{bash} -cd ${X_d} -sh htseq-count.sh -``` - -## Counts - -```{r} -count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) -sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) -colnames(count_data) = rep(sample_names, length = ncol(count_data)) -DT::datatable(count_data, caption = "htseq counts for each sample") -``` - -```{r} -# save count data as an R object -save(count_data, file = paste0(opt$X_d, '/counts.RData')) -``` - -```{bash} -cp ${X_d}/counts.RData ${X_O} -``` - -
--- a/htseq_count.sh Wed Mar 07 08:45:02 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -Rscript '${__tool_directory__}/htseq_count_render.R' - - -e $echo - -o $report - -d $report.files_path - -s $sink_message - -t '${__tool_directory__}' - - -A '$alignment_files' - -B '$sample_names' - -G $gff - -f $format - -r $order - -S $stranded - -a $minaqual - -T $feature_type - -i $idattr - -m $mode - -c $count - -O $count_rdata \ No newline at end of file
--- a/htseq_count.xml Wed Mar 07 08:45:02 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -<tool id="aurora_htseq_size" name="Aurora htseq site" version="1.0.0"> - <description>Counting reads in features.</description> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.6">r-rmarkdown</requirement> - <requirement type="package" version="0.9.1">htseq</requirement> - <requirement type="package" version="0.2">r-dt</requirement> - </requirements> - <stdio> - <regex match="XXX" source="stderr" level="warning" - description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_site_render.R' - - -e $echo - -o $report - -d $report.files_path - -s $sink_message - -t '${__tool_directory__}' - - -A '$alignment_files' - -B '$sample_names' - -G $gff - -f $format - -r $order - -S $stranded - -a $minaqual - -T $feature_type - -i $idattr - -m $mode - -c $count - -O $count_rdata]]></command> - <inputs> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" - label="Display analysis code in report?"/> - <param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam" - multiple="True"/> - <param type="text" name="sample_names" label="Sample names" - help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files." - optional="False" size="5x25"/> - <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False"> - <option value="sam" selected="false">SAM</option> - <option value="bam" selected="true">BAM</option> - </param> - <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/> - <param type="select" name="order" argument="-r" label="Order" - help="Use this option to indicate how the input data has been sorted. The default is name. " - optional="False" multiple="False"> - <option value="name" selected="false">Name</option> - <option value="pos" selected="false">Position</option> - </param> - <param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False"> - <option value="yes" selected="true">Yes</option> - <option value="no" selected="false">No</option> - <option value="reverse" selected="false">Reverse</option> - </param> - <param type="integer" name="minaqual" argument="-a" label="Alignment quality " - help="Skip all reads with alignment quality lower than the given minimum value (default: 10)" - optional="False" value="10" min="0"/> - <param type="text" name="feature_type" argument="-t" label="Feature type" - help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)" - optional="False" value="exon"/> - <param type="text" name="idattr" argument="-i" label="ID attribute" - help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id." - optional="False" value="gene_id"/> - <param type="select" name="mode" argument="-m" label="Mode" - help="Mode to handle reads overlapping more than one feature. Possible values for <mode> are union, intersection-strict and intersection-nonempty (default: union)" - optional="False" multiple="False"> - <option value="union" selected="true">Union</option> - <option value="intersection-strict" selected="false">Intersection (strict)</option> - <option value="intersection-nonempty" selected="false">Intersection (nonempty)</option> - </param> - </inputs> - <outputs> - <data name="report" format="html" label="Aurora htseq-count site on ${on_string}" hidden="false"/> - <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> - <data name="count" format="txt" label="Aurora htseq-count site on ${on_string}" hidden="false"/> - <data name="count_rdata" format="rdata" hidden="false"/> - </outputs> - <citations> - <citation type="bibtex"><![CDATA[ - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff - and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - ]]></citation> - <citation type="bibtex"><![CDATA[ - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - ]]></citation> - <citation type="bibtex"><![CDATA[@article{anders2015htseq, - title={HTSeq—a Python framework to work with high-throughput sequencing data}, - author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang}, - journal={Bioinformatics}, - volume={31}, - number={2}, - pages={166--169}, - year={2015}, - publisher={Oxford University Press} -}]]></citation> - </citations> -</tool>
--- a/htseq_count_index.Rmd Wed Mar 07 08:45:02 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ ---- -title: "htseq-count Report" -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = TRUE, error = TRUE) -``` - -## References - -Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6. - -Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29. - -Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang (2015). HTSeq—a Python framework to work with high-throughput sequencing data. In Bioinformatics, 31 (2), pp. 166--169. \ No newline at end of file
--- a/htseq_count_render.R Wed Mar 07 08:45:02 2018 -0500 +++ b/htseq_count_render.R Wed Mar 07 08:51:53 2018 -0500 @@ -10,6 +10,7 @@ library(getopt) library(rmarkdown) +library(htmltools) #------------------------------------------------ @@ -58,9 +59,9 @@ #-----------------render site-------------- # copy site generating materials into OUTPUT_DIR dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE) -system(paste0('cp -r ', TOOL_DIR, '/htseq_count.Rmd ', OUTPUT_DIR, '/site_generator/htseq_count.Rmdd')) +system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site.Rmd ', OUTPUT_DIR, '/site_generator/htseq_count_site.Rmd')) system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml')) -system(paste0('cp -r ', TOOL_DIR, '/htseq_count_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd')) +system(paste0('cp -r ', TOOL_DIR, '/htseq_count_site_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd')) # render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file render_site(input = paste0(OUTPUT_DIR, '/site_generator')) # remove site generating materials from output associated directory
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_site.Rmd Wed Mar 07 08:51:53 2018 -0500 @@ -0,0 +1,65 @@ +--- +title: 'htseq-count analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + + +## Run htseq-count + +```{bash} +cd ${X_d} + +cat >htseq-count.sh <<EOF + htseq-count \\ + $(echo ${X_A} | sed 's/,/ /g') \\ + ${X_G} \\ + -f ${X_f} \\ + -r ${X_r} \\ + -s ${X_S} \\ + -a ${X_a} \\ + -t ${X_T} \\ + -i ${X_i} \\ + -m ${X_m} > counts.txt + + grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c} +EOF +``` + + +```{r} +# display htseq-count job script +htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh') +tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size ))) +``` + +```{bash} +cd ${X_d} +sh htseq-count.sh +``` + +## Counts + +```{r} +count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) +sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) +colnames(count_data) = rep(sample_names, length = ncol(count_data)) +DT::datatable(count_data, caption = "htseq counts for each sample") +``` + +```{r} +# save count data as an R object +save(count_data, file = paste0(opt$X_d, '/counts.RData')) +``` + +```{bash} +cp ${X_d}/counts.RData ${X_O} +``` + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_site.sh Wed Mar 07 08:51:53 2018 -0500 @@ -0,0 +1,20 @@ +Rscript '${__tool_directory__}/htseq_count_render.R' + + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + + -A '$alignment_files' + -B '$sample_names' + -G $gff + -f $format + -r $order + -S $stranded + -a $minaqual + -T $feature_type + -i $idattr + -m $mode + -c $count + -O $count_rdata \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_site.xml Wed Mar 07 08:51:53 2018 -0500 @@ -0,0 +1,112 @@ +<tool id="aurora_htseq_size" name="Aurora htseq site" version="1.0.0"> + <description>Counting reads in features.</description> + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.6">r-rmarkdown</requirement> + <requirement type="package" version="0.9.1">htseq</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + </requirements> + <stdio> + <regex match="XXX" source="stderr" level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_site_render.R' + + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + + -A '$alignment_files' + -B '$sample_names' + -G $gff + -f $format + -r $order + -S $stranded + -a $minaqual + -T $feature_type + -i $idattr + -m $mode + -c $count + -O $count_rdata]]></command> + <inputs> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> + <param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam" + multiple="True"/> + <param type="text" name="sample_names" label="Sample names" + help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files." + optional="False" size="5x25"/> + <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False"> + <option value="sam" selected="false">SAM</option> + <option value="bam" selected="true">BAM</option> + </param> + <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/> + <param type="select" name="order" argument="-r" label="Order" + help="Use this option to indicate how the input data has been sorted. The default is name. " + optional="False" multiple="False"> + <option value="name" selected="false">Name</option> + <option value="pos" selected="false">Position</option> + </param> + <param type="select" name="stranded" argument="-s" label="Stranded" optional="False" multiple="False"> + <option value="yes" selected="true">Yes</option> + <option value="no" selected="false">No</option> + <option value="reverse" selected="false">Reverse</option> + </param> + <param type="integer" name="minaqual" argument="-a" label="Alignment quality " + help="Skip all reads with alignment quality lower than the given minimum value (default: 10)" + optional="False" value="10" min="0"/> + <param type="text" name="feature_type" argument="-t" label="Feature type" + help="Feature type (3rd column in GFF file) to be used, all features of other type are ignored (default, suitable for RNA-Seq analysis using an Ensembl GTF file: exon)" + optional="False" value="exon"/> + <param type="text" name="idattr" argument="-i" label="ID attribute" + help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id." + optional="False" value="gene_id"/> + <param type="select" name="mode" argument="-m" label="Mode" + help="Mode to handle reads overlapping more than one feature. Possible values for <mode> are union, intersection-strict and intersection-nonempty (default: union)" + optional="False" multiple="False"> + <option value="union" selected="true">Union</option> + <option value="intersection-strict" selected="false">Intersection (strict)</option> + <option value="intersection-nonempty" selected="false">Intersection (nonempty)</option> + </param> + </inputs> + <outputs> + <data name="report" format="html" label="Aurora htseq-count site on ${on_string}" hidden="false"/> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> + <data name="count" format="txt" label="Aurora htseq-count site on ${on_string}" hidden="false"/> + <data name="count_rdata" format="rdata" hidden="false"/> + </outputs> + <citations> + <citation type="bibtex"><![CDATA[ + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + ]]></citation> + <citation type="bibtex"><![CDATA[@article{anders2015htseq, + title={HTSeq—a Python framework to work with high-throughput sequencing data}, + author={Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang}, + journal={Bioinformatics}, + volume={31}, + number={2}, + pages={166--169}, + year={2015}, + publisher={Oxford University Press} +}]]></citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_count_site_index.Rmd Wed Mar 07 08:51:53 2018 -0500 @@ -0,0 +1,16 @@ +--- +title: "htseq-count Report" +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(echo = TRUE, error = TRUE) +``` + +## References + +Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6. + +Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29. + +Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang (2015). HTSeq—a Python framework to work with high-throughput sequencing data. In Bioinformatics, 31 (2), pp. 166--169. \ No newline at end of file