Mercurial > repos > mingchen0919 > aurora_htseq
changeset 2:db9da9716914 draft
add r-dt
author | mingchen0919 |
---|---|
date | Wed, 07 Mar 2018 00:12:55 -0500 |
parents | cc33706a1d4b |
children | 1545e24c9ba7 |
files | htseq_count.Rmd htseq_count.sh htseq_count.xml htseq_count_render.R spec.txt |
diffstat | 5 files changed, 65 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/htseq_count.Rmd Tue Mar 06 17:38:58 2018 -0500 +++ b/htseq_count.Rmd Wed Mar 07 00:12:55 2018 -0500 @@ -11,15 +11,47 @@ ``` -# Code for computational analysis +## Run htseq-count + +```{bash} +cd ${X_d} -```{r 'step 1'} +cat >htseq-count.sh <<EOF + htseq-count \\ + $(echo ${X_A} | sed 's/,/ /g') \\ + ${X_G} \\ + -f ${X_f} \\ + -r ${X_r} \\ + -s ${X_S} \\ + -a ${X_a} \\ + -t ${X_T} \\ + -i ${X_i} \\ + -m ${X_m} > counts.txt + + grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' counts.txt > ${X_c} +EOF +``` + + +```{r} +# display htseq-count job script +htseq_count_sh = paste0(opt$X_d, '/htseq-count.sh') +tags$code(tags$pre(readChar(htseq_count_sh, file.info(htseq_count_sh)$size ))) opt ``` +```{bash} +cd ${X_d} +sh htseq-count.sh +``` -```{r 'ste[ 2'} +## Counts +```{r} +count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) +sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) +colnames(count_data) = rep(sample_names, length = ncol(count_data)) +DT::datatable(count_data, caption = "htseq counts for each sample") ```
--- a/htseq_count.sh Tue Mar 06 17:38:58 2018 -0500 +++ b/htseq_count.sh Wed Mar 07 00:12:55 2018 -0500 @@ -1,4 +1,4 @@ -Rscript '${__tool_directory__}/aurora_htseq_count.R' +Rscript '${__tool_directory__}/htseq_count_render.R' -e $echo -o $report @@ -6,11 +6,14 @@ -s $sink_message -t '${__tool_directory__}' + -A '$alignment_files' + -B '$sample_names' + -G $gff -f $format -r $order -S $stranded -a $minaqual -T $feature_type -i $idattr - -I $additional_attr - -m $mode \ No newline at end of file + -m $mode + -c $count \ No newline at end of file
--- a/htseq_count.xml Tue Mar 06 17:38:58 2018 -0500 +++ b/htseq_count.xml Wed Mar 07 00:12:55 2018 -0500 @@ -10,7 +10,7 @@ <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/> </stdio> - <command><![CDATA[Rscript '${__tool_directory__}/aurora_htseq_count.R' + <command><![CDATA[Rscript '${__tool_directory__}/htseq_count_render.R' -e $echo -o $report @@ -18,23 +18,30 @@ -s $sink_message -t '${__tool_directory__}' + -A '$alignment_files' + -B '$sample_names' + -G $gff -f $format -r $order -S $stranded -a $minaqual -T $feature_type -i $idattr - -I $additional_attr - -m $mode]]></command> + -m $mode + -c $count]]></command> <inputs> <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?"/> - <param type="data" name="alignment_file" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam"/> - <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/> + <param type="data" name="alignment_files" label="Alignment file (SAM/BAM)" optional="False" format="sam,bam" + multiple="True"/> + <param type="text" name="sample_names" label="Sample names" + help="sample names for each input SAM/BAM file. Use comma to separate multiple names, for example, sample_1, sample_2, sample_3. The number of sample names should be the same as the number of input SAM/BAM files." + optional="False" size="5x25"/> <param type="select" name="format" argument="-f" label="Input data format" optional="False" multiple="False"> <option value="sam" selected="false">SAM</option> <option value="bam" selected="false">BAM</option> </param> + <param type="data" name="gff" label="GFF file" optional="False" format="gff, gff3"/> <param type="select" name="order" argument="-r" label="Order" help="Use this option to indicate how the input data has been sorted. The default is name. " optional="False" multiple="False"> @@ -55,9 +62,6 @@ <param type="text" name="idattr" argument="-i" label="ID attribute" help="GFF attribute to be used as feature ID. Several GFF lines with the same feature ID will be considered as parts of the same feature. The feature ID is used to identity the counts in the output table. The default, suitable for RNA-Seq analysis using an Ensembl GTF file, is gene_id." optional="False" value="gene_id"/> - <param type="text" name="additional_attr" argument="--additional-attr" label="Additional attributes" - help="Additional feature attributes, which will be printed as an additional column after the primary attribute column but before the counts column(s). The default is none, a suitable value to get gene names using an Ensembl GTF file is gene_name." - optional="True"/> <param type="select" name="mode" argument="-m" label="Mode" help="Mode to handle reads overlapping more than one feature. Possible values for <mode> are union, intersection-strict and intersection-nonempty (default: union)" optional="False" multiple="False"> @@ -67,7 +71,7 @@ </param> </inputs> <outputs> - <data name="Aurora htseq" format="html" label="Aurora htseq on ${on_string}" hidden="false"/> + <data name="report" format="html" label="Aurora htseq on ${on_string}" hidden="false"/> <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> <data name="count" format="txt" label="Aurora htseq-count on ${on_string}" hidden="false"/> </outputs>
--- a/htseq_count_render.R Tue Mar 06 17:38:58 2018 -0500 +++ b/htseq_count_render.R Wed Mar 07 00:12:55 2018 -0500 @@ -10,6 +10,7 @@ library(getopt) library(rmarkdown) +library(htmltools) #------------------------------------------------ @@ -21,15 +22,16 @@ spec_matrix = as.matrix( data.frame(stringsAsFactors=FALSE, - long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_f", "X_r", - "X_S", "X_a", "X_T", "X_i", "X_I", "X_m"), - short_flags = c("e", "o", "d", "s", "t", "f", "r", "S", "a", "T", - "i", "I", "m"), - argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_A", "X_B", "X_G", + "X_f", "X_r", "X_S", "X_a", "X_T", "X_i", "X_m", "X_c"), + short_flags = c("e", "o", "d", "s", "t", "A", "B", "G", "f", "r", "S", + "a", "T", "i", "m", "c"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, + 1L, 1L, 1L), data_type_flags = c("character", "character", "character", "character", - "character", "character", "character", + "character", "character", "character", "character", "character", "character", "character", "character", - "character", "character") + "character", "character", "character", "character") ) ) opt = getopt(spec_matrix)
--- a/spec.txt Tue Mar 06 17:38:58 2018 -0500 +++ b/spec.txt Wed Mar 07 00:12:55 2018 -0500 @@ -4,11 +4,12 @@ X_d,d,1,character X_s,s,1,character X_t,t,1,character +X_A,A,1,character +X_G,G,1,character X_f,f,1,character X_r,r,1,character X_S,S,1,character X_a,a,1,character X_T,T,1,character X_i,i,1,character -X_I,I,1,character X_m,m,1,character