comparison nastiseq.xml @ 0:a68b3c34f2b5 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
author bgruening
date Tue, 21 Feb 2017 11:11:02 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a68b3c34f2b5
1 <tool id="nastiseq" name="NASTIseq" version="1.0">
2 <description>Identify cis-NATs using ssRNA-seq</description>
3
4 <requirements>
5 <requirement type="package" version="1.0">r-nastiseq</requirement>
6 </requirements>
7 <stdio><!-- Flag the job as failed when either R error marker appears on stdout or stderr. -->
8 <regex match="Execution halted"
9 source="both"
10 level="fatal"
11 description="Execution halted." />
12 <regex match="Error in"
13 source="both"
14 level="fatal"
15 description="An undefined error occurred, please check your input carefully and contact your administrator." />
16 </stdio>
17 <command>
18 <![CDATA[
19 Rscript '$script_file'
20 ]]>
21 </command>
22 <configfiles>
23 <configfile name="script_file">
24 library(NASTIseq)
25 ## Read the annotation as a header-less table (text after '#' is ignored), name its nine columns, and keep only rows whose feature is "gene".
26 genepos = read.delim("${annotation}", header=FALSE, comment.char="#")
27 colnames(genepos) = c("seqname", "source", "feature", "start", "end", "score", "strand", "frame", "attributes")
28 genepos = subset(genepos, feature=="gene")
29
30 get_id = function(attri){
31 ## Return the second space-separated token of the first ';'-separated attribute, with double quotes removed.
32 first_attr = strsplit(attri, ";")[[1]][1]
33 quoted_id = strsplit(first_attr, " ")[[1]][2]
34 return(gsub('\"', '', quoted_id))
35 }
36 ## Replace every attributes string with the identifier that get_id extracts from it.
37 genepos\$attributes = as.character(lapply(as.character(genepos\$attributes), get_id))
38 ## Known pairs: tab-separated table, read with as.is = TRUE so strings stay character.
39 pospairs = read.table("${positive_pair}", sep = "\t", as.is = TRUE)
40 ## Sense-strand counts as a matrix; the first column supplies the row names.
41 smat = as.matrix(read.table("${count_smt}", sep = "\t", row.names = 1))
42 ## Antisense-strand counts, read the same way as the sense-strand counts.
43 asmat = as.matrix(read.table("${count_asmt}", sep = "\t", row.names = 1))
44 ## NOTE(review): getNASTIscore, getnegativepairs, NASTIpredict, defineFDR and FindNATs are presumably provided by the NASTIseq package loaded above - confirm against the package manual.
45 WRscore = getNASTIscore(smat, asmat)
46
47 negpairs = getnegativepairs(genepos)
48
49 WRpred = NASTIpredict(smat,asmat, pospairs, negpairs)
50 ## NOTE(review): prediction() looks like ROCR::prediction, assumed to be attached through NASTIseq - confirm.
51 WRpred_rocr = prediction(WRpred\$predictions,WRpred\$labels)
52 ## NOTE(review): 0.05 is presumably the target false discovery rate - confirm against the defineFDR documentation.
53 thr = defineFDR(WRpred_rocr,0.05)
54
55 WR_names = FindNATs(WRscore, thr, pospairs, genepos)
56 ## The two file names written below must match the from_work_dir attributes in the outputs section.
57 write.table(WR_names\$newpairs, file = "output_newpairs.tsv", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
58
59 write.table(WR_names\$neworphan, file = "output_neworphan.tsv", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
60
61 </configfile>
62 </configfiles>
63 <inputs><!-- Each param name below is referenced as ${name} inside the script_file configfile. -->
64 <param name="annotation" type="data" format="gtf" label="Annotation file"
65 help="The gene ids should be in agreement in the files of annotation, known pairs and read count.">
66 </param>
67 <param name="positive_pair" type="data" format="tabular" label="Known pairs"
68 help="A known pair of cis-natural antisense transcripts">
69 </param>
70 <param name="count_smt" type="data" format="tabular" label="Read count of sense strand"
71 help="">
72 </param>
73 <param name="count_asmt" type="data" format="tabular" label="Read count of antisense strand"
74 help="">
75 </param>
76 </inputs>
77 <outputs><!-- Both datasets are taken from the files that the script_file R script writes with write.table. -->
78 <data name="newpairs" format="tabular"
79 from_work_dir="output_newpairs.tsv"
80 label="${tool.name} on ${on_string}: New pairs">
81 </data>
82 <data name="neworphan" format="tabular"
83 from_work_dir="output_neworphan.tsv"
84 label="${tool.name} on ${on_string}: New orphans">
85 </data>
86 </outputs>
87 <tests>
88 <test>
89 <param name="annotation" value="input_TAIR10_annotation.gtf" ftype="gtf" />
90 <param name="positive_pair" value="input_positive_pair.tsv" ftype="tabular" />
91 <param name="count_smt" value="input_read_count_smt.tsv" ftype="tabular" />
92 <param name="count_asmt" value="input_read_count_asmt.tsv" ftype="tabular" />
93 <output name="newpairs" file="output_newpairs.tsv" ftype="tabular"/>
94 <output name="neworphan" file="output_neworphan.tsv" ftype="tabular"/>
95 </test>
96 </tests>
97 <help>
98 <![CDATA[
99 .. class:: infomark
100
101 **What it does**
102
103 Pairs of RNA molecules transcribed from partially or entirely complementary loci
104 are called cis-natural antisense transcripts (cis-NATs),
105 and they play key roles in the regulation of gene expression in many organisms.
106 A promising experimental tool for profiling sense and antisense transcription
107 is strand-specific RNA sequencing (ssRNA-seq). `NASTIseq`_ is a method for identifying
108 cis-NATs from ssRNA-seq data. `NASTIseq`_ is based on model comparison that incorporates
109 the inherent variable efficiency of generating perfectly strand-specific libraries.
110 Applying the method to the ssRNA-seq data from whole root and
111 cell-type specific Arabidopsis libraries confirmed most of
112 the known cis-NAT pairs and identified hundreds of additional cis-NAT pairs.
113
114 .. _NASTIseq: https://ohlerlab.mdc-berlin.de/software/NASTIseq_104/
115
116 .. class:: infomark
117
118 **Inputs**
119
120 ``Annotation file``: the annotation in `gtf`_ format
121
122 .. _gtf: http://www.ensembl.org/info/website/upload/gff.html
123
124 ``Known pairs``: a two-column table in which each row contains the
125 names of a known pair of cis-natural antisense transcripts. For example::
126
127 AT2G46910 AT2G46915
128 AT3G12250 AT3G12260
129 AT5G50315 AT5G50320
130
131 ``Read count of sense strand``: an N-by-M table of read counts for reads that mapped
132 to the sense strand. N is the number of gene loci. M is the
133 number of biological replicates in the sample. Each
134 row name must be a unique locus name. For example::
135
136 AT1G38440 0 2 0
137 AT1G43171 2 8 1
138 AT1G67670 3 7 0
139
140 ``Read count of antisense strand``: an N-by-M table of read counts for reads that mapped
141 to the antisense strand. N is the number of gene loci. M is the
142 number of biological replicates in the sample. Each
143 row name must be a unique locus name. For example::
144
145 AT1G38440 0 0 0
146 AT1G43171 0 0 0
147 AT1G67670 0 0 2
148
149 Read counts can be obtained using popular software such as `Rsamtools`_.
150
151 .. _RSamtools: http://bioconductor.org/packages/release/bioc/html/Rsamtools.html
152
153 .. class:: infomark
154
155 **Outputs**
156
157 ``New pairs``: a two-column table in which each row contains the
158 names of a new pair of cis-natural antisense transcripts. For example::
159
160 AT1G76630 AT1G76640
161 AT2G06045 AT2G06050
162 AT4G30100 AT4G30110
163
164
165 ``New orphans``: a list of new orphan transcripts. For example::
166
167 ATMG00030
168 AT5G49440
169 AT2G11240
170 ]]>
171 </help>
172 <citations>
173 <citation type="doi">10.1101/gr.149310.112</citation>
174 </citations>
175 </tool>