Mercurial > repos > bgruening > nastiseq
comparison nastiseq.xml @ 0:a68b3c34f2b5 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
author | bgruening |
---|---|
date | Tue, 21 Feb 2017 11:11:02 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a68b3c34f2b5 |
---|---|
1 <tool id="nastiseq" name="NASTIseq" version="1.0"> | |
2 <description>Identify cis-NATs using ssRNA-seq</description> | |
3 | |
4 <requirements> | |
5 <requirement type="package" version="1.0">r-nastiseq</requirement> | |
6 </requirements> | |
7 <stdio> | |
8 <regex match="Execution halted" | |
9 source="both" | |
10 level="fatal" | |
11 description="Execution halted." /> | |
12 <regex match="Error in" | |
13 source="both" | |
14 level="fatal" | |
15 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
16 </stdio> | |
17 <command> | |
18 <![CDATA[ | |
19 Rscript '$script_file' | |
20 ]]> | |
21 </command> | |
22 <configfiles> | |
23 <configfile name="script_file"> | |
24 library(NASTIseq) | |
25 | |
26 genepos = read.delim("${annotation}", header=FALSE, comment.char="#") | |
27 colnames(genepos) = c("seqname", "source", "feature", "start", "end", "score", "strand", "frame", "attributes") | |
28 genepos = subset(genepos, feature=="gene") | |
29 | |
30 get_id = function(attri){ | |
31 gene_info = strsplit(attri, ";")[[1]][1] | |
32 gene_id = strsplit(gene_info, " ")[[1]][2] | |
33 gene_id = gsub('\"', '', gene_id) | |
34 return(gene_id) | |
35 } | |
36 | |
37 genepos\$attributes = as.character(lapply(as.character(genepos\$attributes), get_id)) | |
38 | |
39 pospairs = read.table("${positive_pair}", sep = "\t", as.is = TRUE) | |
40 | |
41 smat = as.matrix(read.table("${count_smt}", sep = "\t", row.names = 1)) | |
42 | |
43 asmat = as.matrix(read.table("${count_asmt}", sep = "\t", row.names = 1)) | |
44 | |
45 WRscore = getNASTIscore(smat, asmat) | |
46 | |
47 negpairs = getnegativepairs(genepos) | |
48 | |
49 WRpred = NASTIpredict(smat,asmat, pospairs, negpairs) | |
50 | |
51 WRpred_rocr = prediction(WRpred\$predictions,WRpred\$labels) | |
52 | |
53 thr = defineFDR(WRpred_rocr,0.05) | |
54 | |
55 WR_names = FindNATs(WRscore, thr, pospairs, genepos) | |
56 | |
57 write.table(WR_names\$newpairs, file = "output_newpairs.tsv", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE) | |
58 | |
59 write.table(WR_names\$neworphan, file = "output_neworphan.tsv", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE) | |
60 | |
61 </configfile> | |
62 </configfiles> | |
63 <inputs> | |
64 <param name="annotation" type="data" format="gtf" label="Annotation file" | |
65 help="The gene ids should be in agreement in the files of annotation, known pairs and read count."> | |
66 </param> | |
67 <param name="positive_pair" type="data" format="tabular" label="Known pairs" | |
68 help="A known pair of cis-natural antisense transcripts"> | |
69 </param> | |
70 <param name="count_smt" type="data" format="tabular" label="Read count of sense strand" | |
71 help=""> | |
72 </param> | |
73 <param name="count_asmt" type="data" format="tabular" label="Read count of antisense strand" | |
74 help=""> | |
75 </param> | |
76 </inputs> | |
77 <outputs> | |
78 <data name="newpairs" format="tabular" | |
79 from_work_dir="output_newpairs.tsv" | |
80 label="${tool.name} on ${on_string}: New pairs"> | |
81 </data> | |
82 <data name="neworphan" format="tabular" | |
83 from_work_dir="output_neworphan.tsv" | |
84 label="${tool.name} on ${on_string}: New orphans"> | |
85 </data> | |
86 </outputs> | |
87 <tests> | |
88 <test> | |
89 <param name="annotation" value="input_TAIR10_annotation.gtf" ftype="gtf" /> | |
90 <param name="positive_pair" value="input_positive_pair.tsv" ftype="tabular" /> | |
91 <param name="count_smt" value="input_read_count_smt.tsv" ftype="tabular" /> | |
92 <param name="count_asmt" value="input_read_count_asmt.tsv" ftype="tabular" /> | |
93 <output name="newpairs" file="output_newpairs.tsv" ftype="tabular"/> | |
94 <output name="neworphan" file="output_neworphan.tsv" ftype="tabular"/> | |
95 </test> | |
96 </tests> | |
97 <help> | |
98 <![CDATA[ | |
99 .. class:: infomark | |
100 | |
101 **What it does** | |
102 | |
103 Pairs of RNA molecules transcribed from partially or entirely complementary loci | |
104 are called cis-natural antisense transcripts (cis-NATs), | |
105 and they play key roles in the regulation of gene expression in many organisms. | |
106 A promising experimental tool for profiling sense and antisense transcription | |
107 is strand-specific RNA sequencing (ssRNA-seq). `NASTIseq`_ is a method to identify | |
108 cis-NATs using ssRNA-seq. `NASTIseq`_ is based on model comparison that incorporates | |
109 the inherent variable efficiency of generating perfectly strand-specific libraries. | |
110 Applying the method to the ssRNA-seq data from whole root and | |
111 cell-type specific Arabidopsis libraries confirmed most of | |
112 the known cis-NAT pairs and identified hundreds of additional cis-NAT pairs. | |
113 | |
114 .. _NASTIseq: https://ohlerlab.mdc-berlin.de/software/NASTIseq_104/ | |
115 | |
116 .. class:: infomark | |
117 | |
118 **Inputs** | |
119 | |
120 ``Annotation file``: the annotation in `gtf`_ format | |
121 | |
122 .. _gtf: http://www.ensembl.org/info/website/upload/gff.html | |
123 | |
124 ``Known pairs``: a two-column table in which each row contains the | |
125 names of a known pair of cis-natural antisense transcripts. Example:: | |
126 | |
127 AT2G46910 AT2G46915 | |
128 AT3G12250 AT3G12260 | |
129 AT5G50315 AT5G50320 | |
130 | |
131 ``Read count of sense strand``: an N by M table of read counts for reads that mapped | |
132 to the sense strand. N is the number of gene loci. M is the | |
133 number of biological replicates in the sample. Each | |
134 row name must be a unique locus name. Example:: | |
135 | |
136 AT1G38440 0 2 0 | |
137 AT1G43171 2 8 1 | |
138 AT1G67670 3 7 0 | |
139 | |
140 ``Read count of antisense strand``: an N by M table of read counts for reads that mapped | |
141 to the antisense strand. N is the number of gene loci. M is the | |
142 number of biological replicates in the sample. Each | |
143 row name must be a unique locus name. Example:: | |
144 | |
145 AT1G38440 0 0 0 | |
146 AT1G43171 0 0 0 | |
147 AT1G67670 0 0 2 | |
148 | |
149 Read counts can be obtained using popular software such as `Rsamtools`_. | |
150 | |
151 .. _Rsamtools: http://bioconductor.org/packages/release/bioc/html/Rsamtools.html | |
152 | |
153 .. class:: infomark | |
154 | |
155 **Outputs** | |
156 | |
157 ``New pairs``: a two-column table in which each row contains the | |
158 names of a new pair of cis-natural antisense transcripts. Example:: | |
159 | |
160 AT1G76630 AT1G76640 | |
161 AT2G06045 AT2G06050 | |
162 AT4G30100 AT4G30110 | |
163 | |
164 | |
165 ``New orphans``: a list of new orphan transcripts. Example:: | |
166 | |
167 ATMG00030 | |
168 AT5G49440 | |
169 AT2G11240 | |
170 ]]> | |
171 </help> | |
172 <citations> | |
173 <citation type="doi">10.1101/gr.149310.112</citation> | |
174 </citations> | |
175 </tool> |