changeset 0:f5ad1114a2ee draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tools/GraphClust/AggregateAlignments commit 5bbebbb071bdd0c3de481ddaf5f78cbb95d93d76
author bgruening
date Sat, 27 Oct 2018 12:56:16 -0400
parents
children
files graphclust_aggregate.xml test-data/1.alignment.cons.tsv test-data/2.alignment.cons.tsv test-data/3.alignment.cons.tsv test-data/clusters1.bed test-data/metrics1.tsv
diffstat 6 files changed, 73 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graphclust_aggregate.xml	Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,60 @@
+<tool id="graphclust_aggregate_alignments" name="Aggregate and filter alignment metrics" version="0.1">
+    <description>of individual clusters, like the output of graphclust_align_cluster</description>
+    <requirements>
+        <requirement type="package" version="0.6.0">graphclust-wrappers</requirement>
+        <requirement type="package" version="0.23.0">pandas</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+    <![CDATA[
+        ## Link every input under a sanitized name so the glob passed to the script below matches them all.
+        mkdir ./clusters_metrics &&
+        #import re
+        #for $tsv in $alignment_cons_tsv:
+            #set $safename_tsv = re.sub('[^\w\-_\.]', '_', $tsv.element_identifier)
+            ln -f -s '$tsv' ./clusters_metrics/${safename_tsv}.tsv &&
+        #end for
+        aggregate_align_metrics.py
+        --RNAz-prob-threshold $RNAz_prob_threshold
+        --rscape-bp-threshold $rscape_bp_threshold
+        --min-seq-num $min_seq_num
+        $exclude_spurious_structs
+        $additonal_columns
+        ## The pattern is quoted so the shell hands it to the script unexpanded.
+        --clusters-tsv-pattern "clusters_metrics/*.tsv"
+        --filtered-tsv-out filtered-alignment-metrics.tsv
+        --bed-out bed-cluster-locations.bed
+    ]]></command>
+    <inputs>
+        <param type="data" name="alignment_cons_tsv" format="tabular" multiple="True" label="conservation-metrics-tsv"
+            help="Tabular tsv file of the computed conservation metrics from align cluster step"/>
+        <param argument="min_seq_num" type="integer" value="3" label="Minimum cluster size"
+            help="Clusters with fewer sequences are excluded"/>
+        <param argument="RNAz_prob_threshold" type="float" value="0.5" label="RNAz bed threshold"
+            help="Minimum RNAz SVM RNA-class probability for bed annotation as RNAz hit"/>
+        <param argument="rscape_bp_threshold" type="integer" value="2" label="Rscape bed threshold"
+            help="Minimum number of significant covarying basepairs by Rscape for bed annotation as Rscape hit"/>
+        <param name="exclude_spurious_structs" type="boolean" checked="True" truevalue="--exclude-spurious-structs" falsevalue="" label="Exclude spurious structures"
+            help="Exclude spurious alignment structs with poor SCI less than 0.01"/>
+        <param name="additonal_columns" type="boolean" checked="False" truevalue="--all-columns" falsevalue="" label="Output all metric columns" help="Output additional prediction metrics"/>
+    </inputs>
+    <outputs>
+        <data name="filtered_tsv" format="tabular" from_work_dir="filtered-alignment-metrics.tsv" label="filtered-alignment-metrics.tsv"/>
+        <data name="bed_clusters" format="bed" from_work_dir="bed-cluster-locations.bed" label="bed-cluster-locations.bed"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="alignment_cons_tsv" value="1.alignment.cons.tsv,2.alignment.cons.tsv,3.alignment.cons.tsv"/>
+            <output name="filtered_tsv" file="metrics1.tsv"/>
+            <output name="bed_clusters" file="clusters1.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Aggregate and filter alignment metrics of individual clusters, like the output
+of graphclust_align_cluster.
+
+Outputs a filtered table of per-cluster metrics and a BED file of cluster locations.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.597695</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.alignment.cons.tsv	Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,2 @@
+	Background model	Columns	Combinations/Pair	Consensus MFE	Covariance contribution	Decision model	Energy contribution	G+C content	Mean pairwise identity	Mean single sequence MFE	Mean z-score	Prediction	Reading direction	SVM RNA-class probability	SVM decision value	Sequences	Shannon entropy	Structure conservation index	alignment	multihit per_species	num_human_seqs_all	num_seqs	num_seqs_all	rscape_#target_E-val	rscape_Found	rscape_MSA	rscape_PPV	rscape_SEN	rscape_TP	rscape_True	rscape_alen	rscape_avgid	rscape_method	rscape_nseq	rscape_out	cluster_bed	cluster_human_ids	cluster_human_loc
+0	dinucleotide	54	3.33	-6.39	-4.0	structural RNA alignment quality	-2.39	0.33862	43.52	-5.78	-0.01	RNA	forward	0.999911	3.51	12.0	1.24215	1.1	cluster.aln	0	0	12	12	0.05	9	result.aln_1	88.89	88.89	8	9	51	45.46	GTp	12	RESULTS/result.aln.sum			
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.alignment.cons.tsv	Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,2 @@
+	Background model	Columns	Combinations/Pair	Consensus MFE	Covariance contribution	Decision model	Energy contribution	G+C content	Mean pairwise identity	Mean single sequence MFE	Mean z-score	Prediction	Reading direction	SVM RNA-class probability	SVM decision value	Sequences	Shannon entropy	Structure conservation index	alignment	multihit per_species	num_human_seqs_all	num_seqs	num_seqs_all	# seqId	basePairCount	beginPos	bgCykProb	bgProb	cykScore	endPos	fold	posScore	score	strCykProb	strPostProb	strProb	rscape_#target_E-val	rscape_Found	rscape_MSA	rscape_PPV	rscape_SEN	rscape_TP	rscape_True	rscape_alen	rscape_avgid	rscape_method	rscape_nseq	rscape_out	cluster_bed	cluster_human_ids	cluster_human_loc
+0	dinucleotide	70	1.0	-14.41	0.0	structural RNA alignment quality	-14.41	0.5	99.1	-14.54	-0.06	OTHER	forward	0.012596	-1.62	19.0	0.0255	0.99	cluster.aln	0	0	19	19														0.05	0	result.aln_1	0.0	0.0	0	20	70	99.1	GTp	19	RESULTS/result.aln.sum			
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3.alignment.cons.tsv	Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,2 @@
+	Background model	Columns	Combinations/Pair	Consensus MFE	Covariance contribution	Decision model	Energy contribution	G+C content	Mean pairwise identity	Mean single sequence MFE	Mean z-score	Prediction	Reading direction	SVM RNA-class probability	SVM decision value	Sequences	Shannon entropy	Structure conservation index	alignment	multihit per_species	num_human_seqs_all	num_seqs	num_seqs_all	# seqId	basePairCount	beginPos	bgCykProb	bgProb	cykScore	endPos	fold	posScore	score	strCykProb	strPostProb	strProb	rscape_#target_E-val	rscape_Found	rscape_MSA	rscape_PPV	rscape_SEN	rscape_TP	rscape_True	rscape_alen	rscape_avgid	rscape_method	rscape_nseq	rscape_out	cluster_bed	cluster_human_ids	cluster_human_loc
+0	dinucleotide	70	1.0	-14.41	0.0	structural RNA alignment quality	-14.41	0.5	99.1	-14.54	-0.06	OTHER	forward	0.012596	-1.62	19.0	0.0255	0.99	cluster.aln	0	1	19	19														0.05	0	result.aln_1	0.0	0.0	0	20	70	99.1	GTp	19	RESULTS/result.aln.sum	"chr7	27164936	27165005	cluster-X	0	-"	SEQ1_526_595_+	1714-1783
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clusters1.bed	Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,3 @@
+track name= description="GraphClust-2.0 " visibility=3 itemRgb=On
+browser position chr7	27164936	27165005
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metrics1.tsv	Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,4 @@
+cluster	num_seqs_all	Mean pairwise identity	Structure conservation index	SVM RNA-class probability	rscape_TP	score	cluster_human_loc	cluster_bed
+1	12	43.52	1.1	0.999911	8	nan	nan	nan
+2	19	99.1	0.99	0.012596	0	nan	nan	nan
+3	19	99.1	0.99	0.012596	0	nan	1714-1783	"chr7	27164936	27165005	cluster-X	0	-"