Mercurial > repos > bgruening > graphclust_aggregate_alignments
changeset 0:f5ad1114a2ee draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tools/GraphClust/AggregateAlignments commit 5bbebbb071bdd0c3de481ddaf5f78cbb95d93d76
author | bgruening |
---|---|
date | Sat, 27 Oct 2018 12:56:16 -0400 |
parents | |
children | |
files | graphclust_aggregate.xml test-data/1.alignment.cons.tsv test-data/2.alignment.cons.tsv test-data/3.alignment.cons.tsv test-data/clusters1.bed test-data/metrics1.tsv |
diffstat | 6 files changed, 73 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/graphclust_aggregate.xml Sat Oct 27 12:56:16 2018 -0400
@@ -0,0 +1,80 @@
+<tool id="graphclust_aggregate_alignments" name="Aggregate and filter alignment metrics" version="0.1">
+    <description>of individual clusters, like the output of graphclust_align_cluster</description>
+    <requirements>
+        <requirement type="package" version="0.6.0">graphclust-wrappers</requirement>
+        <requirement type="package" version="0.23.0">pandas</requirement>
+    </requirements>
+    <!-- Every input is symlinked into ./clusters_metrics under a sanitised file name; the quoted
+         glob over that directory is then passed to aggregate_align_metrics.py as its tsv pattern. -->
+    <command detect_errors="exit_code">
+    <![CDATA[
+        mkdir -p ./clusters_metrics &&
+        #import re
+        #for $tsv in $alignment_cons_tsv:
+            #set $safename_tsv = re.sub('[^\w\-_\.]', '_', $tsv.element_identifier)
+            ln -f -s '$tsv' './clusters_metrics/${safename_tsv}.tsv' &&
+        #end for
+        aggregate_align_metrics.py
+            --RNAz-prob-threshold $RNAz_prob_threshold
+            --rscape-bp-threshold $rscape_bp_threshold
+            --min-seq-num $min_seq_num
+            $exclude_spurious_structs
+            $additonal_columns
+            --clusters-tsv-pattern "clusters_metrics/*.tsv"
+            --filtered-tsv-out filtered-alignment-metrics.tsv
+            --bed-out bed-cluster-locations.bed
+    ]]></command>
+    <inputs>
+        <param name="alignment_cons_tsv" type="data" format="tabular" multiple="true"
+               label="conservation-metrics-tsv"
+               help="Tabular tsv file of the computed conservation metrics from align cluster step"/>
+        <param name="min_seq_num" argument="--min-seq-num" type="integer" value="3" min="0"
+               label="Minimum cluster size"
+               help="Clusters with fewer sequences are excluded"/>
+        <param name="RNAz_prob_threshold" argument="--RNAz-prob-threshold" type="float" value="0.5" min="0.0" max="1.0"
+               label="RNAz bed threshold"
+               help="Minimum RNAz SVM RNA-class probability for bed annotation as RNAz hit"/>
+        <param name="rscape_bp_threshold" argument="--rscape-bp-threshold" type="integer" value="2" min="0"
+               label="Rscape bed threshold"
+               help="Minimum number of significant covarying basepairs by Rscape for bed annotation as Rscape hit"/>
+        <param name="exclude_spurious_structs" argument="--exclude-spurious-structs" type="boolean" checked="true"
+               truevalue="--exclude-spurious-structs" falsevalue=""
+               label="Exclude spurious structures"
+               help="Exclude spurious alignment structs with poor SCI less than 0.01"/>
+        <!-- The misspelt parameter name "additonal_columns" is kept on purpose: it is referenced by
+             the command template and renaming it would break saved workflows. -->
+        <param name="additonal_columns" argument="--all-columns" type="boolean" checked="false"
+               truevalue="--all-columns" falsevalue=""
+               label="Output additional columns"
+               help="Output additional prediction metrics"/>
+    </inputs>
+    <outputs>
+        <!-- NOTE(review): bed_clusters is declared as tabular although it is written through the
+             bed-out option; consider format="bed" once downstream compatibility is confirmed. -->
+        <data name="filtered_tsv" format="tabular" from_work_dir="filtered-alignment-metrics.tsv" label="filtered-alignment-metrics.tsv" />
+        <data name="bed_clusters" format="tabular" from_work_dir="bed-cluster-locations.bed" label="bed-cluster-locations.bed" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="alignment_cons_tsv" value="1.alignment.cons.tsv,2.alignment.cons.tsv,3.alignment.cons.tsv"/>
+            <output name="filtered_tsv" file="metrics1.tsv" />
+            <output name="bed_clusters" file="clusters1.bed" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Aggregate and filter alignment metrics of individual clusters, like the output
+of graphclust_align_cluster.
+
+**Inputs**
+
+One or more tabular files with the conservation metrics computed by the align cluster step.
+
+**Outputs**
+
+- ``filtered-alignment-metrics.tsv``: the filtered, aggregated metrics table.
+- ``bed-cluster-locations.bed``: the cluster locations written by the bed output option.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.597695</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.alignment.cons.tsv Sat Oct 27 12:56:16 2018 -0400 @@ -0,0 +1,2 @@ + Background model Columns Combinations/Pair Consensus MFE Covariance contribution Decision model Energy contribution G+C content Mean pairwise identity Mean single sequence MFE Mean z-score Prediction Reading direction SVM RNA-class probability SVM decision value Sequences Shannon entropy Structure conservation index alignment multihit per_species num_human_seqs_all num_seqs num_seqs_all rscape_#target_E-val rscape_Found rscape_MSA rscape_PPV rscape_SEN rscape_TP rscape_True rscape_alen rscape_avgid rscape_method rscape_nseq rscape_out cluster_bed cluster_human_ids cluster_human_loc +0 dinucleotide 54 3.33 -6.39 -4.0 structural RNA alignment quality -2.39 0.33862 43.52 -5.78 -0.01 RNA forward 0.999911 3.51 12.0 1.24215 1.1 cluster.aln 0 0 12 12 0.05 9 result.aln_1 88.89 88.89 8 9 51 45.46 GTp 12 RESULTS/result.aln.sum
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.alignment.cons.tsv Sat Oct 27 12:56:16 2018 -0400 @@ -0,0 +1,2 @@ + Background model Columns Combinations/Pair Consensus MFE Covariance contribution Decision model Energy contribution G+C content Mean pairwise identity Mean single sequence MFE Mean z-score Prediction Reading direction SVM RNA-class probability SVM decision value Sequences Shannon entropy Structure conservation index alignment multihit per_species num_human_seqs_all num_seqs num_seqs_all # seqId basePairCount beginPos bgCykProb bgProb cykScore endPos fold posScore score strCykProb strPostProb strProb rscape_#target_E-val rscape_Found rscape_MSA rscape_PPV rscape_SEN rscape_TP rscape_True rscape_alen rscape_avgid rscape_method rscape_nseq rscape_out cluster_bed cluster_human_ids cluster_human_loc +0 dinucleotide 70 1.0 -14.41 0.0 structural RNA alignment quality -14.41 0.5 99.1 -14.54 -0.06 OTHER forward 0.012596 -1.62 19.0 0.0255 0.99 cluster.aln 0 0 19 19 0.05 0 result.aln_1 0.0 0.0 0 20 70 99.1 GTp 19 RESULTS/result.aln.sum
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.alignment.cons.tsv Sat Oct 27 12:56:16 2018 -0400 @@ -0,0 +1,2 @@ + Background model Columns Combinations/Pair Consensus MFE Covariance contribution Decision model Energy contribution G+C content Mean pairwise identity Mean single sequence MFE Mean z-score Prediction Reading direction SVM RNA-class probability SVM decision value Sequences Shannon entropy Structure conservation index alignment multihit per_species num_human_seqs_all num_seqs num_seqs_all # seqId basePairCount beginPos bgCykProb bgProb cykScore endPos fold posScore score strCykProb strPostProb strProb rscape_#target_E-val rscape_Found rscape_MSA rscape_PPV rscape_SEN rscape_TP rscape_True rscape_alen rscape_avgid rscape_method rscape_nseq rscape_out cluster_bed cluster_human_ids cluster_human_loc +0 dinucleotide 70 1.0 -14.41 0.0 structural RNA alignment quality -14.41 0.5 99.1 -14.54 -0.06 OTHER forward 0.012596 -1.62 19.0 0.0255 0.99 cluster.aln 0 1 19 19 0.05 0 result.aln_1 0.0 0.0 0 20 70 99.1 GTp 19 RESULTS/result.aln.sum "chr7 27164936 27165005 cluster-X 0 -" SEQ1_526_595_+ 1714-1783
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/clusters1.bed Sat Oct 27 12:56:16 2018 -0400 @@ -0,0 +1,3 @@ +track name= description="GraphClust-2.0 " visibility=3 itemRgb=On +browser position chr7 27164936 27165005 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metrics1.tsv Sat Oct 27 12:56:16 2018 -0400 @@ -0,0 +1,4 @@ +cluster num_seqs_all Mean pairwise identity Structure conservation index SVM RNA-class probability rscape_TP score cluster_human_loc cluster_bed +1 12 43.52 1.1 0.999911 8 nan nan nan +2 19 99.1 0.99 0.012596 0 nan nan nan +3 19 99.1 0.99 0.012596 0 nan 1714-1783 "chr7 27164936 27165005 cluster-X 0 -"