changeset 0:90d4dfb2d266 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cojac commit 38ed91999d4bbe3bedc294197926ea332eb6cd5e
author iuc
date Thu, 11 Aug 2022 13:48:28 +0000
parents
children df723877bcd8
files cooc_mutbamscan.xml macros.xml test-data/amplicons111.yaml test-data/cooc-test111.json test-data/nCoV-2019.insert.V3.bed test-data/omicron_ba1_mutations.yaml test-data/tbam11.bam
diffstat 7 files changed, 349 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cooc_mutbamscan.xml	Thu Aug 11 13:48:28 2022 +0000
@@ -0,0 +1,155 @@
+<tool id="cooc_mutbamscan" name="Cojac: mutbamscan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
+      profile="@PROFILE@">
+    <!-- Short description of what the tool does. -->
+    <description>
+        scan an alignment file for mutation co-occurrences
+    </description>
+    <!-- Shared tokens and XML macros are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Expand the bio.tools xref, the package requirement and the version command defined in macros.xml. -->
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+## Stage the BED file under a fixed name in the job working directory.
+ln -s '$bed_file' 'bed_file' &&
+## The macro below stages the variant definitions and sets the vocdir variable passed to -m.
+@VOCDIR_COMMAND@
+#import re
+## Replace every character other than whitespace, word characters, '-' and '.' by '_'.
+#set identifier = re.sub('[^\s\w\-\\.]', '_', str($bam_file.element_identifier))
+## The sanitised identifier may still contain whitespace, so every use of it is quoted.
+## NOTE(review): the input accepts bam, cram and sam, but the link names and the
+## bam_index metadata below are BAM specific; confirm how cram/sam inputs should be staged.
+ln -s '$bam_file' '${identifier}.bam' &&
+ln -s '${bam_file.metadata.bam_index}' '${identifier}.bai' &&
+cooc-mutbamscan
+    -a '${identifier}.bam'
+    -b 'bed_file'
+    -m '$vocdir'
+    -y cooc.yaml
+    -j cooc.json
+    #if $amplicons_file.choice == 'build'
+        -A amplicons.yaml
+    #else
+        -Q '$amplicons_file.in_amp'
+    #end if
+    -t cooc.tsv
+    --cooc $cooc
+    ]]></command>
+    <inputs>
+        <!-- Source of the variant definitions: shipped with the tool or taken from the history. -->
+        <expand macro="vocdir_input"/>
+        <!-- Passed to cooc-mutbamscan with the -b option. -->
+        <param name="bed_file" type="data" format="bed"
+               label="BED file defining the amplicons"/>
+        <!-- Passed to cooc-mutbamscan with the -a option. -->
+        <param name="bam_file" type="data" format="bam,cram,sam"
+               label="Alignment BAM/CRAM/SAM file"/>
+        <param argument="--cooc" type="integer" min="1" value="2"
+               label="Minimum number of co-occurring mutations on the same amplicon"/>
+        <!-- "build" writes a new amplicons YAML (option -A); "custom" queries an existing one (option -Q). -->
+        <conditional name="amplicons_file">
+            <param name="choice" type="select" label="Source of amplicons YAML file">
+                <option value="build">Build from BED + set of YAMLs for variants of concern</option>
+                <option value="custom">From history</option>
+            </param>
+            <when value="build"/>
+            <when value="custom">
+                <param name="in_amp" type="data" format="yaml"
+                       label="YAML file to query amplicons"/>
+            </when>
+        </conditional>
+        <!-- Selects which of the YAML / JSON / tabular reports are kept as outputs (see the output filters). -->
+        <param name="output_files" type="select" display="checkboxes"
+               multiple="true" label="Output files">
+            <option value="yaml" selected="true">YAML</option>
+            <option value="json">JSON</option>
+            <option value="tabular">tabular</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!--
+            Each report is kept only when its checkbox is ticked in "output_files".
+            The leading "output_files and" guards against the parameter being None
+            when no checkbox is ticked; without it the membership test raises an
+            exception instead of evaluating to false.
+        -->
+        <data name="cooc_yaml" format="yaml"
+              label="${tool.name} on ${on_string}: Mutation cooccurrence (yaml)"
+              from_work_dir="cooc.yaml">
+            <filter>output_files and 'yaml' in output_files</filter>
+        </data>
+        <data name="cooc_json" format="json"
+              label="${tool.name} on ${on_string}: Mutation cooccurrence (json)"
+              from_work_dir="cooc.json">
+            <filter>output_files and 'json' in output_files</filter>
+        </data>
+        <data name="cooc_tsv" format="tabular"
+              label="${tool.name} on ${on_string}: Mutation cooccurrence (tabular)"
+              from_work_dir="cooc.tsv">
+            <filter>output_files and 'tabular' in output_files</filter>
+        </data>
+        <!-- Only produced when the amplicons YAML is built by this run rather than supplied from the history. -->
+        <data name="amplicons" format="yaml"
+              label="${tool.name} on ${on_string}: Amplicons (yaml)"
+              from_work_dir="amplicons.yaml">
+            <filter>amplicons_file['choice'] == 'build'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!--
+            Test 1: variant definitions from the history, amplicons YAML built from the BED file.
+            Four outputs are expected: the three selected reports plus the built amplicons YAML.
+        -->
+        <test expect_num_outputs="4">
+            <conditional name="vocdir_option">
+                <param name="choice" value="custom"/>
+                <param name="voc_file" value="omicron_ba1_mutations.yaml"/>
+            </conditional>
+            <param name="bam_file" value="tbam11.bam"/>
+            <param name="bed_file" value="nCoV-2019.insert.V3.bed"/>
+            <conditional name="amplicons_file">
+                <param name="choice" value="build"/>
+            </conditional>
+            <param name="output_files" value="yaml,json,tabular"/>
+            <output name="cooc_yaml" ftype="yaml">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="cooc_json" ftype="json">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="cooc_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="amplicons" ftype="yaml">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!--
+            Test 2: variant definitions shipped with the tool, amplicons YAML supplied from the history.
+            Two outputs are expected: only YAML and tabular are selected, and the amplicons
+            output is filtered out because the amplicons source is "custom".
+        -->
+        <test expect_num_outputs="2">
+            <conditional name="vocdir_option">
+                <param name="choice" value="cache"/>
+            </conditional>
+            <param name="bam_file" value="tbam11.bam"/>
+            <param name="bed_file" value="nCoV-2019.insert.V3.bed"/>
+            <conditional name="amplicons_file">
+                <param name="choice" value="custom"/>
+                <param name="in_amp" value="amplicons111.yaml"/>
+            </conditional>
+            <param name="output_files" value="yaml,tabular"/>
+            <output name="cooc_yaml" ftype="yaml">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="cooc_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Information about **cooc-mutbamscan** method
+============================================
+
+The method scans an alignment BAM/CRAM/SAM file for mutation co-occurrences and outputs the results as YAML, JSON and/or tabular files.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Aug 11 13:48:28 2022 +0000
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Version of the cojac package; used for the package requirement and as the first part of the tool version. -->
+    <token name="@TOOL_VERSION@">0.2</token>
+    <!-- Wrapper revision appended to the tool version after "+galaxy". -->
+    <token name="@VERSION_SUFFIX@">0</token>
+    <!-- Value of the tool's "profile" attribute. -->
+    <token name="@PROFILE@">21.01</token>
+    <!-- Cross-reference to the cojac entry on bio.tools. -->
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">cojac</xref>
+        </xrefs>
+    </xml>
+    <!-- Package requirement for cojac; the yield lets an expanding tool append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">cojac</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Version command: echoes the tool version token rather than querying the executable. -->
+    <xml name="version">
+        <version_command>echo @TOOL_VERSION@</version_command>
+    </xml>
+    <!--
+        Input section choosing where the variant definition YAML files come from:
+        "cache" uses the directory shipped with the cojac package, "custom" takes
+        one or more YAML datasets from the history.
+    -->
+    <xml name="vocdir_input">
+        <conditional name="vocdir_option">
+            <param name="choice" type="select"
+                   label="Source of YAML files defining the variants of concern"
+                   help="Cojac ships with a directory of variant definition YAML files (https://github.com/cbg-ethz/cojac/tree/master/voc), which the tool can access internally. You can also download the latest version of the YAML files from https://github.com/phe-genomics/variant_definitions and use them as custom YAML files defining the variants of concern.">
+                <option value="cache">Definitions shipped with the tool (can be outdated)</option>
+                <option value="custom">From history</option>
+            </param>
+            <when value="cache"/>
+            <when value="custom">
+                <param name="voc_file" type="data" format="yaml" multiple="true"
+                       label="YAML defining the variant of concern"/>
+            </when>
+        </conditional>
+    </xml>
+    <!--
+        Command fragment that stages the variant definitions and sets the Cheetah
+        variable "vocdir" to the directory holding them. Every emitted shell line
+        ends with a trailing "&&" so the fragment can be followed by further commands.
+    -->
+    <token name="@VOCDIR_COMMAND@"><![CDATA[
+#import re
+#if $vocdir_option.choice == 'custom'
+    ## Link every selected history dataset into ./voc under its element identifier.
+    #set vocdir = 'voc/'
+    mkdir -p voc &&
+    #for $input_file in $vocdir_option.voc_file
+        ## Sanitise the identifier before it is used as a quoted file name: every character
+        ## other than whitespace, word characters, '-' and '.' is replaced by '_'.
+        #set $file_path = $vocdir + re.sub('[^\s\w\-\\.]', '_', str($input_file.element_identifier))
+        ln -s '$input_file' '$file_path' &&
+    #end for
+#else
+    ## Locate share/cojac two directory levels above the cooc-mutbamscan executable and link it as ./db.
+    DB_PATH="\$(dirname "\$(dirname "\$(which cooc-mutbamscan)")")/share/cojac" &&
+    ln -s "\$DB_PATH" db &&
+    #set $vocdir = 'db/voc'
+#end if
+]]></token>
+    <token name="@HELP_HEADER@"><![CDATA[
+What it does
+============
+
+The cojac package comprises a set of command-line tools to analyse co-occurrence of mutations on amplicons. It is useful, for example, for early detection of viral variants of concern (e.g. Alpha, Delta, Omicron) in environmental samples, and has been designed to scan for multiple SARS-CoV-2 variants in wastewater samples.
+]]></token>
+    <!-- DOI citation attached to every tool that expands this macro. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2021.01.08.21249379</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/amplicons111.yaml	Thu Aug 11 13:48:28 2022 +0000
@@ -0,0 +1,3 @@
+76_om1: [22821, 23189, 22907, 23114, {22898: A, 23048: A}]
+81_om1: [24416, 24765, 24473, 24691, {24469: A, 24503: T}]
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cooc-test111.json	Thu Aug 11 13:48:28 2022 +0000
@@ -0,0 +1,1 @@
+{"tbam11.bam": {"76_om1": {"sites": {"1": 68, "2": 4}, "muts": {}}, "81_om1": {"sites": {"2": 211}, "muts": {"1": 209}}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nCoV-2019.insert.V3.bed	Thu Aug 11 13:48:28 2022 +0000
@@ -0,0 +1,98 @@
+MN908947.3	54	385	1	1	+
+MN908947.3	342	704	2	2	+
+MN908947.3	664	1004	3	1	+
+MN908947.3	965	1312	4	2	+
+MN908947.3	1264	1623	5	1	+
+MN908947.3	1595	1942	6	2	+
+MN908947.3	1897	2242	7	1	+
+MN908947.3	2205	2568	8	2	+
+MN908947.3	2529	2880	9	1	+
+MN908947.3	2850	3183	10	2	+
+MN908947.3	3166	3507	11	1	+
+MN908947.3	3482	3826	12	2	+
+MN908947.3	3795	4142	13	1	+
+MN908947.3	4077	4402	14	2	+
+MN908947.3	4322	4666	15	1	+
+MN908947.3	4658	4995	16	2	+
+MN908947.3	4966	5296	17	1	+
+MN908947.3	5287	5620	18	2	+
+MN908947.3	5586	5932	19	1	+
+MN908947.3	5894	6247	20	2	+
+MN908947.3	6197	6526	21	1	+
+MN908947.3	6495	6846	22	2	+
+MN908947.3	6745	7092	23	1	+
+MN908947.3	7058	7389	24	2	+
+MN908947.3	7332	7671	25	1	+
+MN908947.3	7651	7997	26	2	+
+MN908947.3	7968	8319	27	1	+
+MN908947.3	8275	8635	28	2	+
+MN908947.3	8619	8954	29	1	+
+MN908947.3	8913	9245	30	2	+
+MN908947.3	9226	9557	31	1	+
+MN908947.3	9502	9834	32	2	+
+MN908947.3	9806	10146	33	1	+
+MN908947.3	10099	10437	34	2	+
+MN908947.3	10384	10737	35	1	+
+MN908947.3	10688	11048	36	2	+
+MN908947.3	11022	11372	37	1	+
+MN908947.3	11331	11668	38	2	+
+MN908947.3	11584	11927	39	1	+
+MN908947.3	11889	12234	40	2	+
+MN908947.3	12133	12465	41	1	+
+MN908947.3	12439	12779	42	2	+
+MN908947.3	12732	13074	43	1	+
+MN908947.3	13029	13363	44	2	+
+MN908947.3	13344	13660	45	1	+
+MN908947.3	13625	13961	46	2	+
+MN908947.3	13946	14271	47	1	+
+MN908947.3	14232	14579	48	2	+
+MN908947.3	14570	14898	49	1	+
+MN908947.3	14895	15224	50	2	+
+MN908947.3	15193	15538	51	1	+
+MN908947.3	15503	15861	52	2	+
+MN908947.3	15851	16186	53	1	+
+MN908947.3	16144	16485	54	2	+
+MN908947.3	16444	16804	55	1	+
+MN908947.3	16770	17130	56	2	+
+MN908947.3	17087	17430	57	1	+
+MN908947.3	17406	17738	58	2	+
+MN908947.3	17697	18036	59	1	+
+MN908947.3	17993	18324	60	2	+
+MN908947.3	18275	18650	61	1	+
+MN908947.3	18618	18957	62	2	+
+MN908947.3	18918	19275	63	1	+
+MN908947.3	19232	19591	64	2	+
+MN908947.3	19570	19911	65	1	+
+MN908947.3	19866	20231	66	2	+
+MN908947.3	20200	20542	67	1	+
+MN908947.3	20496	20867	68	2	+
+MN908947.3	20813	21146	69	1	+
+MN908947.3	21104	21427	70	2	+
+MN908947.3	21386	21716	71	1	+
+MN908947.3	21682	22013	72	2	+
+MN908947.3	21990	22324	73	1	+
+MN908947.3	22290	22626	74	2	+
+MN908947.3	22542	22877	75	1	+
+MN908947.3	22821	23189	76	2	+
+MN908947.3	23144	23500	77	1	+
+MN908947.3	23466	23822	78	2	+
+MN908947.3	23812	24145	79	1	+
+MN908947.3	24100	24443	80	2	+
+MN908947.3	24416	24765	81	1	+
+MN908947.3	24721	25052	82	2	+
+MN908947.3	25003	25347	83	1	+
+MN908947.3	25301	25646	84	2	+
+MN908947.3	25623	25969	85	1	+
+MN908947.3	25924	26290	86	2	+
+MN908947.3	26219	26566	87	1	+
+MN908947.3	26542	26890	88	2	+
+MN908947.3	26860	27190	89	1	+
+MN908947.3	27164	27511	90	2	+
+MN908947.3	27471	27825	91	1	+
+MN908947.3	27808	28145	92	2	+
+MN908947.3	28104	28442	93	1	+
+MN908947.3	28416	28756	94	2	+
+MN908947.3	28699	29041	95	1	+
+MN908947.3	29007	29356	96	2	+
+MN908947.3	29316	29665	97	1	+
+MN908947.3	29510	29836	98	2	+
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/omicron_ba1_mutations.yaml	Thu Aug 11 13:48:28 2022 +0000
@@ -0,0 +1,30 @@
+variant:
+  voc: VOC-21NOV-01
+  pheuid: pentagon-refining
+  short: om1
+  pangolin: BA.1
+source:
+- https://github.com/cov-lineages/pango-designation/issues/343
+threshold: 10
+mut:
+  # ORF1ab
+  2832: 'A>G' # nsp3:K38R
+  5386: 'T>G' # syn nsp3
+  8393: 'G>A' # nsp3:A1892T
+  11537: 'A>G' # nsp6:I189V
+  13195: 'T>C' # syn nsp10
+  18163: 'A>G' # nsp14:I42V
+  # S
+  22679: 'T>C' # surface glycoprotein:S373P
+  22898: 'G>A' # surface glycoprotein:G446S
+  23048: 'G>A' # surface glycoprotein:G496S
+  23202: 'C>A' # surface glycoprotein:T547K
+  23599: 'T>G' # surface glycoprotein:N679K
+  24130: 'C>A' # surface glycoprotein:N856K
+  24469: 'T>A' # surface glycoprotein:N969K
+  24503: 'C>T' # surface glycoprotein:L981F
+  # M
+  26530: 'A>G' # membrane glycoprotein:D3G
+  26577: 'C>G' # membrane glycoprotein:Q19E
+  # ORF6
+  27259: 'A>C' # syn ORF6 protein
Binary file test-data/tbam11.bam has changed