changeset 4:629a377741af draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit a7400d366495a3e2bd6e0cb120834a59327537ec
author iuc
date Wed, 07 Dec 2022 15:26:09 +0000
parents 6070512177f8
children ce69320f0845
files pyega3.xml test-data/filelist3.tabular test-data/filelist_EGAD00001003338.tabular
diffstat 3 files changed, 38 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/pyega3.xml	Thu Oct 27 15:11:53 2022 +0000
+++ b/pyega3.xml	Wed Dec 07 15:26:09 2022 +0000
@@ -1,7 +1,7 @@
 <tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.01" >
     <macros>
         <token name="@TOOL_VERSION@">4.0.5</token>
-        <token name="@VERSION_SUFFIX@">1</token>
+        <token name="@VERSION_SUFFIX@">2</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement>
@@ -49,15 +49,14 @@
 #elif $action.action_type == "download_files"
     #import re
     #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines()]
-    #set outfiles=[x.split('\t')[int(str($action.file_column))-1].replace('\n', '') for x in open(str($id_table)).readlines()]
     mkdir downloads
-    #for f, o in zip($file_ids, $outfiles)
+    #for f in $file_ids
       #if not f.startswith("EGAF")
         && >&2 echo "Ignoring \"$f\": no EGA file ID"
         #continue
       #end if
       && 
-      echo 'Downloading $f: $o'
+      echo 'Downloading $f'
       &&
       pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials'
         fetch '$f'
@@ -72,17 +71,8 @@
           #end if
         #end if
         --output-dir downloads
-      #if re.match(".*vcf(_genomic_range_.*|).gz$", o)
-        #if $action.range.reference_name or ($action.range.start or $action.range.end)
-          && mv 'downloads/$f/'$o[:-3]'_genomic_range_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.start" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.end")'.gz' 'downloads/$f/'$o'.vcf_bgzip'
-        #else
-          && mv 'downloads/$f/$o' 'downloads/$f/'$o'.vcf_bgzip'
-        #end if
-      #end if
-      #if re.match(".*ped$", o)
-        && mv 'downloads/$f/$o' 'downloads/$f/'$o'.tabular'
-      #end if
     #end for
+    && for vcf in downloads/**/*vcf.gz; do [ -e "\${vcf}" ] || continue; mv "\${vcf}" "\${vcf%vcf.gz}vcf_bgzip"; done  ## rename *vcf.gz to *vcf_bgzip so discovered datasets get the vcf_bgzip datatype; glob directly instead of parsing ls output, and skip the unexpanded pattern when no file matches
     && rm -f downloads/**/*.md5  ## checksum validation already performed by pyEGA, clean up downloads folder
 
 #end if
@@ -128,7 +118,6 @@
             <when value="download_files">
                 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/>
                 <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" />
-                <param name="file_column" type="data_column" data_ref="id_table" label="Column containing the file names" />
                 <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false">
                     <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." />
                     <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/>
@@ -153,7 +142,7 @@
         </data>
         <collection name="downloaded_file_collection" type="list" label="${tool.name} on ${on_string}: Downloaded datasets">
             <filter> action['action_type'] == 'download_files' </filter>
-            <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" />
+            <discover_datasets pattern="__name_and_ext__" recurse="true" directory="downloads" />
         </collection>
     </outputs>
     <tests>
@@ -187,7 +176,13 @@
             <param name="file_id" value="EGAF00001775036"/>
             <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/>
         </test>
-        <!-- download a single file, with genomic range specified -->
+        <!-- download a single vcf.gz file; the output must be detected as vcf_bgzip -->
+        <test expect_num_outputs="1"> 
+            <param name="action_type" value="download_file"/>
+            <param name="file_id" value="EGAF00007243775"/>
+            <output name="downloaded_file" md5="51cfb69bf3b9416ff425381a58c18a2b" ftype="vcf_bgzip" />
+        </test>
+        <!-- download a single bam, with genomic range specified -->
         <test expect_num_outputs="1">
             <param name="action_type" value="download_file"/>
             <param name="file_id" value="EGAF00001753756"/>
@@ -201,7 +196,6 @@
             <param name="action_type" value="download_files"/>
             <param name="id_table" value="filelist.tabular"/>
             <param name="id_column" value="1"/>
-            <param name="file_column" value="5"/>
             <output_collection name="downloaded_file_collection" type="list" count="2">
                 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" />
                 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" />
@@ -215,7 +209,6 @@
             <param name="action_type" value="download_files"/>
             <param name="id_table" value="filelist2.tabular"/>
             <param name="id_column" value="1"/>
-            <param name="file_column" value="5"/>
             <param name="reference_name" value="1"/>
             <param name="start" value="0"/>
             <param name="end" value="10000"/>
@@ -232,10 +225,9 @@
             <param name="action_type" value="download_files"/>
             <param name="id_table" value="filelist3.tabular"/>
             <param name="id_column" value="1"/>
-            <param name="file_column" value="5"/>
             <output_collection name="downloaded_file_collection" type="list" count="2">
-                <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz" md5="51cfb69bf3b9416ff425381a58c18a2b" />
-                <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz" md5="ebad4425191a89d3e970c02190a87175" />
+                <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100" md5="51cfb69bf3b9416ff425381a58c18a2b" ftype="vcf_bgzip" />
+                <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000" md5="ebad4425191a89d3e970c02190a87175" ftype="vcf_bgzip" />
             </output_collection>
         </test>
     </tests>
--- a/test-data/filelist3.tabular	Thu Oct 27 15:11:53 2022 +0000
+++ b/test-data/filelist3.tabular	Wed Dec 07 15:26:09 2022 +0000
@@ -1,3 +1,3 @@
 File ID	Status	Bytes	Check sum	File name
-EGAF00007243779	1	15340	ebad4425191a89d3e970c02190a87175	HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz
-EGAF00007243775	1	23033	51cfb69bf3b9416ff425381a58c18a2b	HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz
\ No newline at end of file
+EGAF00007243779	1	15340	ebad4425191a89d3e970c02190a87175	HG01890.HGSVC__145r__1.900100-10001000__18.2001000-900010.vcf.gz
+EGAF00007243775	1	23033	51cfb69bf3b9416ff425381a58c18a2b	HG00408.novoBreak__256r__4.100100-10100100__7.200100-90001.vcf.gz
\ No newline at end of file
--- a/test-data/filelist_EGAD00001003338.tabular	Thu Oct 27 15:11:53 2022 +0000
+++ b/test-data/filelist_EGAD00001003338.tabular	Wed Dec 07 15:26:09 2022 +0000
@@ -59,3 +59,25 @@
 EGAF00001770107	1	3551031027	dfef3f355230915418a78da460665d56	ENCFF284YOU.bam
 EGAF00001775034	1	5991400	b8ae14d5d1f717ab17d45e8fc36946a0	ENCFF000VWO.bam.bai
 EGAF00001775036	1	4804928	3b89b96387db5199fef6ba613f70e27c	ENCFF284YOU.bam.bai
+EGAF00007462299	1	15965067873	858d928a1a772cd0f1617ef72bae304e	EE-2564.HG03520.alt_bwamem_GRCh38DH.20150718.ESN.low_coverage.bam
+EGAF00007462300	1	7426229740	13a22b3eb197affaf69f5b25c2cad1b1	EE-2564.HG03520.alt_bwamem_GRCh38DH.20150718.ESN.low_coverage.cram
+EGAF00007462301	1	69693619962	cca89d5791ebbac96fdb692a2894949f	EE-2564.HGDP00862.alt_bwamem_GRCh38DH.20181023.Maya.bam
+EGAF00007462302	1	34851312043	9ac1dddf1c2439ae4386658a32321624	EE-2564.HGDP00862.alt_bwamem_GRCh38DH.20181023.Maya.cram
+EGAF00007462303	1	8200549586	6a5df1b7f6acc62b0320c2adeff6c000	EE-2564.NA18636.alt_bwamem_GRCh38DH.20150826.CHB.exome.bam
+EGAF00007462304	1	4243466784	ed2b26633c18288410724224f3bdae41	EE-2564.NA18636.alt_bwamem_GRCh38DH.20150826.CHB.exome.cram
+EGAF00007462305	1	68882266951	5746313dd819ba646b5f29830e0f2d50	EE-2564.SAMEA3302902.alt_bwamem_GRCh38DH.20200922.Karitiana.simons.bam
+EGAF00007462306	1	37011545036	082d881341b21ed6e2b69e98da62a7db	EE-2564.SAMEA3302902.alt_bwamem_GRCh38DH.20200922.Karitiana.simons.cram
+EGAF00007462307	1	299199	ba85892ac1adb70b42e6635eacafe411	EE-2564.NA18636.alt_bwamem_GRCh38DH.20150826.CHB.exome.cram.crai
+EGAF00007462308	1	1694742	8fc9df0e8a150acf0dd3b3df69a1a216	EE-2564.HGDP00862.alt_bwamem_GRCh38DH.20181023.Maya.cram.crai
+EGAF00007462309	1	9515968	12ac8cc4ea178dfcb8736b4ebe45c531	EE-2564.SAMEA3302902.alt_bwamem_GRCh38DH.20200922.Karitiana.simons.bam.bai
+EGAF00007462310	1	3706072	2cef86a5773cff9e337ad3ed57d545b7	EE-2564.NA18636.alt_bwamem_GRCh38DH.20150826.CHB.exome.bam.bai
+EGAF00007462311	1	500587	e35f25f65c961aab2ab1cdc1e45ad824	EE-2564.HG03520.alt_bwamem_GRCh38DH.20150718.ESN.low_coverage.cram.crai
+EGAF00007462312	1	9063136	e5c781c6d58dfd9a721ec4751d7acde2	EE-2564.HG03520.alt_bwamem_GRCh38DH.20150718.ESN.low_coverage.bam.bai
+EGAF00007462313	1	2181685	ca047cdbacf5cd29720ca94098ac09fe	EE-2564.SAMEA3302902.alt_bwamem_GRCh38DH.20200922.Karitiana.simons.cram.crai
+EGAF00007462314	1	9548376	b2a36fc7043e807ad111c567120611bf	EE-2564.HGDP00862.alt_bwamem_GRCh38DH.20181023.Maya.bam.bai
+EGAF00007553556	1	14585114453	1377526fc26a58294635ba48edc35535	EE-2564.ALL_2504_Samples.wgs.phase3.v5.20130502.genotypes.bcf
+EGAF00007553557	1	16811693421	6b9e2acfc328ce13e6ce9ac745ac6561	EE-2564.ALL_2504_Samples.wgs.phase3.v5.20130502.genotypes.vcf.gz
+EGAF00007553558	1	2053513514	a6d323bb194eac03e7112c243aeef5d3	EE-2564.ALL_2504_Samples.wgs.phase3.v5.20130502.genotypes_only_chr1and2.bcf
+EGAF00007553559	1	2426228900	66bb08a031ff66c8c03ee9917be53fcf	EE-2564.ALL_2504_Samples.wgs.phase3.v5.20130502.genotypes_only_chr1and2.vcf.gz
+EGAF00007553560	1	2691732	26d41f1a09671c834fd7b4eaac3f1a67	EE-2564.ALL_2504_Samples.wgs.phase3.v5.20130502.genotypes.vcf.gz.tbi
+EGAF00007553561	1	434009	357abb1556367d5c096cd54801a5e783	EE-2564.ALL_2504_Samples.wgs.phase3.v5.20130502.genotypes_only_chr1and2.vcf.gz.tbi