Mercurial > repos > iuc > data_manager_plant_tribes_scaffolds_downloader

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Tue Feb 14 13:44:24 2017 -0500
@@ -0,0 +1,12 @@
+categories:
+- Data Managers
+description: Data Manager for installing PlantTribes scaffolds data
+homepage_url: http://amborella.huck.psu.edu/
+long_description: |
+  PlantTribes is a collection of automated modular analysis pipelines that utilize objective
+  classifications of complete protein sequences from sequenced plant genomes to perform
+  comparative evolutionary studies.
+name: data_manager_plant_tribes_scaffolds_downloader
+owner: iuc
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader
+type: unrestricted
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.py	Fri Jan 13 10:32:14 2017 -0500
+++ b/data_manager/data_manager_plant_tribes_scaffolds_download.py	Tue Feb 14 13:44:24 2017 -0500
@@ -31,9 +31,23 @@
         shutil.rmtree(dir)


-def url_download(target_directory, url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
-    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
-    make_directory(work_directory)
+def extract_archive(file_path, work_directory):
+    if tarfile.is_tarfile(file_path):
+        fh = tarfile.open(file_path, 'r:*')  # 'r:*' reads with transparent compression detection
+    elif zipfile.is_zipfile(file_path):
+        fh = zipfile.ZipFile(file_path, 'r')
+    else:
+        return  # Neither a tar nor a zip archive: nothing is extracted.
+    fh.extractall(work_directory)  # Unpack every archive member into work_directory.
+
+
+def move_files(work_directory, target_directory):
+    # Move every entry found in work_directory into target_directory.
+    for filename in os.listdir(work_directory):
+        shutil.move(os.path.join(work_directory, filename), target_directory)
+
+
+def url_download(url, work_directory):
     file_path = os.path.join(work_directory, os.path.basename(url))
     src = None
     dst = None
@@ -54,26 +68,48 @@
             src.close()
         if dst:
             dst.close()
-    if tarfile.is_tarfile(file_path):
-        fh = tarfile.open(file_path, 'r:*')
-    elif zipfile.is_zipfile(file_path):
-        fh = zipfile.ZipFile(file_path, 'r')
-    else:
-        return
-    fh.extractall(work_directory)
+    return file_path
+
+
+def download(target_file_path, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
+    data_manager_dict = {}
+    data_table_entry = {}
+    # Download the scaffolds data.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
+    make_directory(work_directory)
+    file_path = url_download(web_url, work_directory)
+    extract_archive(file_path, work_directory)
     os.remove(file_path)
     # Move the scaffolds data files into defined output directory.
-    for filename in os.listdir(work_directory):
-        shutil.move(os.path.join(work_directory, filename), target_directory)
+    target_directory = make_directory(target_file_path)
+    move_files(work_directory, target_directory)
     remove_directory(work_directory)
-    data_manager_dict = {}
-    # Populate the data table, there should be a single entry in target_directory.
+    # Populate the data_manager_dict with the scaffolds data entry.
     for file_path in os.listdir(target_directory):
         full_path = os.path.abspath(os.path.join(target_directory, file_path))
         entry_name = "%s" % os.path.basename(file_path)
-        data_table_entry = dict(value=entry_name, name=entry_name, path=full_path, description=description)
-        for data_table_name in data_table_names:
-            data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+        data_table_entry['value'] = entry_name
+        data_table_entry['name'] = entry_name
+        data_table_entry['path'] = full_path
+        data_table_entry['description'] = description
+    # Download the default configuration files.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
+    make_directory(work_directory)
+    file_path = url_download(config_web_url, work_directory)
+    extract_archive(file_path, work_directory)
+    os.remove(file_path)
+    shutil.rmtree(target_directory)  # NOTE(review): deletes the scaffolds files that 'path' was just set to - confirm intended.
+    # Move the default configuration files into defined output directory.
+    target_directory = make_directory(target_file_path)
+    move_files(work_directory, target_directory)
+    remove_directory(work_directory)
+    # Populate the data_manager_dict with the default configs entry.
+    for file_path in os.listdir(target_directory):
+        full_path = os.path.abspath(os.path.join(target_directory, file_path))
+        data_table_entry['config_path'] = full_path
+    # Add the combined entry to every data table in the data_manager_dict.
+    for data_table_name in data_table_names:
+        data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
     return data_manager_dict


@@ -81,15 +117,16 @@
 parser.add_argument('--description', dest='description', default=None, help='Description')
 parser.add_argument('--name', dest='name', help='Data table entry unique ID')
 parser.add_argument('--out_file', dest='out_file', help='JSON output file')
-parser.add_argument('--web_url', dest='web_url', help='Web URL')
+parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
+parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')

 args = parser.parse_args()

 # Some magic happens with tools of type "manage_data" in that the output
 # file contains some JSON data that allows us to define the target directory.
 params = json.loads(open(args.out_file).read())
-target_directory = params['output_data'][0]['extra_files_path']
-make_directory(target_directory)
+target_file_path = params['output_data'][0]['extra_files_path']
+

 if args.description is None:
     description = ''
@@ -97,7 +134,7 @@
     description = args.description.strip()

 # Get the scaffolds data.
-data_manager_dict = url_download(target_directory, args.web_url, description)
+data_manager_dict = download(target_file_path, args.web_url, args.config_web_url, description)
 # Write the JSON output dataset.
 fh = open(args.out_file, 'wb')
 fh.write(json.dumps(data_manager_dict))
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.xml	Fri Jan 13 10:32:14 2017 -0500
+++ b/data_manager/data_manager_plant_tribes_scaffolds_download.xml	Tue Feb 14 13:44:24 2017 -0500
@@ -1,4 +1,4 @@
-<tool id="data_manager_plant_tribes_scaffolds_download" name="PlantTribes Scaffolds Download" version="1.0.0" tool_type="manage_data">
+<tool id="data_manager_plant_tribes_scaffolds_download" name="PlantTribes Scaffolds Download" version="1.1.0" tool_type="manage_data">
     <description></description>
     <stdio>
         <exit_code range=":-1" level="fatal" description="Error: Cannot open file" />
@@ -7,16 +7,18 @@
     <command>
         <![CDATA[
             python $__tool_directory__/data_manager_plant_tribes_scaffolds_download.py
-            --name "$name"
-            --description "$description"
-            --web_url "$web_url"
-            --out_file "$out_file"
+            --name '$name'
+            --description '$description'
+            --web_url '$web_url'
+            --config_web_url '$config_web_url'
+            --out_file '$out_file'
         ]]>
     </command>
     <inputs>
         <param name="name" type="text" value="" label="Data table entry unique ID"/>
         <param name="description" type="text" value="" label="Description of the data" help="Value is optional"/>
-        <param name="web_url" type="text" value="" label="Web URL" optional="False" />
+        <param name="web_url" type="text" value="" label="URL for downloading scaffolds" help="Must be same version as configs" optional="False" />
+        <param name="config_web_url" type="text" value="" label="URL for downloading configs" help="Must be same version as scaffolds" optional="False" />
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" />
@@ -28,13 +30,20 @@

 **What it does**

-This tool fetches scaffolds data used by the PlantTribes Galaxy tools and populates the plant_tribes_scaffolds data table.
-The scaffolds data can be imported using a URL, and an optional description can be provided that will appear next to the
-scaffolds file name in the data table entry.  Scaffolds data provided by the Floral Genome Project can be downloaded here:
+This tool fetches scaffolds data and default configuration files used by the PlantTribes Galaxy tools and populates the
+plant_tribes_scaffolds data table.  Both the scaffolds data and the default configuration files can be imported using a
+URL, and an optional description can be provided that will appear next to the scaffolds file name in the data table entry.
+
+Scaffolds data provided by the Floral Genome Project can be downloaded using these URLs:

  * 22 plant genomes (Angiosperms clusters, version 1.0):        http://fgp.huck.psu.edu/planttribes_data/22Gv1.0.tar.bz2
  * 22 plant genomes (Angiosperms clusters, version 1.1):        http://fgp.huck.psu.edu/planttribes_data/22Gv1.1.tar.bz2

+Default configuration files provided by the Floral Genome Project can be downloaded using these URLs:
+
+ * 22 plant genomes (Angiosperms clusters, version 1.0):        http://fgp.huck.psu.edu/planttribes_config/22Gv1.0.tar.bz2
+ * 22 plant genomes (Angiosperms clusters, version 1.1):        http://fgp.huck.psu.edu/planttribes_config/22Gv1.1.tar.bz2
+
     </help>
     <citations>
         <citation type="bibtex">
@@ -43,7 +52,7 @@
             title = {None},
             year = {None},
             eprint = {None},
-            url = {None}
+            url = {https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader}
         }</citation>
     </citations>
 </tool>
--- a/data_manager_conf.xml	Fri Jan 13 10:32:14 2017 -0500
+++ b/data_manager_conf.xml	Tue Feb 14 13:44:24 2017 -0500
@@ -14,6 +14,14 @@
                     <value_translation type="function">abspath</value_translation>
                 </column>
                 <column name="description" />
+                <column name="config_path" output_ref="out_file">
+                    <move type="file">
+                        <source>${config_path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">plant_tribes/configs/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/plant_tribes/configs/${value}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
             </output>
         </data_table>
     </data_manager>
--- a/tool-data/plant_tribes_scaffolds.loc.sample	Fri Jan 13 10:32:14 2017 -0500
+++ b/tool-data/plant_tribes_scaffolds.loc.sample	Tue Feb 14 13:44:24 2017 -0500
@@ -1,4 +1,4 @@
 ## Plant Tribes scaffolds
-#Value	Name	Path	Description
-#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
-#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
+#Value	Name	Path	Description	Config Path
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)	/plant_tribes/configs/22Gv1.0
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)	/plant_tribes/configs/22Gv1.1
--- a/tool_data_table_conf.xml.sample	Fri Jan 13 10:32:14 2017 -0500
+++ b/tool_data_table_conf.xml.sample	Tue Feb 14 13:44:24 2017 -0500
@@ -1,6 +1,6 @@
 <tables>
     <table name="plant_tribes_scaffolds" comment_char="#">
-        <columns>value, name, path, description</columns>
+        <columns>value, name, path, description, config_path</columns>
         <file path="tool-data/plant_tribes_scaffolds.loc" />
     </table>
 </tables>