changeset 0:7caea40b2a30 draft

planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
author dchristiany
date Wed, 02 Oct 2019 11:00:08 -0400
parents
children 5d72aa36e997
files README.md data_manager/FROGS_data_manager.py data_manager/FROGS_data_manager.xml data_manager_conf.xml tool-data/HVL_db.loc.sample tool-data/frogs_db.loc.sample tool-data/phiX_db.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 9 files changed, 378 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,60 @@
+.. image:: static/images/frogs_images/FROGS_logo.png 
+   :height: 144
+   :width: 110
+
+
+.. class:: infomark page-header h2
+
+What it does
+
+
+.. class:: infomark page-header h2
+
+Inputs/outputs
+
+.. class:: h3
+
+Inputs
+
+
+
+.. class:: h3
+
+Outputs
+
+
+
+
+.. image:: static/images/frogs_images/FROGS_affiliation_summary.png
+   :height: 800
+   :width: 600
+
+
+.. class:: infomark page-header h2
+
+Reference database
+
+
+
+.. class:: infomark page-header h2
+
+How it works
+
+
+
+
+Advice
+
+
+
+
+----
+
+**Contact**
+
+Contact: frogs@inra.fr
+
+Repository: https://github.com/geraldinepascal/FROGS
+
+Website: http://frogs.toulouse.inra.fr/
+
+Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/FROGS_data_manager.py	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+from galaxy.util.json import from_json_string, to_json_string
+import os, sys, argparse, time, json, requests, urllib
+import tarfile
+
+def get_args():
+    """Parse the command-line options of the data manager script.
+
+    Three optional string options are declared: -d/--database,
+    -r/--resource and -o/--output. An option that is not given on the
+    command line is left as None on the returned namespace.
+
+    Returns:
+        The argparse namespace with `database`, `resource` and `output`.
+    """
+    cli = argparse.ArgumentParser()
+    option_flags = (
+        ("-d", "--database"),
+        ("-r", "--resource"),
+        ("-o", "--output"),
+    )
+    for short_flag, long_flag in option_flags:
+        cli.add_argument(short_flag, long_flag)
+    return cli.parse_args()
+
+def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
+    """Append one entry to a data table inside the data manager dictionary.
+
+    The 'data_tables' mapping and the list for `data_table` are created on
+    first use, so the function can be called on an empty dictionary.
+
+    Args:
+        data_manager_dict: dictionary that collects the data table entries;
+            it is modified in place.
+        data_table_entry: the entry to append to the table.
+        data_table: name of the data table receiving the entry.
+
+    Returns:
+        The same `data_manager_dict` object, for convenience.
+    """
+    tables = data_manager_dict.setdefault('data_tables', {})
+    entries = tables.setdefault(data_table, [])
+    entries.append(data_table_entry)
+    return data_manager_dict
+
+def _fetch_frogs_db_index(index_url):
+    """Download the FROGS databases index and return its rows as lists.
+
+    The header line and blank or too-short lines are skipped. Each returned
+    row holds the first four index columns, a generated database value, and
+    the fifth index column (used by the caller as the archive link).
+
+    Raises:
+        requests.HTTPError: if the index cannot be downloaded.
+    """
+    with requests.Session() as session:
+        response = session.get(index_url)
+        response.raise_for_status()
+        # Decode before splitting so fields are text on Python 2 and 3 alike.
+        lines = response.content.decode('utf-8').splitlines()
+    rows = [line.split("\t") for line in lines[1:] if line.strip()]
+    # NOTE(review): the generated value repeats column 1 (col1_col2_col1);
+    # kept as in the original, confirm this is the intended naming.
+    return [
+        row[:4] + [row[1] + "_" + row[2] + "_" + row[1]] + [row[4]]
+        for row in rows
+        if len(row) >= 5
+    ]
+
+
+def _download_file(url, destination):
+    """Stream `url` to the local file `destination`.
+
+    Uses requests, which handles http(s) links only (assumes the index lists
+    such links -- TODO confirm).
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status.
+    """
+    response = requests.get(url, stream=True)
+    try:
+        response.raise_for_status()
+        with open(destination, "wb") as handle:
+            for chunk in response.iter_content(chunk_size=1024 * 1024):
+                if chunk:
+                    handle.write(chunk)
+    finally:
+        response.close()
+
+
+def frogs_sources(data_manager_dict, target_directory, limit=2):
+    """Download FROGS databases and register them in the "frogs_db" table.
+
+    Each archive listed in the FROGS databases index is downloaded and
+    extracted under <target_directory>/frogs_db_<YYYYMMDD>/, then one entry
+    (name, value, path of the .fasta file) is added to `data_manager_dict`.
+
+    Args:
+        data_manager_dict: dictionary collecting the data table entries;
+            modified in place.
+        target_directory: existing directory that receives the databases.
+        limit: number of index rows to process. Defaults to 2, the value
+            that was previously hard-coded; pass None to process all rows.
+
+    Raises:
+        requests.HTTPError: if the index or an archive cannot be downloaded.
+        RuntimeError: if an archive does not add exactly one new directory
+            entry, or that directory does not hold exactly one .fasta file.
+    """
+    #get frogs database index
+    frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
+    db_index = _fetch_frogs_db_index(frogs_db_index_link)
+
+    #get frogs dbs
+    # Extract below target_directory so the recorded paths point at real files.
+    dir_name = os.path.join(target_directory, "frogs_db_"+time.strftime("%Y%m%d"))
+    if not os.path.isdir(dir_name):
+        os.mkdir(dir_name)
+    archive = os.path.join(target_directory, "tmp.tar.gz")
+    known = set(os.listdir(dir_name))
+    for line in db_index[:limit]:
+        value = line[4]
+        name = value.replace("_"," ")
+        link = line[5]
+
+        #download and unpack frogs db
+        _download_file(link, archive)
+        try:
+            # NOTE(review): extractall trusts the member paths of the archive;
+            # only use with archives from a trusted source.
+            with tarfile.open(archive) as tar:
+                tar.extractall(dir_name)
+        finally:
+            os.remove(archive)
+
+        #get fasta file path
+        current = set(os.listdir(dir_name))
+        added = sorted(current - known)
+        known = current
+        if len(added) != 1:
+            raise RuntimeError("expected one new entry after extracting %s, found: %s" % (link, added))
+        new_db = os.path.join(dir_name, added[0])
+        fastas = sorted(entry for entry in os.listdir(new_db) if entry.endswith('.fasta'))
+        if len(fastas) != 1:
+            raise RuntimeError("expected one .fasta file in %s, found: %s" % (new_db, fastas))
+        path = os.path.join(new_db, fastas[0])
+
+        data_table_entry = dict(name = name, value = value, path=path)
+        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
+
+#def HVL_sources(resource):
+
+#def phiX_sources(resource):
+
+def main():
+    """Entry point: build the requested database and report it to Galaxy.
+
+    The file given by --output is first read as JSON to find the
+    'extra_files_path' of the output dataset, which is used as the target
+    directory. After the selected database has been processed, the same
+    file is overwritten with the collected data table entries as JSON.
+
+    Raises:
+        NotImplementedError: for the "HVL_db_data" and "phiX_db_data"
+            databases, whose handlers are only commented-out stubs.
+    """
+    #get args from command line
+    args = get_args()
+
+    # Extract json file params
+    data_manager_dict = {}
+    filename = args.output
+    with open(filename) as handle:
+        params = from_json_string(handle.read())
+    target_directory = params[ 'output_data' ][0]['extra_files_path']
+    if not os.path.isdir(target_directory):
+        os.makedirs(target_directory)
+
+    if args.database=="frogs_db_data":
+        frogs_sources(data_manager_dict,target_directory)
+    elif args.database in ("HVL_db_data", "phiX_db_data"):
+        # Fail with an explicit message instead of a NameError on the
+        # undefined HVL_sources / phiX_sources functions.
+        raise NotImplementedError("database '%s' is not supported yet" % args.database)
+
+    # Save the collected entries to the output file, replacing the input params.
+    with open(filename, 'w') as handle:
+        handle.write(json.dumps(data_manager_dict))
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/FROGS_data_manager.xml	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,36 @@
+<tool id="FROGS_data_manager" name="FROGS Data manager" version="2019.10.02" tool_type="manage_data">
+    <requirements>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python $__tool_directory__/FROGS_data_manager.py
+            --database="$database.database"
+            ## "resource" is only defined in the HVL and phiX branches of the conditional
+            #if str($database.database) != "frogs_db_data"
+                --resource="$database.resource"
+            #end if
+            --output "$output"
+    ]]></command>
+    <inputs>
+        <conditional name="database">
+            <param name="database" type="select" label="Please select a database to update">
+                <option value="frogs_db_data">FROGS database (FROGS Affiliation OTU)</option>
+                <option value="HVL_db_data">HVL database (FROGS Affiliation postprocess)</option>
+                <option value="phiX_db_data">phiX database (FROGS Filters)</option>
+            </param>
+            <when value="frogs_db_data"/>
+            <when value="HVL_db_data">
+                <param name="resource" type="select" label="Choose the resource to create/update">
+                    <option value="unite_7.1_ITS1">UNITE 7.1 ITS1</option>
+                </param>
+            </when>
+            <when value="phiX_db_data">
+                <param name="resource" type="select" label="Choose the resource to create/update">
+                    <option value="phiX">phiX</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="data_manager_json"/>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+Downloads reference data for the FROGS tools and registers it in the matching Galaxy data table.
+
+- *FROGS database*: downloads the databases listed in the FROGS databases index (http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv) and adds them to the ``frogs_db`` data table.
+- *HVL database* and *phiX database*: not implemented yet.
+    ]]></help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<data_managers>
+    <!-- tool_file must point at the data manager tool shipped in this repository -->
+    <data_manager tool_file="data_manager/FROGS_data_manager.xml" id="frogs_data_manager">
+        <!-- FROGS affiliation databases: the directory holding the fasta file is moved under frogs_db -->
+        <data_table name="frogs_db">
+            <output>
+                <column name="value"/>
+                <column name="name" />
+                <column name="path" output_ref="output" >
+                    <move type="directory">
+                        <source>#echo "/".join(str(${path}).split('/')[:-1])#</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">frogs_db</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/frogs_db/#echo "/".join(str(${path}).split('/')[-2:])#</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <!-- HVL reference files; the move source is still commented out -->
+        <data_table name="HVL_db">
+            <output>
+                <column name="value"/>
+                <column name="name" />
+                <column name="path" output_ref="output" >
+                    <move type="file">
+                        <!--source>${path}/${value}.tsv</source-->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">HVL_db</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/HVL_db/${value}.fasta</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+        <!-- phiX contaminant files; the move source is still commented out -->
+        <data_table name="phiX_db">
+            <output>
+                <column name="value"/>
+                <column name="name" />
+                <column name="path" output_ref="output" >
+                    <move type="file">
+                        <!--source>${path}/${value}.tsv</source-->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">phiX_db</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/phiX_db/${value}.fasta</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/HVL_db.loc.sample	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,34 @@
+# Copyright (C) 2014 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#This is a sample file that enables the FROGS_affiliations_postprocess tool to resolve
+#inclusive amplicon sequences by selecting the smallest reference among multiple hits.
+#You will need to create or download a fasta file of exact amplicon reference sequences
+#(download link: http://genoweb.toulouse.inra.fr/frogs_databanks/HVL_reference).
+#Finally, you will need to create an HVL_db.loc file similar to this one in your galaxy
+#tool-data directory. The HVL_db.loc file has this format (longer white space characters are
+#TAB characters):
+#
+#<unique_database_name>    <name>    <file_path>
+#
+#First column will be the visible name in galaxy.
+#So, for example, if you have UNITE 7.1 ITS1 (only!) stored
+#in /galaxy_databanks/ITS/UNITE_7.1/UNITE_ITS1.fasta
+#then the HVL_db.loc entry would look like this:
+#
+#unite_7.1_ITS1	UNITE 7.1 ITS1	/galaxy_databanks/ITS/UNITE_7.1/UNITE_ITS1.fasta
+#
+# EXAMPLE FOR TEST :
+#Unite_extract_ITS1_test	Unite_extract_ITS1_test	${__HERE__}/HVL_db_data/Unite_extract_ITS1.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/frogs_db.loc.sample	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,53 @@
+# Copyright (C) 2014 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#This is a sample file that enables the FROGS_affiliations_OTU tool to use a taxonomy database for
+#taxonomy affiliation. You will need to create or download the Blast+ index of your database
+#and the files produced by training the RDP classifier on it.
+#download link : http://genoweb.toulouse.inra.fr/frogs_databanks/assignation
+#Finally, you will need to create a frogs_db.loc file similar to this one in your galaxy
+#tool-data directory. The frogs_db.loc file has this format (longer white space characters are
+#TAB characters):
+#
+#<unique_database_name>    <name>    <file_path>
+#
+#First column will be the visible name in galaxy.
+#So, for example, if you had 16S silva 128 indexed stored in
+#/galaxy_databanks/16S/silva_128/
+#then the frogs_db.loc entry would look like this:
+#
+#silva_128_16S	silva 128 16S	/galaxy_databanks/16S/silva_128/silva_128_16S.fasta
+#
+#and your /galaxy_databanks/16S/silva_128/ directory
+#would contain index files:
+#
+#-rw-r--r-- 1 mbernard FROGS    8097966  5 déc.  16:56 bergeyTrainingTree.xml
+#-rw-r--r-- 1 mbernard FROGS 1572981589  5 déc.  16:56 genus_wordConditionalProbList.txt
+#-rw-r--r-- 1 mbernard FROGS       1654  5 déc.  16:56 LICENCE.txt
+#-rw-r--r-- 1 mbernard FROGS    1072228  5 déc.  16:56 logWordPrior.txt
+#-rw-r--r-- 1 mbernard FROGS  940834335  5 déc.  16:56 silva_128_16S.fasta
+#-rw-r--r-- 1 mbernard FROGS  152606489  5 déc.  16:56 silva_128_16S.fasta.nhr
+#-rw-r--r-- 1 mbernard FROGS    6918588  5 déc.  16:56 silva_128_16S.fasta.nin
+#-rw-r--r-- 1 mbernard FROGS  205320030  5 déc.  16:56 silva_128_16S.fasta.nsq
+#-rw-r--r-- 1 mbernard FROGS        281  5 déc.  16:56 silva_128_16S.fasta.properties
+#-rw-r--r-- 1 mbernard FROGS    3420464  5 déc.  16:56 silva_128_16S.tax
+#-rw-r--r-- 1 mbernard FROGS     964048  5 déc.  16:57 wordConditionalProbIndexArr.txt
+#
+#
+#<name>    <name>    <file_path>
+#
+# EXAMPLE FOR TEST :
+#ITS1_test	ITS1_test	${__HERE__}/frogs_db_data/ITS1.rdp.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/phiX_db.loc.sample	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,42 @@
+# Copyright (C) 2014 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#This is a sample file that enables the FROGS_filters tool to identify phiX contaminants.
+#You will need to create or download a Blast+ index.
+#download link : http://genoweb.toulouse.inra.fr/frogs_databanks/contaminants
+#Finally you will need to create phiX_db.loc file similar to this one in your galaxy
+#tool-data directory.The phiX_db.loc file has this format (longer white space characters are
+#TAB characters):
+#
+#<contaminant_name>	<name>	<file_path>
+#
+#First column will be the visible name in galaxy.
+#So, for example, if you had phix indexed stored in
+#/galaxy_databanks/phiX/ 
+#then the phiX_db.loc entry would look like this:
+#
+#
+# EXAMPLE FOR TEST :
+#phiX_test	phiX_test	${__HERE__}/phiX_db_data/phi.fa
+#
+#and your /galaxy_databanks/phiX/ directory
+#would contain index files:
+#
+#-rwxrwxr-x 1 gpascal FROGS 5535 16 sept.  2015 phi.fa
+#-rw-rwxr-- 1 gpascal FROGS  132 16 sept.  2015 phi.fa.nhr
+#-rw-rwxr-- 1 gpascal FROGS   88 16 sept.  2015 phi.fa.nin
+#-rw-rwxr-- 1 gpascal FROGS 1348 16 sept.  2015 phi.fa.nsq
+#
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Location of frogs database files -->
+    <table name="frogs_db" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/frogs_db.loc" />
+    </table>
+    <!-- Location of HVL reference files (FROGS Affiliation postprocess) -->
+    <table name="HVL_db" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/HVL_db.loc" />
+    </table>
+    <!-- Location of phiX contaminant files (FROGS Filters) -->
+    <table name="phiX_db" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/phiX_db.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Oct 02 11:00:08 2019 -0400
@@ -0,0 +1,3 @@
+<?xml version="1.0"?>
+<tool_dependency>
+</tool_dependency>