Mercurial > repos > dchristiany > frogs_data_manager
changeset 0:7caea40b2a30 draft
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
author | dchristiany |
---|---|
date | Wed, 02 Oct 2019 11:00:08 -0400 |
parents | |
children | 5d72aa36e997 |
files | README.md data_manager/FROGS_data_manager.py data_manager/FROGS_data_manager.xml data_manager_conf.xml tool-data/HVL_db.loc.sample tool-data/frogs_db.loc.sample tool-data/phiX_db.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 9 files changed, 378 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Oct 02 11:00:08 2019 -0400 @@ -0,0 +1,60 @@ +.. image:: static/images/frogs_images/FROGS_logo.png + :height: 144 + :width: 110 + + +.. class:: infomark page-header h2 + +What it does + + +.. class:: infomark page-header h2 + +Inputs/outputs + +.. class:: h3 + +Inputs + + + +.. class:: h3 + +Outputs + + + + + .. image:: static/images/frogs_images/FROGS_affiliation_summary.png + :height: 800 + :width: 600 + + +.. class:: infomark page-header h2 + +Reference database + + + +.. class:: infomark page-header h2 + +How it works + + + + +Advice + + + + +---- + +**Contact** + +Contacts: frogs@inra.fr + +Repository: https://github.com/geraldinepascal/FROGS +Website: http://frogs.toulouse.inra.fr/ + +Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.*
# -*- coding: utf-8 -*-
"""Galaxy data manager script for FROGS reference databases.

File: data_manager/FROGS_data_manager.py

The script reads the JSON parameter file that Galaxy passes as ``--output``,
downloads the requested databank into the ``extra_files_path`` given in that
file, and finally overwrites the same file with the data table entries to
register.

Only the standard library is used: the tool wrapper declares no
``<requirements>``, so third-party modules cannot be assumed to be installed.
The code is written to run on both Python 2.7 and Python 3.
"""
import argparse
import json
import os
import sys
import tarfile
import tempfile
import time
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve
    from urllib2 import urlopen

# Tab-separated index listing the downloadable FROGS databases.
FROGS_DB_INDEX_URL = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"


def get_args():
    """Parse the command line options (--database, --resource, --output)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--database")
    parser.add_argument("-r", "--resource")
    parser.add_argument("-o", "--output")
    return parser.parse_args()


def _add_data_table_entry(data_manager_dict, data_table_entry, data_table):
    """Append *data_table_entry* to the list stored for *data_table*.

    The ``data_tables`` mapping and the per-table list are created on first
    use. The (mutated) *data_manager_dict* is returned.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table, []).append(data_table_entry)
    return data_manager_dict


def parse_db_index(text):
    """Turn the text of the FROGS database index into a list of rows.

    The first line is treated as a header and skipped, as are blank lines.
    Each returned row holds the first four columns of the input line, a
    generated database identifier, and the fifth column (used by the caller
    as the download link).

    Raises:
        ValueError: if a data line has fewer than five tab-separated columns.
    """
    rows = []
    for line in text.splitlines()[1:]:
        if not line.strip():
            continue
        columns = line.split("\t")
        if len(columns) < 5:
            raise ValueError("Malformed database index line: %r" % line)
        # NOTE(review): the identifier repeats column 1 at both ends
        # (col1_col2_col1). This is kept exactly as in the original code;
        # confirm whether the last component was meant to be another column.
        identifier = columns[1] + "_" + columns[2] + "_" + columns[1]
        rows.append(columns[:4] + [identifier] + [columns[4]])
    return rows


def safe_extract(tar, destination):
    """Extract *tar* into *destination* and return its top-level entry names.

    The archive is downloaded over plain HTTP, so member names are checked
    first: a member whose path would resolve outside *destination* aborts the
    extraction. Only member names are checked; link targets inside the
    archive are not inspected.

    Raises:
        ValueError: if a member path escapes *destination*.
    """
    root = os.path.realpath(destination)
    top_level = set()
    for member in tar.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if target != root and not target.startswith(root + os.sep):
            raise ValueError("Unsafe path in archive: %r" % member.name)
        relative = os.path.relpath(target, root)
        if relative != os.curdir:
            top_level.add(relative.split(os.sep)[0])
    tar.extractall(root)
    return sorted(top_level)


def find_fasta(directory):
    """Return the path of the single ``*.fasta`` file found in *directory*.

    Raises:
        ValueError: if there is no such file or more than one.
    """
    fastas = sorted(name for name in os.listdir(directory) if name.endswith('.fasta'))
    if len(fastas) != 1:
        raise ValueError(
            "Expected exactly one .fasta file in %s, found %d" % (directory, len(fastas))
        )
    return os.path.join(directory, fastas[0])


def frogs_sources(data_manager_dict, target_directory, max_dbs=2):
    """Download FROGS databases and register them in the ``frogs_db`` table.

    Args:
        data_manager_dict: dictionary that receives the data table entries.
        target_directory: directory under which the databases are unpacked
            (in a ``frogs_db_<YYYYMMDD>`` sub-directory).
        max_dbs: number of index rows to process, counted from the top of
            the index. Defaults to 2, the limit hard-coded in the original
            implementation; pass ``None`` to process every row.

    Raises:
        ValueError: if the index is malformed, an archive contains unsafe
            paths, an archive does not unpack to exactly one top-level
            entry, or the unpacked directory does not hold exactly one
            ``.fasta`` file.
    """
    # Get the FROGS database index (decoded once, then parsed as text).
    with closing(urlopen(FROGS_DB_INDEX_URL)) as response:
        index_text = response.read().decode('utf-8')
    db_index = parse_db_index(index_text)
    if max_dbs is not None:
        db_index = db_index[:max_dbs]

    # Unpack below target_directory so that the recorded path really exists.
    dir_name = "frogs_db_" + time.strftime("%Y%m%d")
    extract_root = os.path.join(target_directory, dir_name)
    if not os.path.isdir(extract_root):
        os.makedirs(extract_root)

    for row in db_index:
        value = row[4]
        name = value.replace("_", " ")
        link = row[5]

        # Download to a private temporary file and always clean it up.
        handle, archive_path = tempfile.mkstemp(suffix=".tar.gz")
        os.close(handle)
        try:
            urlretrieve(link, archive_path)
            with tarfile.open(archive_path) as tar:
                extracted = safe_extract(tar, extract_root)
        finally:
            os.remove(archive_path)

        if len(extracted) != 1:
            raise ValueError(
                "Expected one top-level directory in %s, found: %s"
                % (link, ", ".join(extracted) or "none")
            )
        path = find_fasta(os.path.join(extract_root, extracted[0]))

        data_table_entry = dict(name=name, value=value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")


def HVL_sources(resource):
    """Placeholder for the HVL database download (not implemented yet)."""
    raise NotImplementedError(
        "HVL database download is not implemented (resource: %s)" % resource
    )


def phiX_sources(resource):
    """Placeholder for the phiX database download (not implemented yet)."""
    raise NotImplementedError(
        "phiX database download is not implemented (resource: %s)" % resource
    )


def main():
    """Entry point: read Galaxy's parameters, fetch data, write the result."""
    # Get args from command line
    args = get_args()

    # Extract json file params
    filename = args.output
    with open(filename) as params_file:
        params = json.loads(params_file.read())
    target_directory = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    data_manager_dict = {}
    if args.database == "frogs_db_data":
        frogs_sources(data_manager_dict, target_directory)
    elif args.database == "HVL_db_data":
        HVL_sources(args.resource)
    elif args.database == "phiX_db_data":
        phiX_sources(args.resource)
    else:
        sys.exit("Unknown database: %s" % args.database)

    # Galaxy reads the data table entries back from the same JSON file.
    with open(filename, 'w') as result_file:
        result_file.write(json.dumps(data_manager_dict))


if __name__ == "__main__":
    main()
<tool id="FROGS_data_manager" name="FROGS Data manager" version="2019.10.02">
    <requirements>
    </requirements>
    <!-- The "resource" parameter only exists in the HVL and phiX branches of
         the conditional, so it is passed only when one of them is selected;
         referencing it unconditionally makes the command template fail for
         the FROGS database option. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/FROGS_data_manager.py'
        --database="$database.database"
        #if str($database.database) != "frogs_db_data"
            --resource="$database.resource"
        #end if
        --output "$output"
    ]]></command>
    <inputs>
        <conditional name="database">
            <param name="database" type="select" label="Please select a database to update">
                <option value="frogs_db_data">FROGS database (FROGS Affiliation OTU)</option>
                <option value="HVL_db_data">HVL database (FROGS Affiliation postprocess)</option>
                <option value="phiX_db_data">phiX database (FROGS Filters)</option>
            </param>
            <when value="frogs_db_data"/>
            <when value="HVL_db_data">
                <param name="resource" type="select" label="Choose the resource to create/update">
                    <option value="unite_7.1_ITS1">UNITE 7.1 ITS1</option>
                </param>
            </when>
            <when value="phiX_db_data">
                <param name="resource" type="select" label="Choose the resource to create/update">
                    <option value="phiX">phiX</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output" format="data_manager_json"/>
    </outputs>
    <help><![CDATA[
**What it does**

Downloads reference databanks used by the FROGS tools and registers them in
the corresponding Galaxy data tables.

**Databases**

- *FROGS database*: taxonomy databases for FROGS Affiliation OTU
  (data table ``frogs_db``).
- *HVL database*: references for FROGS Affiliation postprocess
  (data table ``HVL_db``). Not implemented yet.
- *phiX database*: contaminant databank for FROGS Filters
  (data table ``phiX_db``). Not implemented yet.
    ]]></help>
</tool>
<?xml version="1.0"?>
<data_managers>
    <!-- tool_file must point at the tool wrapper shipped in this repository:
         data_manager/FROGS_data_manager.xml -->
    <data_manager tool_file="data_manager/FROGS_data_manager.xml" id="frogs_data_manager">
        <data_table name="frogs_db">
            <output>
                <column name="value"/>
                <column name="name" />
                <column name="path" output_ref="output" >
                    <move type="directory">
                        <source>#echo "/".join(str(${path}).split('/')[:-1])#</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">frogs_db</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/frogs_db/#echo "/".join(str(${path}).split('/')[-2:])#</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
        <data_table name="HVL_db">
            <output>
                <column name="value"/>
                <column name="name" />
                <column name="path" output_ref="output" >
                    <move type="file">
                        <!--source>${path}/${value}.tsv</source-->
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">HVL_db</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/HVL_db/${value}.fasta</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
        <data_table name="phiX_db">
            <output>
                <column name="value"/>
                <column name="name" />
                <column name="path" output_ref="output" >
                    <move type="file">
                        <!--source>${path}/${value}.tsv</source-->
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">phiX_db</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/phiX_db/${value}.fasta</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/HVL_db.loc.sample Wed Oct 02 11:00:08 2019 -0400 @@ -0,0 +1,34 @@ +# Copyright (C) 2014 INRA +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# +#This is a sample file that enables tools FROGS_affiliations_postprocess to solve +#inclusive amplicon sequence by selecting the smallest reference among multihit +#You will need to create or download exact amplicon sequence reference fasta file +#download link : http://genoweb.toulouse.inra.fr/frogs_databanks/HVL_reference +#Finally you will need to create HVL_db.loc file similar to this one in your galaxy +#tool-data directory.The HVL_db.loc file has this format (longer white space characters are +#TAB characters): +# +#<unique_database_name> <name> <file_path> +# +#First column will be the visible name in galaxy. +#So, for example, if you have UNITE 7.1 ITS1 (only!) stored +#in /galaxy_databanks/ITS/UNITE_7.1/UNITE_ITS1.fasta +#then the HVL_db.loc entry would look like this: +# +# +# EXAMPLE FOR TEST : +#Unite_extract_ITS1_test Unite_extract_ITS1_test ${__HERE__}/HVL_db_data/Unite_extract_ITS1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/frogs_db.loc.sample Wed Oct 02 11:00:08 2019 -0400 @@ -0,0 +1,53 @@ +# Copyright (C) 2014 INRA +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# +#This is a sample file that enables tools FROGS_affiliations_OTU to use taxonomy database for +#taxonomy affiliation. You will need to create or download Blast+ index and train your database +#for RDP classifier these data files. +#download link : http://genoweb.toulouse.inra.fr/frogs_databanks/assignation +#Finally you will need to create frogs_db.loc file similar to this one in your galaxy +#tool-data directory.The frogs_db.loc file has this format (longer white space characters are +#TAB characters): +# +#<unique_database_name> <file_path> +# +#First column will be the visible name in galaxy. +#So, for example, if you had 16S silva 128 indexed stored in +#/galaxy_databanks/16S/silva_128/ +#then the frogs_db.loc entry would look like this: +# +#silva 128 16S /galaxy_databanks/16S/silva_128/silva_128_16S.fasta +# +#and your /galaxy_databanks/16S/silva_128/ directory +#would contain index files: +# +#-rw-r--r-- 1 mbernard FROGS 8097966 5 déc. 16:56 bergeyTrainingTree.xml +#-rw-r--r-- 1 mbernard FROGS 1572981589 5 déc. 16:56 genus_wordConditionalProbList.txt +#-rw-r--r-- 1 mbernard FROGS 1654 5 déc. 16:56 LICENCE.txt +#-rw-r--r-- 1 mbernard FROGS 1072228 5 déc. 
16:56 logWordPrior.txt +#-rw-r--r-- 1 mbernard FROGS 940834335 5 déc. 16:56 silva_128_16S.fasta +#-rw-r--r-- 1 mbernard FROGS 152606489 5 déc. 16:56 silva_128_16S.fasta.nhr +#-rw-r--r-- 1 mbernard FROGS 6918588 5 déc. 16:56 silva_128_16S.fasta.nin +#-rw-r--r-- 1 mbernard FROGS 205320030 5 déc. 16:56 silva_128_16S.fasta.nsq +#-rw-r--r-- 1 mbernard FROGS 281 5 déc. 16:56 silva_128_16S.fasta.properties +#-rw-r--r-- 1 mbernard FROGS 3420464 5 déc. 16:56 silva_128_16S.tax +#-rw-r--r-- 1 mbernard FROGS 964048 5 déc. 16:57 wordConditionalProbIndexArr.txt +# +# +#<name> <name> <file_path> +# +# EXAMPLE FOR TEST : +#ITS1_test ITS1_test ${__HERE__}/frogs_db_data/ITS1.rdp.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/phiX_db.loc.sample Wed Oct 02 11:00:08 2019 -0400 @@ -0,0 +1,42 @@ +# Copyright (C) 2014 INRA +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# +#This is a sample file that enables tools FROGS_filters to identify phix contaminant. You will #need to create or download Blast+ index. +#download link : http://genoweb.toulouse.inra.fr/frogs_databanks/contaminants +#Finally you will need to create phiX_db.loc file similar to this one in your galaxy +#tool-data directory.The phiX_db.loc file has this format (longer white space characters are +#TAB characters): +# +#<contaminant_name> <name> <file_path> +# +#First column will be the visible name in galaxy. +#So, for example, if you had phix indexed stored in +#/galaxy_databanks/phiX/ +#then the phiX_db.loc entry would look like this: +# +# +# EXAMPLE FOR TEST : +#phiX_test phiX_test ${__HERE__}/phiX_db_data/phi.fa +# +#and your /galaxy_databanks/phiX/ directory +#would contain index files: +# +#-rwxrwxr-x 1 gpascal FROGS 5535 16 sept. 2015 phi.fa +#-rw-rwxr-- 1 gpascal FROGS 132 16 sept. 2015 phi.fa.nhr +#-rw-rwxr-- 1 gpascal FROGS 88 16 sept. 2015 phi.fa.nin +#-rw-rwxr-- 1 gpascal FROGS 1348 16 sept. 2015 phi.fa.nsq +# +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Oct 02 11:00:08 2019 -0400 @@ -0,0 +1,16 @@ +<?xml version="1.0"?> +<tables> + <!-- Location of frogs database files --> + <table name="frogs_db" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/frogs_db.loc" /> + </table> + <table name="HVL_db" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/HVL_db.loc" /> + </table> + <table name="phiX_db" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/phiX_db.loc" /> + </table> +</tables>