# HG changeset patch
# User brenninc
# Date 1458720206 14400
# Node ID d98ec2e7d5691df6f2a9813fdda1f9cba92089e8
Uploaded first version
diff -r 000000000000 -r d98ec2e7d569 data_manager/directory_data_manager.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/directory_data_manager.py Wed Mar 23 04:03:26 2016 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+import gzip
+import json
+import optparse # using optparse as hydra still python 2.6
+import os.path
+import shutil
+
+def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):  # Append data_table_entry to data_manager_dict['data_tables'][data_table_name], creating missing containers.
+ data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )  # make sure the top-level 'data_tables' dict exists
+ data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] )  # make sure this table has a list of entries
+ data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry )
+ return data_manager_dict  # the same dict object that was passed in, mutated in place
+
+
+def get_param(name, params, default=None, check_tab=True):  # Look up name in params and validate the result through check_param.
+ value = params.get(name)  # None when the key is absent; params must offer .get (main passes the "param_dict" section of the input JSON)
+ print name, value  # Python 2 print statement: echoes every looked-up parameter to stdout
+ return check_param(name, value, default=default, check_tab=check_tab)  # propagates any Exception raised by check_param
+
+
+def check_param(name, value, default=None, check_tab=True):  # Return value if usable, else default; raise Exception when neither is usable or a tab is found.
+ if value in [ None, '', '?' ]:  # None, empty string and '?' all count as "no value provided"
+ if default:  # truthiness test: a falsy default (None, '') is treated as no default at all
+ print "Using {0} for {1} as no value provided".format( default, name )
+ value = default
+ else:
+ raise Exception( '{0} is not a valid {1}. You must specify a valid {1}.'.format( value, name ) )
+ if check_tab and "\t" in value:  # also applied to a substituted default; NOTE(review): raises TypeError if value does not support "in" (e.g. a number) - confirm inputs are always strings
+ raise Exception( '{0} is not a valid {1}. It may not contain a tab because these are used as seperators by galaxy .'.format( value, name ) )  # NOTE(review): "seperators" and the space before the period look like typos; runtime text, left unchanged
+ return value
+
+def check_extension(extension):  # Normalise an extension: trim surrounding whitespace and drop one leading dot.
+ extension = extension.strip()
+ if extension[0] == ".":  # NOTE(review): raises IndexError when the stripped string is empty (e.g. whitespace-only input)
+ extension = extension[1:]  # only the first dot is removed, so ".fastq.gz" becomes "fastq.gz"
+ return extension
+
+
+def check_path(path, original_extension):  # Return True if directory path has an entry named *.<original_extension>; otherwise raise Exception.
+ files = os.listdir(path)  # lists direct entries only (no recursion); os.listdir itself raises OSError if path is missing or not a directory
+ check = "." + original_extension  # original_extension is expected without its leading dot
+ for a_file in files:
+ if a_file.endswith(check):  # plain suffix match on the entry name; does not test that the entry is a regular file
+ return True  # the first match is enough
+ raise Exception( 'path {0} does not contain any files ending with {1}'.format( path, check ) )  # reached only when no entry matched
+
+
+def main():  # Build one data table entry from the parameters in the JSON file and overwrite that file with the result.
+
+ # Parse the command line
+ parser = optparse.OptionParser()
+ parser.add_option( '--data_table_name', action='store', type="string", default=None, help='path' )
+ parser.add_option( '--json_output_file', action='store', type="string", default=None, help='path' )
+ (options, args) = parser.parse_args()  # NOTE(review): only the two options above are defined, yet the command block of directory_data_manager.xml in this changeset also passes --value/--dbkey/--name/--path; optparse rejects undefined options - confirm
+
+ data_table_name = check_param("data_table_name", options.data_table_name)
+ json_output_file = check_param("json_output_file", options.json_output_file, check_tab=False)  # tab check skipped for the file path
+
+ param_dict = json.loads( open( json_output_file ).read() )  # the file is read here as input and rewritten below as output; NOTE(review): this handle is never explicitly closed
+ params = param_dict.get("param_dict")  # NOTE(review): None if the "param_dict" key is absent, in which case get_param fails on params.get
+ print "input params:"
+ print params
+
+ data_table_entry = {}
+ data_table_entry["original_extension"] = check_extension(get_param("original_extension", params))
+ data_table_entry["galaxy_extension"] = check_extension(get_param("galaxy_extension", params))
+ data_table_entry["decompress"] = get_param("decompress", params)
+ if not (data_table_entry["decompress"] in ["No","Yes"]):  # exact, case-sensitive match required
+ raise Exception( "Only legal values for dcompress are No and Yes." )  # NOTE(review): "dcompress" is presumably a typo for "decompress"; runtime text, left unchanged
+ data_table_entry["path"] = get_param("path", params)
+ check_path(data_table_entry["path"], data_table_entry["original_extension"])  # fail before writing anything if no matching file exists
+
+ basename = os.path.basename(data_table_entry["path"])  # NOTE(review): empty string when path ends with a separator, leaving no usable default name
+ filename = os.path.splitext(basename)[0]  # drops only the last extension of the final path component
+ data_table_entry["name"] = get_param("name", params, default=filename)  # defaults chain: name <- filename, value <- name, dbkey <- value
+ data_table_entry["value"] = get_param("value", params, default=data_table_entry["name"])
+ data_table_entry["dbkey"] = get_param("dbkey", params, default=data_table_entry["value"])
+
+ data_manager_dict = {}
+ _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )
+
+ print "output:"
+ print data_manager_dict
+ # Save the result, replacing the input parameters that were read from the same file
+ with open( json_output_file, 'wb' ) as output_file:  # NOTE(review): binary mode accepts str only under Python 2; text mode would be needed under Python 3
+ output_file.write( json.dumps( data_manager_dict ) )
+ output_file.write( "\n" )
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not when imported
+ main()
diff -r 000000000000 -r d98ec2e7d569 data_manager/directory_data_manager.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/directory_data_manager.xml Wed Mar 23 04:03:26 2016 -0400
@@ -0,0 +1,59 @@
+
+ path inputer
+
+ directory_data_manager.py
+ --value "${value}"
+ --dbkey "${dbkey}"
+ --name "${name}"
+ --path "${path}"
+ --data_table_name "directory_data"
+ --json_output_file "${json_output_file}"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Adds information for downloading data to the directory_data data table.
+
+Required inputs are:
+
+path: Full path on the server where galaxy can find the files
+
+Original Extension: The extension as found on the server.
+ Parts before the extension that could be used to split the data into subsections, such as Read1 and Read2, should not be included.
+
+Galaxy_Extension: The extension to give the file before loading into galaxy so galaxy can detect the file type.
+ This should be one as listed in galaxy/config/datatypes_conf.xml (or xml.sample)
+ For example, use tabular for tsv, txt for text, and fasta rather than fa.
+ Ideally use fastqsanger, fastqsolexa, fastqcssanger, or fastqillumina instead of just fastq as many tools need this level of detail.
+
+Decompress: 'Yes' to ask the tool to decompress the files otherwise 'No'. Any other value will cause an error.
+
+If name is not provided, the filename from path less the extension is used.
+
+If value is not provided, the name will be used (or its default)
+
+If dbkey is not provided, the value will be used (or its default)
+
+===
+
+The tool will check the path exists and that it holds at least one file with the required extension.
+
+
+
+
+
+
diff -r 000000000000 -r d98ec2e7d569 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Wed Mar 23 04:03:26 2016 -0400
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r d98ec2e7d569 tool-data/directory_data.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/directory_data.loc.sample Wed Mar 23 04:03:26 2016 -0400
@@ -0,0 +1,19 @@
+#This file lists the directories that can be read in
+
+#This file has the format (white space characters are TAB characters):
+#
+#value dbkey name original_extension galaxy_extension decompress path
+#
+#original_extension should not include the starting .
+#
+#galaxy_extension should be one listed in galaxy/config/datatypes_conf.xml (or xml.sample)
+#
+#decompress should be No or Yes
+#
+#So, directory_data.loc could look something like this: (whitespace is tabs)
+#
+#john_12 john_12 John's fastq files batch 12 fastq.gz fastqsanger Yes /data/john/batch12
+#
+#Your directory_data.loc file should contain an entry for each path and extension pair
+#
+
diff -r 000000000000 -r d98ec2e7d569 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Mar 23 04:03:26 2016 -0400
@@ -0,0 +1,7 @@
+
+
+