changeset 6:56fda3d161f0 draft

planemo upload commit 0203cb3a0b40d9348674b2b098af805e2986abca-dirty
author stevecassidy
date Thu, 06 Oct 2016 11:14:39 -0400
parents e28c0258a09e
children 5a8d9ddabec4
files alveo_api_key.cwl alveo_api_key.xml alveo_get_item_data.xml alveo_get_item_list.py alveo_get_item_list.xml alveo_item_list_downloader.py alveo_item_list_downloader.xml alveo_item_list_importer.py
diffstat 8 files changed, 104 insertions(+), 181 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_api_key.cwl	Thu Oct 06 11:14:39 2016 -0400
@@ -0,0 +1,11 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: 'cwl:draft-3'
+class: CommandLineTool
+id: "alveo_api_key"
+label: "Get Alveo API Key"
+inputs: [] # TODO
+outputs: [] # TODO
+baseCommand: []
+arguments: []
+description: |
+   TODO: Fill in description.
\ No newline at end of file
--- a/alveo_api_key.xml	Sat Sep 03 02:54:47 2016 -0400
+++ b/alveo_api_key.xml	Thu Oct 06 11:14:39 2016 -0400
@@ -6,7 +6,7 @@
     </requirements>
 
     <command interpreter="python">
-        alveo_api_key.py --api_key $api_key --output_path $output
+        alveo_api_key.py --api_key "$api_key" --output_path $output
     </command>
 
     <inputs>
@@ -19,6 +19,8 @@
 
     <tests>
         <test>
+            <!-- expect this to fail unless you enter a valid API key here and copy it to
+                 the result file -->
             <param name="api_key" value="your api key here" />
             <output name="output" file="api-key.dat" compare="contains" />
         </test>
--- a/alveo_get_item_data.xml	Sat Sep 03 02:54:47 2016 -0400
+++ b/alveo_get_item_data.xml	Thu Oct 06 11:14:39 2016 -0400
@@ -52,9 +52,7 @@
         </test>
     </tests>
 
-    <help>Downloads files from a local list of Alveo items. You can download all files or those matching
-        a wildcard pattern (e.g. *.txt).  Results will be stored as a dataset collection in
-        your history.</help>
+    <help>Downloads files from a local list of Alveo items. You can download all files or those matching a wildcard pattern (e.g. \*.txt).  Results will be stored as a dataset collection in your history.</help>
     <citations>
         <citation type='bibtex'>
             @article{cassidy2014alveo,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_get_item_list.py	Thu Oct 06 11:14:39 2016 -0400
@@ -0,0 +1,37 @@
+from __future__ import print_function
+import json
+import argparse
+import pyalveo
+import sys
+import os
+from fnmatch import fnmatch
+
+API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
+
+def parser():  # builds the command-line parser and returns the parsed arguments
+    arg_parser = argparse.ArgumentParser(description="Retrieves Item URLs from an Alveo Item List")
+    arg_parser.add_argument('--api_key', required=True, action="store", type=str, help="Path to a file containing the Alveo API key")
+    arg_parser.add_argument('--item_list_url', required=True, action="store", type=str, help="URL of the Item List to retrieve")
+    arg_parser.add_argument('--output', required=True, action="store", type=str, help="output file name")
+    return arg_parser.parse_args()
+
+def main():
+    """Write an "ItemURL" header and one item URL per line to args.output; exit 1 on API errors."""
+    args = parser()
+    try:
+        # --api_key names a file holding the key; the with-block closes it once read
+        with open(args.api_key, 'r') as key_file:
+            api_key = key_file.read().strip()
+        client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)
+        item_list = client.get_item_list(args.item_list_url)
+        with open(args.output, 'w') as out:
+            out.write("ItemURL\n")
+            for item in item_list:
+                out.write(item + "\n")
+                print(item)
+    except pyalveo.APIError as e:
+        print("ERROR: " + str(e), file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alveo_get_item_list.xml	Thu Oct 06 11:14:39 2016 -0400
@@ -0,0 +1,52 @@
+<tool id="alveo_get_item_list" name="Get Item List from Alveo" version="0.01" force_history_refresh="True">
+    <description>Retrieves Item URLs from an Alveo Item List</description>
+
+    <requirements>
+        <requirement type="package" version="0.6">pyalveo</requirement>
+    </requirements>
+
+    <command interpreter="python">
+        alveo_get_item_list.py --api_key '$api_key' --item_list_url '$item_list_url' --output '$output'
+    </command>
+
+    <inputs>
+        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
+        <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/>
+
+        <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import">
+            <options from_dataset="import_list">
+                <column name="name" index="0"/>
+                <column name="value" index="1"/>
+            </options>
+        </param>
+
+        <param name="job_name" type="text" size="25"
+               label="Supply a name for the outputs to remind you what they contain" value="Item List"/>
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" label="${job_name}"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="api_key" value="api-key.dat"/>
+            <param name="import_list" value="item-lists.dat"/>
+            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/>
+            <param name="job_name" value="test_output_180.dat"/>
+            <output name="output" file="item_list_180.dat"/>
+        </test>
+    </tests>
+
+    <help>Get the URLs of all of the items from an Alveo item list.</help>
+    <citations>
+        <citation type='bibtex'>
+            @article{cassidy2014alveo,
+              title={The alveo virtual laboratory: a web based repository API},
+              author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
+              year={2014},
+              publisher={Reykjavik, Iceland: European Language Resources Association}
+            }
+        </citation>
+    </citations>
+</tool>
--- a/alveo_item_list_downloader.py	Sat Sep 03 02:54:47 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-from __future__ import print_function
-import json
-import argparse
-import pyalveo
-import sys
-import os
-from fnmatch import fnmatch
-
-API_URL = 'https://app.alveo.edu.au' # TODO: export constants to a separate module
-
-def parser():
-    parser = argparse.ArgumentParser(description="Downloads documents in an Alveo Item List")
-    parser.add_argument('--api_key', required=True, action="store", type=str, help="Alveo API key")
-    parser.add_argument('--item_list_url', required=True, action="store", type=str, help="Item List to download")
-    parser.add_argument('--patterns', required=True, action="store", type=str, help="File patterns to download")
-    parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
-    return parser.parse_args()
-
-def get_item_list(api_key, item_list_url):
-    client = pyalveo.Client(api_key=api_key, api_url=API_URL, use_cache=False)
-    return client.get_item_list(item_list_url)
-
-# this file name pattern allows galaxy to discover the dataset designation and type
-FNPAT = "%(designation)s_%(ext)s"
-
-
-def galaxy_name(itemname, fname):
-    """construct a filename suitable for Galaxy dataset discovery
-    designation - (dataset identifier) is the file basename
-    ext - defines the dataset type and is the file extension
-    """
-
-    root, ext = os.path.splitext(fname)
-    ext = ext[1:] # remove initial .
-    fname = FNPAT % {'designation': itemname, 'ext': ext}
-
-    return fname
-
-
-def download_documents(item_list, patterns, output_path):
-    """
-    Downloads a list of documents to the directory specificed by output_path.
-
-    :type documents: list of pyalveo.Document
-    :param documents: Documents to download
-
-    :type output_path: String
-    :param output_path: directory to download to the documents to
-    """
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-
-    downloaded = []
-
-    items = item_list.get_all()
-    filtered_documents = []
-    for item in items:
-        documents = item.get_documents()
-        for doc in documents:
-            for pattern in patterns:
-                if not pattern == '' and fnmatch(doc.get_filename(), pattern):
-                    fname = galaxy_name(item.metadata()['alveo:metadata']['dc:identifier'], doc.get_filename())
-                    try:
-                        doc.download_content(dir_path=output_path, filename=fname)
-                        downloaded.append(doc.get_filename())
-                    except:
-                        # maybe it doesn't exist or we have no access
-                        # TODO: report this
-                        pass
-    return downloaded
-
-def main():
-    args = parser()
-    try:
-        api_key = open(args.api_key, 'r').read().strip()
-        item_list = get_item_list(api_key, args.item_list_url)
-        patterns = args.patterns.split(',')
-        downloaded = download_documents(item_list, patterns, args.output_path)
-        # write out a list of downloaded files as a result?
-    except pyalveo.APIError as e:
-        print("ERROR: " + str(e), file=sys.stderr)
-        sys.exit(1)
-
-if __name__ == '__main__':
-    main()
--- a/alveo_item_list_downloader.xml	Sat Sep 03 02:54:47 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-<tool id="alveo_item_list_downloader" name="Get Files from Alveo" version="0.01" force_history_refresh="True">
-    <description>Downloads files from the items in an Alveo Item List</description>
-
-    <requirements>
-        <requirement type="package" version="0.6">pyalveo</requirement>
-    </requirements>
-
-    <command interpreter="python">
-        alveo_item_list_downloader.py --api_key $api_key --item_list_url $item_list_url --patterns $patterns,$patternselect --output_path ItemListData
-    </command>
-
-    <inputs>
-        <param name="api_key" type="data" format="txt" label="API Key" help="Your Alveo API key"/>
-        <param name="import_list" type="data" format="tabular" label="Imported Alveo Item List" help=""/>
-
-        <param name="item_list_url" type="select" label="Alveo Item List" help="The Alveo Item List you wish to import">
-            <options from_dataset="import_list">
-                <column name="name" index="0"/>
-                <column name="value" index="1"/>
-            </options>
-        </param>
-
-        <param name="patternselect" type="select" multiple="true" label="Predefined imports" display="checkboxes">
-            <option value='*'>All Files</option>
-            <option value='*speaker16.wav'>Austalk 16bit/16kHz Speaker Headset WAV (*speaker16.wav)</option>
-            <option value='*plain.txt'>Plain text documents (*plain.txt)</option>
-            <option value='*.txt'>All text documents (*.txt)</option>
-            <option value=''>Other - enter pattern below</option>
-        </param>
-
-        <param name="patterns" type="text" label="File patterns to import"
-               optional="true"
-               help="One or more file patterns separated by commas eg. *.wav,*.txt"/>
-
-        <param name="job_name" type="text" size="25"
-               label="Supply a name for the outputs to remind you what they contain" value="Item List downloaded from Alveo"/>
-    </inputs>
-
-    <outputs>
-        <collection type="list" label="$job_name" name="output1">
-            <discover_datasets pattern="(?P&lt;designation&gt;[^_]+)_(?P&lt;ext&gt;.+)" directory="ItemListData"/>
-        </collection>
-    </outputs>
-
-    <tests>
-        <test>
-            <param name="api_key" value="api-key.dat"/>
-            <param name="import_list" value="item-lists.dat"/>
-            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/>
-            <param name="patterns" value=""/>
-            <param name="patternselect" value="*plain.txt"/>
-            <param name="output_path" value="test_out"/>
-            <output_collection name="output1" type="list" count="6">
-                    <element name="GCSAusE02">
-                        <assert_contents>
-                            <has_text_matching expression="background noises"/>
-                        </assert_contents>
-                    </element>
-            </output_collection>
-        </test>
-        <test>
-            <param name="api_key" value="api-key.dat"/>
-            <param name="import_list" value="item-lists.dat"/>
-            <param name="item_list_url" value="https://app.alveo.edu.au/item_lists/180"/>
-            <param name="patterns" value="*plain.txt"/>
-            <param name="patternselect" value=""/>
-            <param name="output_path" value="test_out"/>
-            <output_collection name="output1" type="list" count="6">
-                    <element name="GCSAusE02">
-                        <assert_contents>
-                            <has_text_matching expression="background noises"/>
-                        </assert_contents>
-                    </element>
-            </output_collection>
-        </test>
-    </tests>
-
-    <help>Downloads files from an Alveo Item list. You can download all files or those matching
-        a wildcard pattern (e.g. *.txt).  Results will be stored as a dataset collection in
-        your history.</help>
-    <citations>
-        <citation type='bibtex'>
-            @article{cassidy2014alveo,
-              title={The alveo virtual laboratory: a web based repository API},
-              author={Cassidy, Steve and Estival, Dominique and Jones, Tim and Sefton, Peter and Burnham, Denis and Burghold, Jared and others},
-              year={2014},
-              publisher={Reykjavik, Iceland: European Language Resources Association}
-            }
-        </citation>
-    </citations>
-</tool>
--- a/alveo_item_list_importer.py	Sat Sep 03 02:54:47 2016 -0400
+++ b/alveo_item_list_importer.py	Thu Oct 06 11:14:39 2016 -0400
@@ -29,7 +29,6 @@
     try:
         api_key = open(args.api_key, 'r').read().strip()
         item_lists = get_item_lists(api_key)
-        print(item_lists)
         if item_lists:
             write_table(item_lists, args.output)
     except Exception as e: