changeset 1:3ce5d56297ed draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/megablast_xml_parser commit 3ccddd4e2032535ead030efa401e690ffb80d145"
author devteam
date Wed, 09 Sep 2020 10:27:20 +0000 (2020-09-09)
parents 35ff246876fc
children
files megablast_xml_parser.py megablast_xml_parser.xml repository_dependencies.xml
diffstat 3 files changed, 72 insertions(+), 87 deletions(-) [+]
line wrap: on
line diff
--- a/megablast_xml_parser.py	Mon May 19 10:59:33 2014 -0400
+++ b/megablast_xml_parser.py	Wed Sep 09 10:27:20 2020 +0000
@@ -1,78 +1,64 @@
 #!/usr/bin/env python
-    
-import sys, os, re
 
-if sys.version_info[:2] >= ( 2, 5 ):
-    import xml.etree.cElementTree as ElementTree
-else:
-    from galaxy import eggs
-    import pkg_resources; pkg_resources.require( "elementtree" )
-    from elementtree import ElementTree
+import re
+import sys
+import xml.etree.cElementTree as ElementTree
 
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
-    sys.exit()
 
 def __main__():
-    source  = sys.argv[1]
-    hspTags = [
-           "Hsp_bit-score",
-           "Hsp_evalue",
-           "Hsp_query-from",
-           "Hsp_query-to",
-           "Hsp_hit-from",
-           "Hsp_hit-to",
-           "Hsp_query-frame",
-           "Hsp_hit-frame",
-           "Hsp_identity",
-           "Hsp_align-len",
-           "Hsp_qseq",
-           "Hsp_hseq",
-           "Hsp_midline"
-          ]
-    hspData = []
+    source = sys.argv[1]
+    hspTags = ["Hsp_bit-score",
+               "Hsp_evalue",
+               "Hsp_query-from",
+               "Hsp_query-to",
+               "Hsp_hit-from",
+               "Hsp_hit-to",
+               "Hsp_query-frame",
+               "Hsp_hit-frame",
+               "Hsp_identity",
+               "Hsp_align-len",
+               "Hsp_qseq",
+               "Hsp_hseq",
+               "Hsp_midline"]
 
     # get an iterable
-    try: 
-        context = ElementTree.iterparse( source, events=( "start", "end" ) )
-    except:
-        stop_err( "Invalid data format." )
+    try:
+        context = ElementTree.iterparse(source, events=("start", "end"))
+    except Exception:
+        sys.exit("Invalid data format.")
     # turn it into an iterator
-    context = iter( context )
+    context = iter(context)
     # get the root element
     try:
-        event, root = context.next()
-    except:
-        stop_err( "Invalid data format." )
+        event, root = next(context)
+    except Exception:
+        sys.exit("Invalid data format.")
 
-    outfile = open( sys.argv[2], 'w' )
-    try:
-        for event, elem in context:
-           # for every <Iteration> tag
-           if event == "end" and elem.tag == "Iteration":
-               query = elem.findtext( "Iteration_query-def" )
-               qLen = elem.findtext( "Iteration_query-len" )
-               # for every <Hit> within <Iteration>
-               for hit in elem.findall( "Iteration_hits/Hit" ):
-                   subject = hit.findtext( "Hit_id" )
-                   if re.search( '^gi', subject ):
-                       subject = subject.split('|')[1]
-                   sLen = hit.findtext( "Hit_len" )
-                   # for every <Hsp> within <Hit>
-                   for hsp in hit.findall( "Hit_hsps/Hsp" ):
-                        outfile.write( "%s\t%s\t%s\t%s" % ( query, qLen, subject, sLen ) )
-                        for tag in hspTags:
-                            outfile.write("\t%s" %(hsp.findtext( tag )))
-                            #hspData.append( hsp.findtext( tag ) )
-                        #hspData = []
-                        outfile.write('\n')
-               # prevents ElementTree from growing large datastructure
-               root.clear()
-               elem.clear()
-    except:
-        outfile.close()
-        stop_err( "The input data is malformed, or there is more than one dataset in the input file. Error: %s" % sys.exc_info()[1] )
+    with open(sys.argv[2], 'w') as outfile:
+        try:
+            for event, elem in context:
+                # for every <Iteration> tag
+                if event == "end" and elem.tag == "Iteration":
+                    query = elem.findtext("Iteration_query-def")
+                    qLen = elem.findtext("Iteration_query-len")
+                    # for every <Hit> within <Iteration>
+                    for hit in elem.findall("Iteration_hits/Hit"):
+                        subject = hit.findtext("Hit_id")
+                        if re.search('^gi', subject):
+                            subject = subject.split('|')[1]
+                        sLen = hit.findtext("Hit_len")
+                        # for every <Hsp> within <Hit>
+                        for hsp in hit.findall("Hit_hsps/Hsp"):
+                            outfile.write("%s\t%s\t%s\t%s" % (query, qLen, subject, sLen))
+                            for tag in hspTags:
+                                outfile.write("\t%s" % (hsp.findtext(tag)))
+                            outfile.write('\n')
+                    # prevents ElementTree from growing large datastructure
+                    root.clear()
+                    elem.clear()
+        except Exception:
+            sys.exit("The input data is malformed, or there is more than one dataset in the input file. Error: %s" % sys.exc_info()[1])
 
-    outfile.close()
 
-if __name__ == "__main__": __main__()
+if __name__ == "__main__":
+    __main__()
--- a/megablast_xml_parser.xml	Mon May 19 10:59:33 2014 -0400
+++ b/megablast_xml_parser.xml	Wed Sep 09 10:27:20 2020 +0000
@@ -1,18 +1,23 @@
-<tool id="megablast_xml_parser" name="Parse blast XML output" version="1.0.0">
-<description></description>
-<command interpreter="python">megablast_xml_parser.py $input1 $output1</command>
-<inputs>
-  <param name="input1" type="data" format="blastxml" label="Megablast XML output" />
-</inputs>
-<outputs>
-  <data name="output1" format="tabular"/>
-</outputs>
-<tests>
-  <test>
-    <param name="input1" value="megablast_xml_parser_test1.gz" ftype="blastxml" />
-    <output name="output1" file="megablast_xml_parser_test1_out.tabular" ftype="tabular" />
-  </test>
-</tests>
+<tool id="megablast_xml_parser" name="Parse blast XML output" version="1.0.1">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="3.8">python</requirement>
+    </requirements>
+    <command><![CDATA[
+        python '$__tool_directory__/megablast_xml_parser.py' '$input1' '$output1'
+    ]]></command>
+    <inputs>
+        <param name="input1" type="data" format="blastxml" label="Megablast XML output" />
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="megablast_xml_parser_test1.gz" ftype="blastxml" />
+            <output name="output1" file="megablast_xml_parser_test1_out.tabular" ftype="tabular" />
+        </test>
+    </tests>
 <help>
 
 **What it does**
@@ -54,7 +59,5 @@
 
 Note that this form of output does not contain an alignment identity value. However, it can be computed by dividing the number of identical bases within the alignment (Field 13) by the alignment length (Field 14) using the *Text Manipulation->Compute* tool.
 
-
-
-</help>
+    </help>
 </tool>
--- a/repository_dependencies.xml	Mon May 19 10:59:33 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<repositories>
-  <repository changeset_revision="939a600f45e9" name="blast_datatypes" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-</repositories>