changeset 0:726b1e233a08 draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Mon, 08 Jun 2015 18:55:07 -0400
parents
children 371b0e95dca5
files cherry_pick_fasta.py cherry_pick_fasta.xml test-data/input.fa test-data/output.fa
diffstat 2 files changed, 83 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cherry_pick_fasta.py	Mon Jun 08 18:55:07 2015 -0400
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Cherry-pick fasta sequences whose header/name contains a query string
+"""
+
+import argparse
+
+def Parser():
+    """Parse --input, --query-string and --output from the command line and return the namespace."""
+    cli = argparse.ArgumentParser(description="Cherry pick fasta sequences")
+    # (flag, help) pairs; argparse derives dest "query_string" from "--query-string"
+    options = (
+        ("--input", "input fasta file"),
+        ("--query-string", "header containing the string will be extracted as well as the corresponding sequence"),
+        ("--output", "output fasta file"),
+    )
+    for flag, description in options:
+        cli.add_argument(flag, action="store", type=str, help=description)
+    return cli.parse_args()
+
+def __main__():
+    """Copy to --output every record of --input whose header line contains --query-string."""
+    args = Parser()
+    search_term = args.query_string
+    with open(args.input, "r") as source:  # context managers close both files even on error
+        records = source.read().split(">")[1:]  # drop whatever precedes the first ">"
+    with open(args.output, "w") as output:
+        for record in records:
+            # match against the header (first line) only, never the sequence body
+            if search_term in record.split("\n", 1)[0]:
+                output.write(">%s\n" % record.rstrip())
+
+
+if __name__ == "__main__":  # run the tool only when executed as a script
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cherry_pick_fasta.xml	Mon Jun 08 18:55:07 2015 -0400
@@ -0,0 +1,47 @@
+<tool id="cherry_pick_fasta" name="Pick Fasta sequences" version="0.9.0">
+  <description>with header satisfying a query string</description>
+  <command interpreter="python">cherry_pick_fasta.py
+                                   --input "$input"
+                                   --query-string "$query"
+                                   --output "$output"
+  </command>
+
+  <inputs>
+    <param name="query" type="text" size="30" value="" label="Select sequences with this string in their header" help="example:'gi|40557596'">
+      <sanitizer>
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+          <remove value="\"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+          <add source="\" target="\\"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param format="fasta" label="Source file" name="input" type="data" />
+  </inputs>
+  <outputs>
+    <data name="output" format="fasta" label="${tool.name} on ${input.value} with query: ${query.value}" />
+  </outputs>
+  <tests>
+    <test>
+        <param ftype="fasta" name="input" value="input.fa" />
+        <param name="query" value="AAR88092.1" />
+        <output name="output" ftype="fasta" file="output.fa" />
+    </test>
+  </tests>
+  <help>
+**What it does**
+
+This tool retrieves nucleotide/peptide sequences from a fasta file whose headers match a given query string
+
+It is Copyright © 2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_.
+
+.. _MIT license: http://opensource.org/licenses/MIT
+
+  </help>
+  <citations>
+      <citation></citation>
+  </citations>
+</tool>