Mercurial > repos > drosofff > cherry_pick_fasta
changeset 0:726b1e233a08 draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Mon, 08 Jun 2015 18:55:07 -0400 |
parents | |
children | 371b0e95dca5 |
files | cherry_pick_fasta.py cherry_pick_fasta.xml test-data/input.fa test-data/output.fa |
diffstat | 2 files changed, 83 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: cherry_pick_fasta.py
"""
Cherry-pick the fasta sequences whose header/name contains a query string.
"""

import argparse


def Parser():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace: carries ``input`` (fasta file to read),
        ``query_string`` (text searched for in the headers) and ``output``
        (fasta file to write).
    """
    the_parser = argparse.ArgumentParser(
        description="Cherry pick fasta sequences")
    # All three options are mandatory: without them the script cannot run,
    # so let argparse report the problem instead of failing later on None.
    the_parser.add_argument(
        '--input', action="store", type=str, required=True,
        help="input fasta file")
    the_parser.add_argument(
        '--query-string', dest="query_string", action="store", type=str,
        required=True,
        help="header containing the string will be extracted as well as the corresponding sequence")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output fasta file")
    return the_parser.parse_args()


def _iter_records(handle):
    """Yield each fasta record of *handle* as one string, without its '>'.

    A record starts at a line beginning with '>' and runs up to the next such
    line. Anything found before the first '>' line is not a record and is
    skipped. A '>' appearing inside a line does not start a new record.
    """
    record = None
    for line in handle:
        if line.startswith(">"):
            if record is not None:
                yield "".join(record)
            record = [line[1:]]
        elif record is not None:
            record.append(line)
    if record is not None:
        yield "".join(record)


def __main__():
    """Copy to --output every record of --input whose header has the query.

    Only the header line is searched, so a query that happens to occur in the
    sequence itself does not select the record. Each selected record is
    written as '>' + record with trailing whitespace removed, followed by a
    single newline.
    """
    args = Parser()
    search_term = args.query_string
    # write() is used instead of the print statement so that the script runs
    # on both Python 2 and Python 3; 'with' closes both files even on error.
    with open(args.input, "r") as fasta, open(args.output, "w") as output:
        for record in _iter_records(fasta):
            header = record.split("\n", 1)[0]
            if search_term in header:
                output.write(">%s\n" % record.rstrip())


if __name__ == "__main__":
    __main__()
<!-- File: cherry_pick_fasta.xml : Galaxy wrapper for cherry_pick_fasta.py -->
<tool id="cherry_pick_fasta" name="Pick Fasta sequences" version="0.9.0">
    <description>with header satisfying a query string</description>
    <!-- Every value is double-quoted; the sanitizer on "query" escapes the
         double quote and the backslash so the quoting cannot be broken. -->
    <command interpreter="python">cherry_pick_fasta.py
        --input "$input"
        --query-string "$query"
        --output "$output"
    </command>

    <inputs>
        <param name="query" type="text" size="30" value="" label="Select sequences with this string in their header" help="example: 'gi|40557596'">
            <sanitizer>
                <!-- Accept printable characters, except the two that are remapped below. -->
                <valid initial="string.printable">
                    <remove value="&quot;"/>
                    <remove value="\"/>
                </valid>
                <!-- Backslash-escape the double quote and the backslash. -->
                <mapping initial="none">
                    <add source="&quot;" target="\&quot;"/>
                    <add source="\" target="\\"/>
                </mapping>
            </sanitizer>
        </param>
        <param format="fasta" label="Source file" name="input" type="data" />
    </inputs>
    <outputs>
        <data name="output" format="fasta" label="${tool.name} on ${input.value} with query: ${query.value}" />
    </outputs>
    <tests>
        <test>
            <param ftype="fasta" name="input" value="input.fa" />
            <param name="query" value="AAR88092.1" />
            <output name="output" ftype="fasta" file="output.fa" />
        </test>
    </tests>
    <help>
**What it does**

This tool retrieves nucleotide/peptide sequences from a fasta file whose headers match a given query string.

It is Copyright © 2015 CNRS and University Pierre et Marie Curie and is released under the `MIT license`_.

.. _MIT license: http://opensource.org/licenses/MIT

    </help>
    <citations>
        <!-- TODO(review): this citation is empty; fill it in (with its type) or drop it. -->
        <citation></citation>
    </citations>
</tool>