changeset 0:8b37115e8d6b draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
author bebatut
date Tue, 02 Feb 2016 11:16:14 -0500
parents
children 0ea7700e08ba
files fasta_add_barcode.py fasta_add_barcode.xml
diffstat 2 files changed, 122 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_add_barcode.py	Tue Feb 02 11:16:14 2016 -0500
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import argparse
+import copy
+import operator
+
+
+def write_seq_fasta_format(seq, output_file):
+    """Write `seq` to `output_file` wrapped at 60 characters per line.
+
+    Every chunk, including the last (possibly shorter) one, is terminated
+    by a newline. An empty `seq` writes nothing.
+    """
+    # range() instead of xrange(): xrange does not exist in Python 3, and
+    # range() behaves the same here on Python 2.
+    for start in range(0, len(seq), 60):
+        output_file.write(seq[start:start + 60] + '\n')
+
+def _write_barcoded_record(seq_id, seq, mapping, output_file):
+    """Write one FASTA record with its mapped barcode prepended to `seq`.
+
+    Nothing is written when `seq` is empty, which covers both the state
+    before the first header and records that carry no sequence data.
+    Raises ValueError when `seq_id` is not a key of `mapping`.
+    """
+    if seq == '':
+        return
+    # `in` replaces dict.has_key(), which no longer exists in Python 3.
+    if seq_id not in mapping:
+        string = 'A sequence identifier (' + seq_id + ') is'
+        string += ' not found in mapping file'
+        raise ValueError(string)
+    output_file.write('>' + seq_id + '\n')
+    write_seq_fasta_format(mapping[seq_id] + seq, output_file)
+
+
+def fasta_add_barcode(args):
+    """Prepend to every FASTA sequence the barcode mapped to its identifier.
+
+    `args` must provide three file paths:
+      - input_mapping_file: two tab-separated columns, sequence identifier
+        then barcode;
+      - input_sequence_file: the FASTA file to read;
+      - output_sequence_file: the FASTA file to write.
+
+    The identifier of a record is the first whitespace-separated token
+    following '>' on its header line; only that token is written back as
+    the header.
+
+    Raises ValueError when a non-blank mapping line does not have exactly
+    two columns, or when a sequence identifier is absent from the mapping.
+    """
+    mapping = {}
+    with open(args.input_mapping_file, 'r') as input_mapping_file:
+        for line in input_mapping_file:
+            # Strip only the line terminator: line[:-1] removed a real
+            # character when the last line had no newline and left '\r'
+            # in the barcode for CRLF files.
+            content = line.rstrip('\r\n')
+            if content == '':
+                # Blank lines (e.g. a trailing empty line) carry no mapping.
+                continue
+            split_line = content.split('\t')
+
+            if len(split_line) != 2:
+                string = 'Incorrect number of column in mapping file.'
+                string += '\nTwo tabular separated columns are expected'
+                raise ValueError(string)
+
+            mapping[split_line[0]] = split_line[1]
+
+    seq_id = ''
+    seq = ''
+    with open(args.input_sequence_file, 'r') as input_sequence_file:
+        with open(args.output_sequence_file, 'w') as output_sequence_file:
+            for line in input_sequence_file:
+                if line.startswith('>'):
+                    # A new header closes the record accumulated so far.
+                    _write_barcoded_record(seq_id, seq, mapping,
+                                           output_sequence_file)
+                    seq_id = line[1:].rstrip('\r\n').split()[0]
+                    seq = ''
+                else:
+                    seq += line.rstrip('\r\n')
+            # The last record has no following header to trigger its write,
+            # so it must be flushed explicitly once the input is exhausted.
+            _write_barcoded_record(seq_id, seq, mapping,
+                                   output_sequence_file)
+
+########
+# Main #
+########
+if __name__ == "__main__":
+    # Command-line entry point: the three file paths are mandatory options.
+    cli = argparse.ArgumentParser()
+    for option in ('--input_sequence_file',
+                   '--input_mapping_file',
+                   '--output_sequence_file'):
+        cli.add_argument(option, required=True)
+
+    fasta_add_barcode(cli.parse_args())
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_add_barcode.xml	Tue Feb 02 11:16:14 2016 -0500
@@ -0,0 +1,61 @@
+<tool id="fasta_add_barcode" name="Add barcodes">
+
+    <description>to FASTA sequences</description>
+
+    <requirements>
+        <requirement type='package' version="1.0">fasta_add_barcode</requirement>
+    </requirements>
+
+    <stdio>
+    </stdio>
+
+    <version_command>python --version</version_command>
+
+    <!--<command>-->
+    <command><![CDATA[
+        python $__tool_directory__/fasta_add_barcode.py 
+            --input_sequence_file $sequence_file
+            --input_mapping_file $mapping_file
+            --output_sequence_file $output_sequence_file
+        ]]>
+    </command>
+
+    <inputs>
+        <param name="sequence_file" type="data" format="fasta"
+            label="Sequence file" help=""/>
+        <param name="mapping_file" type="data" format="tabular" 
+            label="Mapping file between sequence identifier and barcode to add" 
+            help="The mapping file must be a tab-delimited file with
+            two columns. The first column contains the sequence identifier and
+            the second column the corresponding barcode to add at the beginning
+            of each sequence."/>
+    </inputs>
+
+    <outputs>
+        <data format="fasta" name="output_sequence_file" metadata_source="sequence_file" 
+            label="Sequences with added barcodes from ${on_string}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="sequence_file" value="input_sequence_file"/>
+            <param name="mapping_file" value="mapping_file"/>
+            <!-- Expected results are declared with <output>, not <param> -->
+            <output name="output_sequence_file" file="output_sequence_file"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+This tool takes a FASTA file and adds a barcode at the beginning of each sequence.
+The barcode of each sequence is determined from its sequence identifier and the
+mapping file.
+
+]]>
+    </help>
+
+    <citations>
+    </citations>
+</tool>
+