Mercurial > repos > bebatut > fasta_add_barcode
comparison fasta_add_barcode.py @ 0:8b37115e8d6b draft
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
| author | bebatut |
|---|---|
| date | Tue, 02 Feb 2016 11:16:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8b37115e8d6b |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 import os | |
| 6 import argparse | |
| 7 import copy | |
| 8 import operator | |
| 9 | |
| 10 | |
def write_seq_fasta_format(seq, output_file):
    """Write a sequence to an open file, wrapped at 60 characters per line.

    Args:
        seq: Sequence string to write (the FASTA header line is not
            written here).
        output_file: Writable file-like object. Each chunk of up to 60
            characters is written followed by a newline.

    An empty sequence writes nothing.
    """
    # range() rather than xrange(): xrange only exists on Python 2, whereas
    # range() gives the same result here on both Python 2 and Python 3.
    for start in range(0, len(seq), 60):
        output_file.write(seq[start:start + 60] + '\n')
| 15 | |
def _write_barcoded_record(seq_id, seq, mapping, output_file):
    """Write one FASTA record with its barcode prepended to the sequence."""
    # "in" works on Python 2 and 3; dict.has_key() was removed in Python 3.
    if seq_id not in mapping:
        string = 'A sequence identifier (' + seq_id + ') is'
        string += ' not found in mapping file'
        raise ValueError(string)

    output_file.write('>' + seq_id + '\n')
    write_seq_fasta_format(mapping[seq_id] + seq, output_file)


def fasta_add_barcode(args):
    """Prepend a per-sequence barcode to the records of a FASTA file.

    The mapping file is read first (one ``identifier<TAB>barcode`` pair per
    line). The FASTA file is then streamed record by record: each record is
    written to the output with only the first whitespace-delimited word of
    its header kept as identifier, and with the mapped barcode prepended to
    the sequence, re-wrapped by ``write_seq_fasta_format``.

    Records whose sequence is empty are not written and are not checked
    against the mapping.

    Args:
        args: Object exposing three path attributes:
            ``input_mapping_file``, ``input_sequence_file`` and
            ``output_sequence_file`` (the latter is overwritten).

    Raises:
        ValueError: If a mapping line does not contain exactly two
            tab-separated columns, or if the identifier of a non-empty
            record is absent from the mapping file.
    """
    mapping = {}
    with open(args.input_mapping_file, 'r') as input_mapping_file:
        for line in input_mapping_file:
            # Strip only line terminators. Slicing with [:-1] would eat the
            # last barcode character when the file lacks a final newline,
            # and would keep a stray '\r' on CRLF files.
            split_line = line.rstrip('\r\n').split('\t')

            if len(split_line) != 2:
                string = 'Incorrect number of column in mapping file.'
                string += '\nTwo tabular separated columns are expected'
                raise ValueError(string)

            mapping[split_line[0]] = split_line[1]

    seq_id = ''
    seq_parts = []
    with open(args.input_sequence_file, 'r') as input_sequence_file:
        with open(args.output_sequence_file, 'w') as output_sequence_file:
            for line in input_sequence_file:
                line = line.rstrip('\r\n')
                if line.startswith('>'):
                    # A new header closes the record collected so far.
                    seq = ''.join(seq_parts)
                    if seq:
                        _write_barcoded_record(
                            seq_id, seq, mapping, output_sequence_file)
                    # Keep only the first word of the header; a bare '>'
                    # yields an empty identifier instead of an IndexError.
                    header_fields = line[1:].split()
                    seq_id = header_fields[0] if header_fields else ''
                    seq_parts = []
                else:
                    seq_parts.append(line)

            # The final record has no following header to trigger its
            # output, so it must be flushed explicitly once input ends.
            seq = ''.join(seq_parts)
            if seq:
                _write_barcoded_record(
                    seq_id, seq, mapping, output_sequence_file)
| 50 | |
| 51 ######## | |
| 52 # Main # | |
| 53 ######## | |
if __name__ == "__main__":
    # Command-line entry point: the three file paths are all mandatory
    # options and are handed over unchanged to fasta_add_barcode().
    parser = argparse.ArgumentParser()
    for option in ('--input_sequence_file',
                   '--input_mapping_file',
                   '--output_sequence_file'):
        parser.add_argument(option, required=True)
    args = parser.parse_args()

    fasta_add_barcode(args)
