annotate fasta_add_barcode.py @ 2:2ca5fde7a2bb draft default tip

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit e857f7126443e115f11954085423f8999bc870aa-dirty
author bebatut
date Fri, 15 Apr 2016 06:01:42 -0400
parents 8b37115e8d6b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
1 #!/usr/bin/env python
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
2 # -*- coding: utf-8 -*-
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
3
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
4 import sys
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
5 import os
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
6 import argparse
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
7 import copy
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
8 import operator
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
9
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
10
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
def write_seq_fasta_format(seq, output_file):
    """Write a sequence to an open file, wrapped at 60 characters per line.

    Only the sequence body is written; the '>' header line is the caller's
    responsibility. An empty sequence writes nothing.

    :param seq: sequence string to write
    :param output_file: writable text file-like object
    """
    line_width = 60
    # range() instead of xrange(): same result here, and it also exists on
    # Python 3 (xrange raises NameError there).
    for start in range(0, len(seq), line_width):
        output_file.write(seq[start:start + line_width] + '\n')
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
15
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
def _write_barcoded_record(seq_id, seq, mapping, output_file):
    """Write one FASTA record with its barcode prepended to the sequence.

    Records with an empty sequence are skipped (nothing is written and the
    identifier is not looked up).

    :param seq_id: sequence identifier (header text up to first whitespace)
    :param seq: full sequence of the record, without line breaks
    :param mapping: dict mapping sequence identifier -> barcode string
    :param output_file: writable text file-like object
    :raises ValueError: if ``seq_id`` has no entry in ``mapping``
    """
    if seq == '':
        return

    if seq_id not in mapping:
        string = 'A sequence identifier (' + seq_id + ') is'
        string += ' not found in mapping file'
        raise ValueError(string)

    output_file.write('>' + seq_id + '\n')
    write_seq_fasta_format(mapping[seq_id] + seq, output_file)


def fasta_add_barcode(args):
    """Prepend a per-sequence barcode to every record of a FASTA file.

    The mapping file must contain exactly two tab-separated columns per line:
    the sequence identifier and its barcode. Each record of the input FASTA
    file is written to the output file with a header reduced to its
    identifier and with the barcode prepended to the sequence, re-wrapped by
    ``write_seq_fasta_format``.

    :param args: object exposing ``input_mapping_file``,
        ``input_sequence_file`` and ``output_sequence_file`` path attributes
    :raises ValueError: if a mapping line does not have two columns, or if a
        sequence identifier is missing from the mapping file
    """
    mapping = {}
    with open(args.input_mapping_file, 'r') as input_mapping_file:
        for line in input_mapping_file:
            # Strip only the line terminator: slicing off the last character
            # would eat real data on a final line without a newline, and
            # would leave '\r' glued to the barcode for CRLF files.
            split_line = line.rstrip('\r\n').split('\t')

            if len(split_line) != 2:
                string = 'Incorrect number of column in mapping file.'
                string += '\nTwo tabular separated columns are expected'
                raise ValueError(string)

            mapping[split_line[0]] = split_line[1]

    seq_id = ''
    seq_parts = []
    with open(args.input_sequence_file, 'r') as input_sequence_file:
        with open(args.output_sequence_file, 'w') as output_sequence_file:
            for line in input_sequence_file:
                line = line.rstrip('\r\n')
                if line.startswith('>'):
                    # A new header closes the record accumulated so far.
                    _write_barcoded_record(
                        seq_id, ''.join(seq_parts), mapping,
                        output_sequence_file)
                    seq_id = line[1:].split()[0]
                    seq_parts = []
                else:
                    seq_parts.append(line)

            # The last record has no following header to trigger its
            # output, so it must be flushed explicitly.
            _write_barcoded_record(
                seq_id, ''.join(seq_parts), mapping, output_sequence_file)
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
50
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
51 ########
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
52 # Main #
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
53 ########
8b37115e8d6b planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit d28e31b1b4dbcc3c406a4e8a5a41ac4576bf4f43-dirty
bebatut
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: the three file paths are all mandatory.
    parser = argparse.ArgumentParser()
    for option in ('--input_sequence_file',
                   '--input_mapping_file',
                   '--output_sequence_file'):
        parser.add_argument(option, required=True)

    fasta_add_barcode(parser.parse_args())