Mercurial > repos > artbio > small_rna_map
annotate small_rna_map.py @ 4:6ff925458e05 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit dcd643f146afe371d3e418aa91ddbaff6211a09a
author | artbio |
---|---|
date | Tue, 18 Jul 2017 17:35:52 -0400 |
parents | 2e0dc6032a98 |
children | d65045e976e6 |
rev | line source |
---|---|
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
1 import argparse |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
2 from collections import defaultdict |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
3 |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
4 import numpy |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
5 |
0
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
6 import pysam |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
7 |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
8 |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
9 def Parser(): |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
10 the_parser = argparse.ArgumentParser() |
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
11 the_parser.add_argument('--input', dest='input', required=True, |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
12 nargs='+', help='input BAM files') |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
13 the_parser.add_argument('--sample_name', dest='sample_name', |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
14 required=True, nargs='+', help='sample name') |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
15 the_parser.add_argument('--output', action="store", |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
16 type=str, help='output tabular file') |
0
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
17 args = the_parser.parse_args() |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
18 return args |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
19 |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
20 |
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
21 def compute_map_dictionary(pysam_object): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
22 ''' |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
23 returns |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
24 - map_dictionary {(read_chromosome,read_position,polarity): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
25 [list_of_read_length]} |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
26 - max_dictionary {chromosome_name: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
27 max_of_number_of_read_at_any_position} |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
28 ''' |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
29 unmatched_chromosomes = list(pysam_object.references) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
30 map_dictionary = defaultdict(list) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
31 max_dictionary = dict.fromkeys(pysam_object.references, 0) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
32 for read in pysam_object: |
0
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
33 if not read.is_unmapped: |
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
34 read_chromosome = read.reference_name |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
35 # to shift from 0-based to 1-based coordinate |
0
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
36 read_position = read.pos + 1 |
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
37 polarity = "F" if (not read.is_reverse) else "R" |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
38 map_dictionary[(read_chromosome, read_position, |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
39 polarity)].append(read.qlen) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
40 if read_chromosome in unmatched_chromosomes: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
41 unmatched_chromosomes.remove(read_chromosome) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
42 for name in unmatched_chromosomes: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
43 # to put at least a null value by chromosome |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
44 map_dictionary[(name, 1, 'F')] = [0] |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
45 # make a dictionary with max value of reads by chromosome |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
46 max_dictionary = defaultdict(int) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
47 for name in pysam_object.references: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
48 for key in map_dictionary.keys(): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
49 if name in key and len(map_dictionary[key]) > max_dictionary[name]: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
50 max_dictionary[name] = len(map_dictionary[key]) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
51 return map_dictionary, max_dictionary |
0
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
52 |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
53 |
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
54 def main(inputs, sample_names, output): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
55 with open(output, 'w') as outfile: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
56 outfile.write('\t'.join(['Dataset', 'Chromosome', 'Chrom_length', |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
57 'Coordinate', 'Nbr_reads', 'Polarity', |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
58 'Max', 'Mean', 'Median\n'])) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
59 chr_lengths = dict(zip(pysam.AlignmentFile(inputs[0], 'rb').references, |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
60 pysam.AlignmentFile(inputs[0], 'rb').lengths)) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
61 for file, sample in zip(inputs, sample_names): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
62 with pysam.AlignmentFile(file, 'rb') as bamfile: |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
63 map, maximum = compute_map_dictionary(bamfile) |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
64 for record in sorted(map): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
65 line = [sample, record[0], |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
66 str(chr_lengths[record[0]]), |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
67 str(record[1]), str(len(map[record])), |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
68 record[2], str(maximum[record[0]]), |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
69 str(round(numpy.mean(map[record]), 1)), |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
70 str(numpy.median(map[record]))] |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
71 outfile.write("\t".join(line) + "\n") |
0
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
72 |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
73 |
1ad5d040f85f
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit fa452d860cf550c7524df59e77b36fd39e3e2a45
artbio
parents:
diff
changeset
|
74 if __name__ == "__main__": |
3
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
75 args = Parser() |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
76 # if identical sample names # to be tested |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
77 if len(set(args.sample_name)) != len(args.sample_name): |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
78 args.sample_name = [name + '_' + str(i) for |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
79 i, name in enumerate(args.sample_name)] |
2e0dc6032a98
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit 93f212712d9846c7aaa389de60babb332d38363e
artbio
parents:
0
diff
changeset
|
80 main(args.input, args.sample_name, args.output) |