Mercurial > repos > artbio > mircounts
annotate mircounts.py @ 4:b7ac138bb781 draft default tip
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 51dc6c56c7d95fc229ffee958354211cd454fd36"
author | artbio |
---|---|
date | Sun, 09 May 2021 17:06:45 +0000 |
parents | ee99c6374a3b |
children |
rev | line source |
---|---|
0
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
2 import argparse |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
3 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
4 import pysam |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
5 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
6 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def Parser(argv=None):
    """
    Builds the command line interface and parses the arguments.

    argv: optional list of argument strings. When None (the default)
          argparse reads sys.argv[1:], which is the historical behaviour.
    returns the argparse Namespace with the attributes alignment_file,
    gff_file, quality_threshold, pre_mirs, mirs and lattice.
    Exits with a usage error (SystemExit) when the alignment file is not
    given or when --mirs is requested without --gff.
    """
    parser = argparse.ArgumentParser(description='miRNAs counts and coverages')
    # main() always opens the alignment file, so it is mandatory
    parser.add_argument('-a', '--alignment', metavar='FILE', type=str,
                        dest='alignment_file', required=True,
                        help='Alignment bam file')
    # adjacent literals instead of a backslash continuation inside the
    # string, which embedded the source indentation in the help text
    parser.add_argument('--gff', metavar='FILE', type=str, dest='gff_file',
                        help='GFF3 describing both pre-miRNAs '
                             'and mature miRNAs')
    parser.add_argument('-q', '--quality_threshold', type=int,
                        dest='quality_threshold',
                        help='Quality threshold for coverage (default=10)',
                        default=10)
    parser.add_argument('-p', '--pre_mirs', type=str, dest='pre_mirs',
                        help='pre-miRNAs count file path', metavar='FILE')
    parser.add_argument('-m', '--mirs', type=str, dest='mirs',
                        help='mature miRNA count file path', metavar='FILE')
    parser.add_argument('--lattice', metavar='FILE', type=str, dest='lattice',
                        help='Output file for the lattice dataframe.')
    args = parser.parse_args(argv)
    # mature miRNA regions are read from the GFF: fail early with a usage
    # message rather than later when trying to open a missing path
    if args.mirs and not args.gff_file:
        parser.error('--mirs requires --gff')
    return args
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
26 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
27 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def get_pre_mir_counts(bamfile):
    """
    Takes a AlignmentFile object and returns a dictionary of counts for reads
    aligning with pre_mirs (as keys)

    bamfile: object exposing `references` (iterable of reference names)
             and `count(contig=...)`
    returns a dict[pre_mir reference name] = value returned by count()
    """
    # `contig` is the keyword already used by the other functions of this
    # file; `reference` is only kept by pysam as a backward compatible synonym
    return {ref_name: bamfile.count(contig=ref_name)
            for ref_name in bamfile.references}
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
37 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
38 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def get_pre_mir_coverage(bamfile, quality=10):
    """
    Computes the coverage along each pre_mir of a AlignmentFile object.

    quality is forwarded to count_coverage as its quality_threshold.
    returns a dict[pre_mir reference name] = [coverage at each position]
    """
    per_reference = dict()
    targets = zip(bamfile.references, bamfile.lengths)
    for name, length in targets:
        by_base = bamfile.count_coverage(contig=name, start=0, stop=length,
                                         quality_threshold=quality)
        # count_coverage gives one series per nucleotide (4 in total):
        # add them up position by position
        per_reference[name] = list(map(sum, zip(*by_base)))
    return per_reference
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
53 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
54 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def get_mir_counts(bamfile, gff_file):
    """
    Takes a AlignmentFile and a gff file and computes for
    each 'miRNA' region of the gff the number of reads that hit it

    bamfile: object exposing count(contig=..., start=..., stop=...)
    gff_file: path to a tab-separated GFF3 file. For 'miRNA' rows, column 1
              is used as the mature miRNA name and the value following
              'Parent_mir_Name=' in column 9 as the pre_mir (contig) name
    returns a dict[mir_name] = count
    raises ValueError when a 'miRNA' row has fewer than 9 columns
    """
    counts = dict()
    # the context manager closes the GFF even if parsing or counting fails
    with open(gff_file, 'r') as gff:
        for line_number, line in enumerate(gff, start=1):
            # strip only the line terminator: the last line may not have
            # one, and dropping its last character would truncate the
            # pre_mir name
            line = line.rstrip('\r\n')
            if not line.strip() or line.startswith('#'):
                continue
            gff_fields = line.split("\t")
            if len(gff_fields) < 3 or gff_fields[2] != 'miRNA':
                continue
            if len(gff_fields) < 9:
                raise ValueError(
                    'Line %d of %s: miRNA feature with %d columns, '
                    '9 expected' % (line_number, gff_file, len(gff_fields)))
            mir_name = gff_fields[0]
            premir_name = gff_fields[8].split('Parent_mir_Name=')[-1]
            # ignore any attribute that follows Parent_mir_Name
            premir_name = premir_name.split(';')[0]
            mir_start = int(gff_fields[3])
            mir_end = int(gff_fields[4])
            # GFF is 1-based, pysam is 0-based.
            # NOTE(review): pysam documents regions as half-open, so
            # stop=mir_end-1 leaves the last base of the miRNA out of the
            # counted window. Kept unchanged to preserve existing counts;
            # confirm whether stop=mir_end was intended.
            counts[mir_name] = bamfile.count(contig=premir_name,
                                             start=mir_start - 1,
                                             stop=mir_end - 1)
    return counts
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
75 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
76 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def write_dataframe_coverage(countdict, outfile):
    """
    Takes a dict[pre_mir reference name] = [coverage list]
    and writes a dataframe with columns:
    <gene_type name>, offset, normoffset, counts and normcounts
    in the outfile

    Rows are written by sorted reference name, one per position (1-based
    offset). Norm_offset is offset / reference length and Norm_count is
    count / maximum count of the reference ('0' when that maximum is 0).
    A reference with an empty coverage list produces no row.
    """
    # the context manager closes the file even if a write fails
    with open(outfile, 'w') as F:
        F.write('Mir_hairpin\tOffset\tNorm_offset\tCount\tNorm_count\n')
        for ref in sorted(countdict):
            ref_coverage = countdict[ref]
            reference_length = len(ref_coverage)
            if reference_length == 0:
                # nothing to report, and max() raises on an empty sequence
                continue
            maximum = max(ref_coverage)
            # write the coverage of each position of the reference
            for pos, c in enumerate(ref_coverage, start=1):
                if maximum != 0:
                    norm_count = str(float(c) / maximum)
                else:
                    norm_count = '0'
                F.write('%s\t%s\t%s\t%s\t%s\n' % (
                    ref, str(pos), str(float(pos) / reference_length),
                    str(float(c)), norm_count))
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
99 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
100 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def write_counts(countdict, outfile):
    """
    Takes a dict[<gene_type name>]=count and
    writes a count table

    The table has one '<name>\\t<count>' line per key, sorted by key,
    and no header line.
    """
    # the context manager closes the file even if a write fails
    with open(outfile, 'w') as F:
        for gene in sorted(countdict):
            F.write('%s\t%s\n' % (gene, str(countdict[gene])))
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
110 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
111 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
def main():
    """
    Parses the command line, opens the alignment file and writes each
    requested output: pre-miRNA counts (--pre_mirs), the coverage
    dataframe (--lattice) and mature miRNA counts (--mirs).
    """
    args = Parser()
    # the context manager closes the BAM handle even if an output step fails
    with pysam.AlignmentFile(args.alignment_file, 'rb',
                             check_sq=False) as bamfile:
        if args.pre_mirs:
            pre_mirs = get_pre_mir_counts(bamfile)
            write_counts(pre_mirs, args.pre_mirs)
            if args.lattice:
                pre_mirs_coverage = get_pre_mir_coverage(
                    bamfile, args.quality_threshold)
                write_dataframe_coverage(pre_mirs_coverage, args.lattice)
        if args.mirs:
            mirs = get_mir_counts(bamfile, args.gff_file)
            write_counts(mirs, args.mirs)
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
125 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
126 |
10f0e4c00b13
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 3a6181bd181729f642b75c4e689f063fc2821cf1
artbio
parents:
diff
changeset
|
# Script entry point: run the workflow only when executed directly,
# not when the module is imported.
if __name__ == '__main__':
    main()