Mercurial > repos > mvdbeek > size_distribution
comparison size_distributions.py @ 0:ac5584567084 draft
planemo upload for repository https://github.com/bardin-lab/smallRNA_tools commit 48d9f377e28d686c2e0d8eee6684e34c52b7b3cc
| author | mvdbeek |
|---|---|
| date | Mon, 20 Aug 2018 12:39:15 -0400 |
| parents | |
| children | 21b5a9170b90 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ac5584567084 |
|---|---|
| 1 from collections import ( | |
| 2 Counter, | |
| 3 OrderedDict | |
| 4 ) | |
| 5 | |
| 6 import click | |
| 7 import pandas as pd | |
| 8 import pysam | |
| 9 | |
| 10 | |
| 11 def global_size_distribution(alignment_file, minimum_size=18, maximum_size=30): | |
| 12 reference_counters = OrderedDict() | |
| 13 for reference in alignment_file.references: | |
| 14 sense_counter = Counter() | |
| 15 antisense_counter = Counter() | |
| 16 for i in range(minimum_size, maximum_size + 1): | |
| 17 sense_counter[i] = 0 | |
| 18 antisense_counter[i] = 0 | |
| 19 reference_counters[reference] = {'sense': sense_counter, | |
| 20 'antisense': antisense_counter} | |
| 21 for i, read in enumerate(alignment_file): | |
| 22 if not read.is_unmapped: | |
| 23 reference = read.reference_name | |
| 24 readlength = read.query_alignment_length | |
| 25 if minimum_size - 1 < readlength < maximum_size + 1: | |
| 26 if read.is_reverse: | |
| 27 reference_counters[reference]['antisense'][readlength] += 1 | |
| 28 else: | |
| 29 reference_counters[reference]['sense'][readlength] += 1 | |
| 30 df = pd.Panel(reference_counters).to_frame() | |
| 31 df.index.names = ['readlength', 'orientation'] | |
| 32 return df | |
| 33 | |
| 34 | |
| 35 def to_long(df): | |
| 36 df = df.reset_index() | |
| 37 df = df.melt(id_vars=('readlength', 'orientation')) | |
| 38 df.columns = ['readlength', 'orientation', 'reference', 'count'] | |
| 39 return df | |
| 40 | |
| 41 | |
| 42 def write_table(df, output_path): | |
| 43 df.to_csv(output_path, sep="\t") | |
| 44 | |
| 45 | |
| 46 @click.command() | |
| 47 @click.argument('alignment_path', type=click.Path(exists=True)) | |
| 48 @click.option('--minimum_size', default=18, type=int, help="Minimum readlength to consider") | |
| 49 @click.option('--maximum_size', default=30, type=int, help="Minimum readlength to consider") | |
| 50 @click.option('--wide/--long', default=False, help="Output wide or long format.") | |
| 51 @click.option('--output', default="/dev/stdout", help="Write to this file") | |
| 52 def size_dist(alignment_path, minimum_size=18, maximum_size=30, output="/dev/stdout", wide=False): | |
| 53 """Calculate size distribution and orientation""" | |
| 54 with pysam.AlignmentFile(alignment_path) as alignment_file: | |
| 55 df = global_size_distribution(alignment_file, minimum_size, maximum_size) | |
| 56 if not wide: | |
| 57 df = to_long(df) | |
| 58 write_table(df, output) | |
| 59 | |
| 60 | |
| 61 if __name__ == '__main__': | |
| 62 size_dist() |
