Mercurial > repos > mvdbeek > size_distribution
comparison size_distributions.py @ 4:f1eeaf42144b draft default tip
planemo upload for repository https://github.com/bardin-lab/smallRNA_tools commit c8e0a703fcdff580ba0a0c5806a37c088c03ab7b-dirty
| author | mvdbeek |
|---|---|
| date | Mon, 20 Aug 2018 14:46:57 -0400 |
| parents | 21b5a9170b90 |
| children | |
comparison
equal
deleted
inserted
replaced
| 3:21b5a9170b90 | 4:f1eeaf42144b |
|---|---|
| 27 reference_counters[reference]['antisense'][readlength] += 1 | 27 reference_counters[reference]['antisense'][readlength] += 1 |
| 28 else: | 28 else: |
| 29 reference_counters[reference]['sense'][readlength] += 1 | 29 reference_counters[reference]['sense'][readlength] += 1 |
| 30 df = pd.Panel(reference_counters).to_frame() | 30 df = pd.Panel(reference_counters).to_frame() |
| 31 df.index.names = ['readlength', 'orientation'] | 31 df.index.names = ['readlength', 'orientation'] |
| 32 return df | 32 return i, df |
| 33 | 33 |
| 34 | 34 |
| 35 def to_long(df): | 35 def to_long(df, total_count): |
| 36 df = df.reset_index() | 36 df = df.reset_index() |
| 37 df = df.melt(id_vars=('readlength', 'orientation')) | 37 df = df.melt(id_vars=('readlength', 'orientation')) |
| 38 df.columns = ['readlength', 'orientation', 'reference', 'count'] | 38 df.columns = ['readlength', 'orientation', 'reference', 'count'] |
| | 39 df['TPM'] = df['count'] / total_count * 10 ** 6 |
| 39 return df | 40 return df |
| 40 | 41 |
| 41 | 42 |
| 42 def write_table(df, output_path): | 43 def write_table(df, output_path): |
| 43 df.to_csv(output_path, sep="\t", index=False) | 44 df.to_csv(output_path, sep="\t", index=False) |
| 50 @click.option('--wide/--long', default=False, help="Output wide or long format.") | 51 @click.option('--wide/--long', default=False, help="Output wide or long format.") |
| 51 @click.option('--output', default="/dev/stdout", help="Write to this file") | 52 @click.option('--output', default="/dev/stdout", help="Write to this file") |
| 52 def size_dist(alignment_path, minimum_size=18, maximum_size=30, output="/dev/stdout", wide=False): | 53 def size_dist(alignment_path, minimum_size=18, maximum_size=30, output="/dev/stdout", wide=False): |
| 53 """Calculate size distribution and orientation""" | 54 """Calculate size distribution and orientation""" |
| 54 with pysam.AlignmentFile(alignment_path) as alignment_file: | 55 with pysam.AlignmentFile(alignment_path) as alignment_file: |
| 55 df = global_size_distribution(alignment_file, minimum_size, maximum_size) | 56 total_count, df = global_size_distribution(alignment_file, minimum_size, maximum_size) |
| 56 if not wide: | 57 if not wide: |
| 57 df = to_long(df) | 58 df = to_long(df, total_count) |
| 58 write_table(df, output) | 59 write_table(df, output) |
| 59 | 60 |
| 60 | 61 |
| 61 if __name__ == '__main__': | 62 if __name__ == '__main__': |
| 62 size_dist() | 63 size_dist() |
