Mercurial > repos > dfornika > artic_align_trim
annotate align_trim.py @ 10:1ae3d853cac8 draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit fc533961a332436c344ecbed6e7b0a17e108ae50-dirty"
| author | dfornika | 
|---|---|
| date | Tue, 17 Mar 2020 23:39:02 +0000 | 
| parents | 26516cf26444 | 
| children | 
| rev | line source | 
|---|---|
| 0 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 2 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 3 # Written by Nick Loman | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 4 # Originally part of the ZiBRA pipeline (zibraproject.org) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 5 # This file adapted from ARTICnetwork 'fieldbioinformatics' pipeline: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 6 # https://github.com/artic-network/fieldbioinformatics | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 7 | 
| 6 
26516cf26444
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: 0 diff
changeset | 8 import csv | 
| 0 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 9 import sys | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 10 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 11 from collections import defaultdict | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 12 from copy import copy | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 13 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 14 import pysam | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 15 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 16 def read_bed_file(fn): | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 17 bedfile = [] | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 18 with open(fn) as csvfile: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 19 reader = csv.reader(csvfile, dialect='excel-tab') | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 20 for row in reader: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 21 bedrow = {} | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 22 bedrow['Primer_ID'] = row[3] | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 23 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 24 if len(row) >= 6: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 25 # new style bed | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 26 bedrow['direction'] = row[5] | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 27 elif len(row) == 5: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 28 # old style without direction | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 29 if 'LEFT' in row[3]: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 30 bedrow['direction'] = '+' | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 31 elif 'RIGHT' in row[3]: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 32 bedrow['direction'] = '-' | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 33 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 34 print("Malformed BED file!", file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 35 raise SystemExit | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 36 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 37 if bedrow['direction'] == '+': | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 38 bedrow['end'] = int(row[2]) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 39 bedrow['start'] = int(row[1]) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 40 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 41 bedrow['end'] = int(row[1]) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 42 bedrow['start'] = int(row[2]) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 43 bedfile.append(bedrow) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 44 return bedfile | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 45 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 46 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 47 def check_still_matching_bases(s): | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 48 for flag, length in s.cigartuples: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 49 if flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 50 return True | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 51 return False | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 52 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 53 def trim(args, cigar, s, start_pos, end): | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 54 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 55 pos = s.pos | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 56 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 57 pos = s.reference_end | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 58 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 59 eaten = 0 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 60 while 1: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 61 ## chomp stuff off until we reach pos | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 62 if end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 63 flag, length = cigar.pop() | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 64 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 65 flag, length = cigar.pop(0) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 66 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 67 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 68 print("Chomped a %s, %s" % (flag, length), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 69 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 70 if flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 71 ## match | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 72 #to_trim -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 73 eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 74 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 75 pos += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 76 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 77 pos -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 78 if flag == 1: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 79 ## insertion to the ref | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 80 #to_trim -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 81 eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 82 if flag == 2: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 83 ## deletion to the ref | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 84 #eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 85 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 86 pos += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 87 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 88 pos -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 89 pass | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 90 if flag == 4: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 91 eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 92 if not end and pos >= start_pos and flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 93 break | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 94 if end and pos <= start_pos and flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 95 break | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 96 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 97 #print >>sys.stderr, "pos:%s %s" % (pos, start_pos) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 98 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 99 extra = abs(pos - start_pos) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 100 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 101 print("extra %s" % (extra), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 102 if extra: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 103 if flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 104 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 105 print("Inserted a %s, %s" % (0, extra), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 106 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 107 if end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 108 cigar.append((0, extra)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 109 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 110 cigar.insert(0, (0, extra)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 111 eaten -= extra | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 112 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 113 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 114 s.pos = pos - extra | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 115 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 116 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 117 print("New pos: %s" % (s.pos), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 118 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 119 if end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 120 cigar.append((4, eaten)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 121 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 122 cigar.insert(0, (4, eaten)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 123 oldcigarstring = s.cigarstring | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 124 s.cigartuples = cigar | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 125 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 126 #print >>sys.stderr, s.query_name, oldcigarstring[0:50], s.cigarstring[0:50] | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 127 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def find_primer(bed, pos, direction):
    """Locate the primer on one strand whose start lies nearest to ``pos``.

    Args:
        bed: iterable of primer records (dict-like) exposing at least the
            ``'start'`` and ``'direction'`` keys.
        pos: reference coordinate to search around.
        direction: strand marker compared against each record's
            ``'direction'`` value (callers in this file pass ``'+'`` or ``'-'``).

    Returns:
        A ``(distance, offset, primer)`` tuple where ``offset`` is
        ``primer['start'] - pos``, ``distance`` is ``abs(offset)`` and
        ``primer`` is the matching record itself. When several primers are
        equally close, the one appearing first in ``bed`` is returned.

    Raises:
        ValueError: if ``bed`` holds no primer with the requested direction.
    """
    candidates = [p for p in bed if p['direction'] == direction]
    if not candidates:
        # min() on an empty sequence also raises ValueError, but with a
        # message that gives no hint about the actual cause.
        raise ValueError(
            "no primers with direction %r found in the primer scheme" % (direction,)
        )

    # min() keeps the first minimal element, so ties resolve in BED order.
    nearest = min(candidates, key=lambda p: abs(p['start'] - pos))
    offset = nearest['start'] - pos
    return (abs(offset), offset, nearest)
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 133 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def is_correctly_paired(p1, p2):
    """Tell whether a left and a right primer hit share one amplicon name.

    Both arguments are ``(distance, offset, primer)`` tuples as produced by
    ``find_primer``; only ``primer['Primer_ID']`` is consulted. The
    ``'_LEFT'`` marker is removed from the first ID and ``'_RIGHT'`` from the
    second, and the remainders are compared for equality.

    NOTE(review): any extra suffix after the marker (for example an
    alternate-primer tag) is left in place and therefore makes the pair
    compare unequal -- confirm that this is intended.
    """
    left_amplicon = p1[2]['Primer_ID'].replace('_LEFT', '')
    right_amplicon = p2[2]['Primer_ID'].replace('_RIGHT', '')
    return left_amplicon == right_amplicon
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 142 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def go(args):
    """Soft-clip primer sequence from alignments streamed through the tool.

    Alignments are read from ``"-"`` in BAM mode (``"rb"``) and written to
    ``"-"`` as SAM with header (``"wh"``). For every mapped,
    non-supplementary read the nearest ``'+'`` primer to the alignment start
    and the nearest ``'-'`` primer to the alignment end are looked up, the
    read is trimmed back to those primers via ``trim`` and, if it still
    passes ``check_still_matching_bases``, it is written out.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``bedfile`` (primer scheme path), ``report`` (optional path of a
            tab-separated per-read report), ``verbose`` (echo diagnostics to
            stderr), ``start`` (trim to primer start instead of primer end)
            and ``normalise`` (optional cap on reads kept per primer pair
            and strand; a falsy value disables the cap).

    Changes compared with the previous revision of this function:
        * the report file is only closed when it was actually opened; the
          unconditional ``reportfh.close()`` failed when ``--report`` was
          not supplied;
        * the alignment handles and the report file are closed in a
          ``finally`` clause so they are released on errors as well;
        * the verbose message of the right-hand trim branch now prints
          ``<=``, which is the condition that branch is reached under.
    """
    counter = defaultdict(int)

    reportfh = None
    infile = None
    outfile = None
    try:
        if args.report:
            reportfh = open(args.report, "w")
            print("QueryName\tReferenceStart\tReferenceEnd\tPrimerPair\tPrimer1\tPrimer1Start\tPrimer2\tPrimer2Start\tIsSecondary\tIsSupplementary\tStart\tEnd\tCorrectlyPaired", file=reportfh)

        bed = read_bed_file(args.bedfile)

        infile = pysam.AlignmentFile("-", "rb")
        outfile = pysam.AlignmentFile("-", "wh", template=infile)

        # Which edge of a primer record the alignment is trimmed back to.
        edge = 'start' if args.start else 'end'

        for s in infile:
            ## logic - if alignment start site is _before_ but within X bases of
            ## a primer site, trim it off

            if s.is_unmapped:
                print("%s skipped as unmapped" % (s.query_name), file=sys.stderr)
                continue

            if s.is_supplementary:
                print("%s skipped as supplementary" % (s.query_name), file=sys.stderr)
                continue

            # Work on a copy: trim() edits this list and assigns it back to
            # the read, and both trim calls below share the same list.
            cigar = copy(s.cigartuples)

            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            correctly_paired = is_correctly_paired(p1, p2)

            report = "%s\t%s\t%s\t%s_%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%d" % (
                s.query_name,
                s.reference_start,
                s.reference_end,
                p1[2]['Primer_ID'],
                p2[2]['Primer_ID'],
                p1[2]['Primer_ID'],
                abs(p1[1]),
                p2[2]['Primer_ID'],
                abs(p2[1]),
                s.is_secondary,
                s.is_supplementary,
                p1[2]['start'],
                p2[2]['end'],
                correctly_paired,
            )
            if reportfh is not None:
                print(report, file=reportfh)

            if args.verbose:
                print(report, file=sys.stderr)

            ## if the alignment starts before the end of the primer, trim to that position

            # Best effort by design: a read whose trimming fails is reported
            # on stderr and still flows through the checks below.
            try:
                primer_position = p1[2][edge]

                if s.reference_start < primer_position:
                    trim(args, cigar, s, primer_position, 0)
                elif args.verbose:
                    print("ref start %s >= primer_position %s" % (s.reference_start, primer_position), file=sys.stderr)

                primer_position = p2[2][edge]

                if s.reference_end > primer_position:
                    trim(args, cigar, s, primer_position, 1)
                elif args.verbose:
                    print("ref end %s <= primer_position %s" % (s.reference_end, primer_position), file=sys.stderr)
            except Exception as e:
                print("problem %s" % (e,), file=sys.stderr)

            if args.normalise:
                # Count reads per primer pair and strand; drop the surplus.
                pair = "%s-%s-%d" % (p1[2]['Primer_ID'], p2[2]['Primer_ID'], s.is_reverse)
                counter[pair] += 1

                if counter[pair] > args.normalise:
                    continue

            if not check_still_matching_bases(s):
                continue

            outfile.write(s)
    finally:
        # Close only what was opened; the output goes first so it is flushed.
        for handle in (outfile, infile, reportfh):
            if handle is not None:
                handle.close()
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 221 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def main():
    """Build the command-line interface, parse the arguments and run ``go``."""
    import argparse

    cli = argparse.ArgumentParser(description='Trim alignments from an amplicon scheme.')
    cli.add_argument('bedfile', help='BED file containing the amplicon scheme')

    # Optional switches, registered in the order they appear in --help.
    optional_flags = (
        ('--normalise', dict(type=int, help='Subsample to n coverage per strand')),
        ('--report', dict(type=str, help='Output report to file')),
        ('--start', dict(action='store_true', help='Trim to start of primers instead of ends')),
        ('--verbose', dict(action='store_true', help='Debug mode')),
    )
    for flag, spec in optional_flags:
        cli.add_argument(flag, **spec)

    go(cli.parse_args())
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 234 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 235 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
