Mercurial > repos > dfornika > artic_align_trim
annotate align_trim.py @ 2:8bcfa2bb56cd draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
| author | dfornika | 
|---|---|
| date | Tue, 10 Mar 2020 22:27:23 +0000 | 
| parents | defebd1f95b9 | 
| children | 26516cf26444 | 
| rev | line source | 
|---|---|
| 0 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 2 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 3 # Written by Nick Loman | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 4 # Originally part of the ZiBRA pipeline (zibraproject.org) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 5 # This file adapted from ARTICnetwork 'fieldbioinformatics' pipeline: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 6 # https://github.com/artic-network/fieldbioinformatics | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 7 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 8 import sys | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 9 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 10 from collections import defaultdict | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 11 from copy import copy | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 12 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 13 import pysam | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 14 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def read_bed_file(fn):
    """Parse a tab-separated primer BED file into a list of primer records.

    Each non-blank row must have at least five columns. Column 4 (index 3)
    is the primer ID. When a sixth column is present it is used verbatim as
    the direction; for five-column ("old style") rows the direction is
    derived from the primer ID: 'LEFT' -> '+', 'RIGHT' -> '-'.

    Args:
        fn: Path of the BED file to read.

    Returns:
        A list of dicts with keys 'Primer_ID', 'direction', 'end' and
        'start'. For '+' rows start/end are columns 2/3; for every other
        direction they are swapped (start = column 3, end = column 2).

    Raises:
        SystemExit: After printing "Malformed BED file!" to stderr, when a
            row has fewer than five columns or an old-style row's primer ID
            contains neither 'LEFT' nor 'RIGHT'.
        ValueError: If a coordinate column is not an integer.
    """
    # Imported here because the module-level imports do not bring in csv.
    import csv

    bedfile = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(fn, newline='') as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        for row in reader:
            if not row:
                # csv yields an empty list for a blank line; nothing to parse.
                continue
            if len(row) < 5:
                # Too few columns to determine primer ID and direction.
                print("Malformed BED file!", file=sys.stderr)
                raise SystemExit

            bedrow = {}
            bedrow['Primer_ID'] = row[3]

            if len(row) >= 6:
                # new style bed: explicit strand column
                bedrow['direction'] = row[5]
            elif 'LEFT' in row[3]:
                # old style without direction: infer it from the primer name
                bedrow['direction'] = '+'
            elif 'RIGHT' in row[3]:
                bedrow['direction'] = '-'
            else:
                print("Malformed BED file!", file=sys.stderr)
                raise SystemExit

            if bedrow['direction'] == '+':
                bedrow['end'] = int(row[2])
                bedrow['start'] = int(row[1])
            else:
                # Reverse primers are stored with start/end swapped.
                bedrow['end'] = int(row[1])
                bedrow['start'] = int(row[2])
            bedfile.append(bedrow)
    return bedfile
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 44 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 45 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def check_still_matching_bases(s):
    """Return True if the alignment's CIGAR contains at least one match op.

    Args:
        s: An alignment object exposing `cigartuples`, an iterable of
            (operation, length) pairs, or None when there is no CIGAR.

    Returns:
        True when any operation code is 0 (alignment match), else False.
        An absent (None) or empty CIGAR counts as having no match.
    """
    # `or ()` guards against cigartuples being None, which would otherwise
    # raise TypeError when iterated.
    return any(op == 0 for op, _length in (s.cigartuples or ()))
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 51 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 52 def trim(args, cigar, s, start_pos, end): | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 53 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 54 pos = s.pos | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 55 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 56 pos = s.reference_end | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 57 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 58 eaten = 0 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 59 while 1: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 60 ## chomp stuff off until we reach pos | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 61 if end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 62 flag, length = cigar.pop() | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 63 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 64 flag, length = cigar.pop(0) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 65 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 66 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 67 print("Chomped a %s, %s" % (flag, length), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 68 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 69 if flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 70 ## match | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 71 #to_trim -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 72 eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 73 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 74 pos += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 75 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 76 pos -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 77 if flag == 1: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 78 ## insertion to the ref | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 79 #to_trim -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 80 eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 81 if flag == 2: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 82 ## deletion to the ref | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 83 #eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 84 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 85 pos += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 86 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 87 pos -= length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 88 pass | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 89 if flag == 4: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 90 eaten += length | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 91 if not end and pos >= start_pos and flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 92 break | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 93 if end and pos <= start_pos and flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 94 break | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 95 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 96 #print >>sys.stderr, "pos:%s %s" % (pos, start_pos) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 97 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 98 extra = abs(pos - start_pos) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 99 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 100 print("extra %s" % (extra), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 101 if extra: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 102 if flag == 0: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 103 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 104 print("Inserted a %s, %s" % (0, extra), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 105 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 106 if end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 107 cigar.append((0, extra)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 108 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 109 cigar.insert(0, (0, extra)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 110 eaten -= extra | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 111 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 112 if not end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 113 s.pos = pos - extra | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 114 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 115 if args.verbose: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 116 print("New pos: %s" % (s.pos), file=sys.stderr) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 117 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 118 if end: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 119 cigar.append((4, eaten)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 120 else: | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 121 cigar.insert(0, (4, eaten)) | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 122 oldcigarstring = s.cigarstring | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 123 s.cigartuples = cigar | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 124 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 125 #print >>sys.stderr, s.query_name, oldcigarstring[0:50], s.cigarstring[0:50] | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 126 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def find_primer(bed, pos, direction):
    """Return the primer on the given strand whose start lies closest to `pos`.

    Only entries of `bed` whose 'direction' equals `direction` are considered.
    The result is a tuple ``(distance, offset, primer)`` where ``offset`` is
    ``primer['start'] - pos``, ``distance`` is ``abs(offset)`` and ``primer``
    is the matching entry of `bed`. When several primers are equally close,
    the one listed first in `bed` is returned.

    Raises ValueError (from min()) when no entry has the requested direction.
    """
    candidates = []
    for primer in bed:
        if primer['direction'] != direction:
            continue
        offset = primer['start'] - pos
        candidates.append((abs(offset), offset, primer))

    # Rank on the absolute distance only, so ties keep their order in `bed`.
    return min(candidates, key=lambda candidate: candidate[0])
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 132 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def is_correctly_paired(p1, p2):
    """Tell whether two primer hits name the same amplicon.

    `p1` and `p2` are tuples whose element at index 2 is a primer record
    carrying a 'Primer_ID' string. '_LEFT' is removed from the first ID and
    '_RIGHT' from the second; the hits are considered paired when what
    remains is identical.
    """
    left_id, right_id = p1[2]['Primer_ID'], p2[2]['Primer_ID']
    return left_id.replace('_LEFT', '') == right_id.replace('_RIGHT', '')
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 141 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def go(args):
    """Trim primer sequence off every alignment in the input stream.

    Alignments are read with pysam from "-" (mode "rb") and the surviving
    records are written with pysam to "-" (mode "wh"), reusing the input
    header as template. For each mapped, non-supplementary alignment the
    nearest '+' primer to its reference start and the nearest '-' primer to
    its reference end are looked up, the alignment is trimmed against both,
    and it is optionally subsampled per primer pair and strand.

    Parameters (attributes read from `args`):
        bedfile   -- path handed to read_bed_file() to load the primer scheme.
        report    -- optional path; when set, one tab-separated line per
                     processed alignment is written there.
        start     -- when true, trim to the primers' 'start' coordinate
                     instead of their 'end' coordinate.
        normalise -- optional int; at most this many alignments are kept per
                     (primer pair, strand) combination.
        verbose   -- when true, extra diagnostics go to stderr.

    The report file, if any, is always closed, even when processing fails.
    """
    # Keep the handle as None when no report was requested so that the
    # cleanup in `finally` never touches an unbound name.
    reportfh = open(args.report, "w") if args.report else None

    try:
        if reportfh is not None:
            print("QueryName\tReferenceStart\tReferenceEnd\tPrimerPair\tPrimer1\tPrimer1Start\tPrimer2\tPrimer2Start\tIsSecondary\tIsSupplementary\tStart\tEnd\tCorrectlyPaired", file=reportfh)

        bed = read_bed_file(args.bedfile)

        # Number of alignments seen so far per "<primer1>-<primer2>-<strand>".
        counter = defaultdict(int)

        infile = pysam.AlignmentFile("-", "rb")
        outfile = pysam.AlignmentFile("-", "wh", template=infile)
        for s in infile:
            cigar = copy(s.cigartuples)

            ## logic - if alignment start site is _before_ but within X bases of
            ## a primer site, trim it off

            if s.is_unmapped:
                print("%s skipped as unmapped" % (s.query_name), file=sys.stderr)
                continue

            if s.is_supplementary:
                print("%s skipped as supplementary" % (s.query_name), file=sys.stderr)
                continue

            # Each hit is (distance, offset, primer record).
            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            correctly_paired = is_correctly_paired(p1, p2)

            report = "%s\t%s\t%s\t%s_%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%d" % (s.query_name, s.reference_start, s.reference_end, p1[2]['Primer_ID'], p2[2]['Primer_ID'], p1[2]['Primer_ID'], abs(p1[1]), p2[2]['Primer_ID'], abs(p2[1]), s.is_secondary, s.is_supplementary, p1[2]['start'], p2[2]['end'], correctly_paired)
            if reportfh is not None:
                print(report, file=reportfh)

            if args.verbose:
                print(report, file=sys.stderr)

            ## if the alignment starts before the end of the primer, trim to that position

            # Trimming is best-effort: a failure is logged and the alignment
            # continues through the remaining filters as it currently stands.
            try:
                if args.start:
                    primer_position = p1[2]['start']
                else:
                    primer_position = p1[2]['end']

                if s.reference_start < primer_position:
                    # Last argument 0: clip at the start of the alignment.
                    trim(args, cigar, s, primer_position, 0)
                elif args.verbose:
                    print("ref start %s >= primer_position %s" % (s.reference_start, primer_position), file=sys.stderr)

                if args.start:
                    primer_position = p2[2]['start']
                else:
                    primer_position = p2[2]['end']

                if s.reference_end > primer_position:
                    # Last argument 1: clip at the end of the alignment.
                    trim(args, cigar, s, primer_position, 1)
                elif args.verbose:
                    # This branch runs when reference_end <= primer_position.
                    print("ref end %s <= primer_position %s" % (s.reference_end, primer_position), file=sys.stderr)
            except Exception as e:
                print("problem %s" % (e,), file=sys.stderr)

            if args.normalise:
                pair = "%s-%s-%d" % (p1[2]['Primer_ID'], p2[2]['Primer_ID'], s.is_reverse)
                counter[pair] += 1

                # Drop everything beyond the requested depth for this pair/strand.
                if counter[pair] > args.normalise:
                    continue

            # NOTE(review): presumably rejects alignments with no aligned bases
            # left after trimming -- confirm against check_still_matching_bases.
            if not check_still_matching_bases(s):
                continue

            outfile.write(s)
    finally:
        if reportfh is not None:
            reportfh.close()
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 220 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
def main():
    """Command-line entry point: parse the options and hand them to go()."""
    import argparse

    cli = argparse.ArgumentParser(description='Trim alignments from an amplicon scheme.')
    cli.add_argument('bedfile', help='BED file containing the amplicon scheme')

    # Optional switches, declared in the order they appear in --help.
    optional_switches = (
        ('--normalise', dict(type=int, help='Subsample to n coverage per strand')),
        ('--report', dict(type=str, help='Output report to file')),
        ('--start', dict(action='store_true', help='Trim to start of primers instead of ends')),
        ('--verbose', dict(action='store_true', help='Debug mode')),
    )
    for flag, settings in optional_switches:
        cli.add_argument(flag, **settings)

    go(cli.parse_args())
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 233 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 234 | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 235 if __name__ == "__main__": | 
| 
defebd1f95b9
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/artic_align_trim commit 374121821497c96c8450afda266951c2f431ba11-dirty"
 dfornika parents: diff
changeset | 236 main() | 
