Mercurial > repos > rijst > snptools

--- a/gbk_to_fasta.py	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-import sys
-
-if len(sys.argv) < 3:
-    exit("Not enough arguments passed, pleas provide names of input- and output file")
-
-input_name = sys.argv[1]
-output_name = sys.argv[2]
-
-from Bio import GenBank
-
-try: seq_record = GenBank.RecordParser().parse(open(input_name))
-except: exit("Error reading %s, check file correctness." % input_name)
-
-try: out_file = open(output_name, 'w')
-except IOError as e:
-    exit("Error trying to open '%s': {1}".format(e.errno, e.strerror))
-
-accession = definition = ''
-if seq_record.accession[0] != '': accession = '|gb|'+seq_record.accession[0]
-if seq_record.definition != '': definition = '|'+seq_record.definition
-
-out_file.write(">gi|%s%s%s\n" % (seq_record.gi,accession,definition))
-
-i = 0
-while i < len(seq_record.sequence):
-    out_file.write(seq_record.sequence[i:i+70]+"\n")
-    i += 70
-
-out_file.close()
--- a/gbk_to_fasta.xml	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-<tool id="gbk2fasta" name="Genbank-to-Fasta">
-  <description>produces a Fasta file from a genbank file</description>
-  <command interpreter="python">gbk_to_fasta.py $input $output</command>
-  <inputs>
-    <param name="input" type="data" format="text" label="Genbank file" />
-  </inputs>
-  <outputs>
-    <data name="output" format="fasta" label="${tool.name} on ${on_string}: converted Genbank file" />
-  </outputs>
-  <help>
-  This tool produces a fasta file from a genbank file containing a sequence.
-  </help>
-</tool>
--- a/snpsplit.py	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-'''This script takes a tab-delimited file containting position, ref base, mut base and splits any multicharacter ref or mut base entries into seperate lines and calculating the new positions'''
-
-import sys
-
-if len(sys.argv) != 3:
-    exit("snpsplit takes exactly two arguments (input and output file), no more and no less")
-
-input_name = sys.argv[1]
-output_name = sys.argv[2]
-
-try:
-    in_file = open(input_name)
-except IOError as e:
-    exit("Error trying to open '"+input_name+"': {1}".format(e.errno, e.strerror))
-
-try:
-    out_file = open(output_name, 'w')
-except IOError as e:
-    exit("Error trying to open '"+output_name+"': {1}".format(e.errno, e.strerror))
-
-def splitter(cells):
-    global out_lines
-    for i in range(0,len(cells[1])):
-        if cells[1][i] == cells[2][i]: continue
-        out_file.write(str(int(cells[0])+i)+'\t'+cells[1][i]+'\t'+cells[2][i]+'\n')
-        out_lines += 1
-
-in_lines=out_lines=0
-out_file.write("Position\tRef\tMut\n")
-for line in in_file:
-    in_lines += 1
-    cells = line.rstrip().split('\t')
-    if not str(line[0]).isdigit():
-        out_file.write(line)
-        continue
-
-    # Can only deal with SNPs/MNPs, not indels.
-    if len(cells[1]) != len(cells[2]): continue
-    splitter(cells)
-
-in_file.close()
-out_file.close()
-
-print "Lines read: %s" % in_lines
-print "Lines printed: %s" % out_lines
--- a/snpsplit.xml	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="snpsplit" name="SNP splitter">
-  <description>splits multicharacter entries into separate lines</description>
-  <command interpreter="python">snpsplit.py $input $output</command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="SNP table" />
-  </inputs>
-  <outputs>
-    <data name="output" format="tabular" label="${tool.name} on ${on_string}" />
-  </outputs>
-  <help>
-
-Input: tab delimited, format Position Ref Mut
-Position is 1-based genomic coordinate
-Ref is the reference sequence
-Mut is the mutant sequence
-
-Ref en Mut sequences consisting of more than one character will be split up into separate lines. Example:
-Input:
-123 CGT ATG
-Output:
-123 C A
-124 G T
-125 T G
-
-Bases that are the same in both columns, will be skipped. Example:
-Input:
-123 CGT AGG
-Output:
-123 C A
-125 T G
-
-  </help>
-</tool>
--- a/tablemerger.py	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-'''Takes tab-delimited SNP tables from user input and merges them into one.'''
-
-import sys
-files = []
-filenames = []
-
-try:
-    output = open(sys.argv[1], "w")
-    output.write('Position\tReference')
-except:
-    exit("No output file given or unable to open output file.")
-for name in sys.argv[2:]:
-    try:
-        files.append(open(name, "rU"))
-    except:
-        continue
-
-# Fetch headers and print them to output file;
-headers = [header.readline()[:-1].split('\t')[2:] for header in files]
-columns = [len(strains) for strains in headers]
-for strain in [a for b in headers for a in b]:
-    output.write('\t'+strain)
-    output.flush()
-
-file_active = [True]*len(files)
-snps = [row.readline()[:-1].split('\t') for row in files]
-
-while True in file_active:
-    for h in range(0,len(snps)):
-        if file_active[h]:
-            cur_pos = [h]
-            lowest = int(snps[h][0])
-            break
-    i = 1
-
-    # Determine lowest position
-    while i < len(snps):
-        if int(snps[i][0]) < lowest and file_active[i]:
-            lowest = int(snps[i][0])
-            cur_pos = [i]
-        elif int(snps[i][0]) == lowest:
-            cur_pos.append(i)
-        i+=1
-
-    # Check if all SNPs sharing a position have the same reference base, exit with message otherwise;
-    if len(cur_pos) > 1:
-        ref_base = snps[cur_pos[0]][1].lower()
-        for j in cur_pos[1:]:
-            if snps[j][1].lower() != ref_base:
-                error = '\nError: Reference bases not the same for position %s, present in following files:' % lowest
-                for k in cur_pos:
-                    error += ' '+filenames[k]
-                exit(error+'.')
-
-    # Write line to output file containing position, ref base and snps/empty cells;
-    output.write('\n'+snps[cur_pos[0]][0]+'\t'+snps[cur_pos[0]][1].lower())
-    for l,row in enumerate(snps):
-        if l in cur_pos:
-            for snp in row[2:]:
-                output.write('\t'+snp)
-        else:
-            output.write('\t'*columns[l])
-
-    # Read new line in files that had snp at current position;
-    for m in cur_pos:
-        line = files[m].readline()
-        if line == '': file_active[m] = False
-        else:
-            snps[m] = line.split('\t')
-            snps[m][-1] = snps[m][-1].rstrip()# Remove newline character at end of line;
-
-for it in files: it.close()
-output.close()
--- a/tablemerger.xml	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-<tool id="tablemerger" name="SNP table merger">
-  <description>merges any number of SNP tables into one</description>
-  <command interpreter="python">tablemerger.py $output
-    #for $f in $inputs:
-    $f.in
-    #end for
-  </command>
-  <inputs>
-    <repeat name="inputs" title="Input files">
-      <param name="in" type="data" format="tabular" label="Input SNP table" />
-    </repeat>
-  </inputs>
-  <outputs>
-    <data name="output" format="tabular" label="${tool.name} on various SNP tables" />
-  </outputs>
-  <help>
-This tool takes any number of tab-delimited SNP tables and merges them together.It puts SNPs at the same position on the same line and ignores bases that are the same in two strains.
-Example input:
-Position Ref Strain1
-123 A G
-125 C T
-
-Position Ref Strain2
-123 A T
-124 G C
-125 C T
-
-Would give output:
-Position Ref Strain1 Strain2
-123 A G T
-124 G   C
-125 C T T
-  </help>
-</tool>
--- a/trams.py	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,487 +0,0 @@
-#################
-transl_table = 11
-intro_message = ''' +------------------------------------------------------------------+
- | Tool for Rapid Annotation of Microbial SNPs (TRAMS): a simple    |
- | program for rapid annotation of genomic variation in prokaryotes |
- |                                                                  |
- | Developed by: Richard A. Reumerman, Paul R. Herron,              |
- | Paul A. Hoskisson and Vartul Sangal                              |
- +------------------------------------------------------------------+\n'''
-#################
-
-import sys
-import time
-start = time.clock()
-
-# Command line files: SNP REF REF-TYPE ANNOT OVERL SUM;
-if len(sys.argv) < 7:
-    exit("\nNot enough arguments given.\nUsage: TRAMS_Galaxy.py [SNP.] [REF.] [ANNOT.] [OVERL.] [SUM.]")
-try:
-    file_snps = open(sys.argv[1], "rU")
-except IOError as e:
-    exit("Error trying to open '"+sys.argv[1]+"': {1}".format(e.errno, e.strerror))
-try:
-    file_ref = open(sys.argv[2], "rU")
-except IOError as e:
-    exit("Error trying to open '"+sys.argv[2]+"': {1}".format(e.errno, e.strerror))
-
-filetype_reference = sys.argv[3]
-
-try:
-    file_out = open(sys.argv[4], "w")
-except IOError as e:
-    exit("Error trying to open '"+sys.argv[4]+"': {1}".format(e.errno, e.strerror))
-try:
-    file_overlap = open(sys.argv[5], "w")
-except IOError as e:
-    exit("Error trying to open '"+sys.argv[5]+"': {1}".format(e.errno, e.strerror))
-try:
-    file_summary = open(sys.argv[6], "w")
-except IOError as e:
-    exit("Error trying to open '"+sys.argv[6]+"': {1}".format(e.errno, e.strerror))
-
-import Bio
-from Bio import SeqIO, SeqFeature
-from Bio.SeqRecord import SeqRecord
-from Bio.Seq import Seq
-from Bio.Alphabet import generic_dna, IUPAC
-from Bio.Data import CodonTable
-
-modules_loaded = time.clock()
-
-def non_coding_calc(gene, pos = 0):
-    '''This function takes a pseudogene and returns the number of bases
-    located in between the sub-features before 'pos'. Returns 0 if 'pseudo' = False.
-    Input: {start, subfeats, pseudo}, pos (default = 0)'''
-    if not gene['pseudo']: return 0
-
-    non_coding_bases = 0
-    prev_subfeat_end = gene['start']
-    if gene['strand'] == -1:
-        for subfeature in gene['subfeats']:
-            if subfeature.location._start.position < pos:
-                prev_subfeat_end = subfeature.location._end.position
-                continue
-            non_coding_bases += (subfeature.location._start.position - prev_subfeat_end)
-            prev_subfeat_end = subfeature.location._end.position
-    else:
-        for subfeature in gene['subfeats']:
-            non_coding_bases += (subfeature.location._start.position - prev_subfeat_end)
-            prev_subfeat_end = subfeature.location._end.position
-            if prev_subfeat_end >= pos and pos != 0: break
-
-    return non_coding_bases
-
-
-def region_calc(bounds,length):
-    regions = []
-    lastend=i=0
-    while i < len(bounds):
-        if bounds[i]['start'] > lastend:# Intergenic region present;
-            regions.append([lastend,bounds[i]['start'],-1])
-            lastend = bounds[i]['start']
-        else:
-            regions.append([bounds[i]['start'],bounds[i]['end'],i])
-            if bounds[i]['end'] > lastend:
-                lastend = bounds[i]['end']
-            i += 1
-
-    if regions[-1][1] < length:# Final tail of genome;
-        regions.append([lastend,length,-1])
-
-    return regions
-
-
-def overlap_calc(bounds):
-    '''This function takes an array of feature starts and ends and
-    returns an array of starts and ends of all overlapping regions.
-    Input: [{start,end}]'''
-    i = 0
-    overlaps = []
-    while i < len(bounds) - 1:
-        for downstr in bounds[i+1:]:
-            if downstr[0] < bounds[i][1]:# Features overlap;
-                if downstr[1] < bounds[i][1]:# Complete overlap;
-                    overlaps.append([downstr[0],downstr[1],bounds[i][2],downstr[2],[0,0]])
-                else:# Partial overlap;
-                    overlaps.append([downstr[0],bounds[i][1],bounds[i][2],downstr[2],[0,0]])
-            else:# No use looking further;
-                break
-
-        i += 1
-
-    return overlaps
-
-
-def match_feature(bounds,pos,prev=0):
-    '''This function checks if a position is located inside a feature and
-    returns the feature's number if found or -1 if none is found.
-    Input: {start,end},pos,prev_feat (default = 0)'''
-    for i in range(prev, len(bounds)):
-        if (pos >= bounds[i]['start']) and (pos < bounds[i]['end']):
-            return i
-        elif pos < bounds[i]['start']:# No use looking further
-            return -1
-
-    return -1
-
-
-def write_output(line,target=file_out):
-    '''This function takes the 2 dimensional array containing all the SNP
-    data. It contains an array of information on the feature and an array
-    for each strain for which SNPs are given.
-    Input: [[pos],[ref],[cells],[cells],etc]'''
-    target.write('\n'+str(line[0][0]))
-    for cell in line[1]:
-        target.write('\t'+str(cell))
-    for strain in line[2:]:
-        target.write('\t')
-        for cell in strain:
-            target.write('\t'+str(cell))
-
-    target.flush()
-
-
-def new_codon_calc(ref_codon, new_base, pos_in_cod):
-    return str(ref_codon[0:pos_in_cod-1]+new_base+ref_codon[pos_in_cod:len(ref_codon)])
-
-
-def mut_type_check(ref_res, ref_codon, pos_in_gene, new_base, new_codon):
-    if str(new_codon).lower() == str(ref_codon).lower():
-        return ['','','','']
-    new_residue = Seq(new_codon).translate(table=transl_table)
-    if str(new_residue) == str(ref_res):
-        mut_type = 'synonymous'
-    elif (pos_in_gene / 3) < 1 and str(ref_codon).upper() in CodonTable.unambiguous_dna_by_id[transl_table].start_codons:# position 0,1 or 2 and SNP is in start codon;
-        if str(new_codon).upper() in CodonTable.unambiguous_dna_by_id[transl_table].start_codons: mut_type = 'nonsynonymous'
-        else: mut_type = 'nonstart'
-    elif str(new_residue) == '*': mut_type = 'nonsense'
-    elif str(ref_res) == '*': mut_type = 'nonstop'
-    else: mut_type = 'nonsynonymous'
-
-    return [mut_type,new_base,new_codon,new_residue]
-
-
-def codon_process(codon):
-    '''This function processes a codon. It loops through it 3 times,
-    once to determine which is the highest position mutated, once to
-    fill in the cells for the output file and once to output all lines.
-    Input: [empty,start_pos,[line1],[line2],[line3],strand]
-    It also uses global variable strain_nr'''
-    lastposition = [-1]*(strain_nr-1)
-    new_codons = ['']*(strain_nr-1)
-    if codon[-1] == -1:# Change codon position order for -1 features;
-        temp = codon [1:-1]
-        temp.reverse()
-        codon[1:-1] = temp
-    for i,line in enumerate(codon[1:-1],1):
-        if line == '': continue
-        for j,strain in enumerate(line[2:]):
-            if strain[1] in ['a','g','c','t']:
-                lastposition[j] = i
-                new_codons[j] = codon[i][1][8]
-
-    for i,line in enumerate(codon[1:-1],1):
-        if codon[-1] == -1: pos_in_cod = 4-i
-        else: pos_in_cod = i
-
-        if line == '': continue
-        for j,strain in enumerate(line[2:]):
-            if i == lastposition[j]: # Check for synonymous etc.;
-                new_codons[j] = new_codon_calc(new_codons[j],strain[1],pos_in_cod)
-                codon[i][j+2] = mut_type_check(line[1][9],line[1][8],codon[0],strain[1],new_codons[j])
-                straininfo[j][codon[i][j+2][0]] += 1# Counting;
-            elif strain[1] in ['a','g','c','t']:
-                codon[i][j+2] = ['MNP',strain[1],'','']
-                straininfo[j]['mnps'] += 1
-                new_codons[j] = new_codon_calc(new_codons[j],strain[1],pos_in_cod)
-            elif strain[0] == 'Allele missing': codon[i][j+2] = strain
-            else: codon[i][j+2] = ['']*4
-
-    for line in codon[1:-1]:
-        if line != '': write_output(line)
-
-def feature_props(feature):
-    properties = {'type':feature.type,'strand':feature.location._strand,
-                  'sequence':feature.extract(seq_record.seq),'pseudo': False,
-                  'locus_tag':'','gene_name':'','product':'',
-                  'start':int(feature.location._start.position),
-                'end':int(feature.location._end.position)}
-    if 'pseudo' in feature.qualifiers:
-        properties['pseudo'] = True
-        properties['type'] = 'pseudogene'
-        properties['pure_seq'] = properties['sequence']
-        if properties['strand'] == -1:
-            properties['sequence'] = seq_record.seq[feature.location._start.position:feature.location._end.position].reverse_complement()
-        else:
-            properties['sequence'] = seq_record.seq[feature.location._start.position:feature.location._end.position]
-        if feature.sub_features: properties['subfeats'] = feature.sub_features
-    if 'locus_tag' in feature.qualifiers: properties['locus_tag'] = feature.qualifiers['locus_tag'][0]
-    if 'gene' in feature.qualifiers: properties['gene_name']= feature.qualifiers['gene'][0]
-    if feature.type in ['tRNA','rRNA','CDS']: properties['product'] = feature.qualifiers['product'][0]
-
-    return properties
-
-# Read embl/genbank file for information on sequence features;
-try:
-    seq_record = SeqIO.parse(file_ref, filetype_reference).next()
-except:
-    file_ref.close()
-    quit("Error reading "+sys.argv[2]+", please check file for errors.")
-file_ref.close()
-
-# Loop through genome features and save relevant properties;
-feats = []# Dictionary of properties;
-
-feature_types = {'intergenic':0,'gene':0,'pseudogene':0}
-feat_temp_store = ''
-for feature in seq_record.features:
-    # Check if gene is defined as other feature (e.g. CDS). Else, save info from 'gene';
-    if feat_temp_store != '':
-        if (feature.location._start.position == feat_temp_store.location._start.position and
-            feature.location._end.position == feat_temp_store.location._end.position):# Gene also defined as other feature;
-            feat_temp_store = ''
-        else:# Gene not also defined as CDS;
-            feats.append(feature_props(feat_temp_store))
-            feat_temp_store = ''
-    elif feature.type == 'gene':
-        feat_temp_store = feature
-
-    if not feature.type in ['source','gene','misc_feature']:
-        if not feature.type in feature_types and feature.type != 'CDS': feature_types[feature.type] = 0
-        feats.append(feature_props(feature))
-
-
-feat_props = sorted(feats, key=lambda cells:int(cells['start']))
-feat_boundaries = [{'start':item['start'],'end':item['end']} for item in feat_props]
-regions = region_calc(feat_boundaries,len(seq_record.seq))
-feat_overlap = overlap_calc(regions)
-
-reference_loaded = time.clock()
-
-# Create array of SNPs from input file for processing;
-lines = [line.split('\t') for line in file_snps if line.strip()]
-file_snps.close()
-# First line contains headers, extract number of strains etc;
-headers = lines[0]
-snp_table = sorted(lines[1:], key=lambda cells:int(cells[0]))
-
-snps_loaded = time.clock()
-
-# Print output file headers;
-headers[-1] = headers[-1].rstrip()# Remove newline character;
-strain_nr = len(headers)-1
-strains_found = 'Found '+str(strain_nr)+' strains: '+headers[1]+' (reference)'
-first_line = '\t'+headers[1]+'\t'*9
-second_line = 'Position\tFeature\tLocus tag\tGene\tProduct\tStart\tEnd\tStrand\tRef. base\tRef. codon\tRef. res.'
-straininfo = [0]*(len(headers[2:]))
-for i,strain in enumerate(headers[2:]):
-    straininfo[i] = {'snps':0,'mnps':0,'synonymous':0,'nonsynonymous':0,'nonstart':0,'nonstop':0,'nonsense':0}
-    straininfo[i].update(feature_types)
-    strains_found += ', '+strain
-    first_line += '\t\t'+strain+'\t'*3
-    second_line += '\t\tSNP type\tNew base\tNew codon\tNew res.'
-
-file_out.write(first_line+'\n'+second_line)
-file_out.flush()
-
-# Loop through SNPs from array and process them;
-props = {}# Properties of a feature;
-prev_snp = ''# Position of previous SNP;
-to_write = []# Information of current SNP;
-compl_bases = {'a':'t','t':'a','g':'c','c':'g'}
-firstsnp = True# First snp of region, or of codon in cases of 3 positions in codon mutated;
-prev_start=j=k=0
-overlap_snps = []
-codon = ['']*5# Array of codon positions. First item is position of first base of codon in the gene;
-
-for region in regions:
-    firstsnp = True
-    i = prev_start
-    while i < len(snp_table):# Loop through SNPs
-        snp_entry = snp_table[i]
-        if not str(snp_entry[0]).isdigit():# Not a valid line, skip;
-            i += 1
-            continue
-
-        pos = int(snp_entry[0])-1
-        if pos < region[0]:# Not inside region yet;
-            i += 1
-            continue
-        elif firstsnp and pos < region[1]:
-            prev_start = i
-        elif pos >= region[1]:# End of region, process and next;
-            if not firstsnp and codon != ['','','','','']:
-                codon_process(codon)
-            break
-
-        # Documentation of SNPs in feature overlaps;
-        while j < len(feat_overlap)-1 and pos > feat_overlap[j][1]: j += 1
-        k = j
-        while k < len(feat_overlap) and pos >= feat_overlap[k][0]:
-            if pos < feat_overlap[k][1]:
-                if feat_overlap[k][4][0] == 0:
-                    feat_overlap[k][4][0] = pos
-                feat_overlap[k][4][1] = pos
-            k += 1
-
-
-        snp_entry[-1] = snp_entry[-1].rstrip()# Remove newline character at end of line;
-        mnp=in_feat=False
-        snp_feat = region[2]
-        ref_base = snp_entry[1]
-
-        to_write = [[pos+1]]
-
-        # Output feature properties and reference situation;
-        if snp_feat == -1:
-            codon = ['']*5
-            to_write.append(['intergenic','','','','','','',ref_base.upper(),'',''])
-        elif feat_props[snp_feat]['type'] not in ['CDS','gene','pseudogene']:# In feature, but non-coding;
-            codon = ['']*5
-            props = feat_props[snp_feat]
-            if props['strand'] == -1: ref_base = (compl_bases[snp_entry[1].lower()])
-            else: ref_base = snp_entry[1]
-            to_write.append([props['type'],props['locus_tag'],props['gene_name'],
-                             props['product'],props['start']+1,props['end'],
-                             '',ref_base.upper(),'',''])
-        else:# in CDS/gene feature, check codon etc;
-            props = feat_props[snp_feat]
-            sequence = props['sequence']
-            if props['strand'] == -1:
-                pos_in_gene = props['end'] - pos - 1# Python counting
-                ref_base = (compl_bases[snp_entry[1].lower()])
-            else:
-                pos_in_gene = pos - props['start']# Python counting
-                ref_base = snp_entry[1]
-
-            in_feat = True
-            if props['pseudo'] and 'subfeats' in props:# Pseudogene that needs special attention;
-                in_feat = False
-                subfeat_boundaries = [{'start':item.location._start.position,'end':item.location._end.position}
-                                      for item in props['subfeats']]
-                snp_subfeat = match_feature(subfeat_boundaries,pos)
-                if snp_subfeat != -1:
-                    in_feat = True
-                    pos_in_gene -= non_coding_calc({'start':props['start'],'subfeats':props['subfeats'],
-                                                'pseudo':True,'strand':props['strand']},pos)
-                    sequence = props['pure_seq']
-
-            if not in_feat:# In pseudogene non-coding region;
-                codon = ['']*5
-                to_write.append(['non coding',props['locus_tag'],props['gene_name'],props['product'],
-                             props['start']+1,props['end'],props['strand'],ref_base.upper(),
-                             '',''])
-            else:# In coding region;
-                pos_in_cod = (pos_in_gene+1)%3
-                if pos_in_cod == 0: pos_in_cod = 3# Remainder of division 0 means 3rd place in codon;
-
-                old_codon = sequence[pos_in_gene-pos_in_cod+1:pos_in_gene-pos_in_cod+4].upper()
-                old_residue = old_codon.translate(table=transl_table)
-                to_write.append([props['type'],props['locus_tag'],props['gene_name'],props['product'],
-                                 props['start']+1,props['end'],props['strand'],ref_base.upper(),
-                                 old_codon,old_residue])
-
-            if in_feat and not firstsnp and (pos >= prev_snp):# Check if snp is in same codon as previous snp. Position check for overlapping features;
-                if props['strand'] == 1 and (pos - prev_snp + 1) < pos_in_cod:# Same codon (Positive strand);
-                    mnp = True
-                elif props['strand'] == -1 and (pos  - prev_snp + 1) <= (3 - pos_in_cod):# Same codon (negative strand);
-                    mnp = True
-
-            # Process previous codon if not MNP;
-            if in_feat and not mnp:
-                if not firstsnp:
-                    codon_process(codon)
-                codon = [pos_in_gene-pos_in_cod+1,'','','',props['strand']]
-
-
-        for l, snp in enumerate(snp_entry[2:]):# Loop through SNPs/strains;
-
-            snp = snp.lower()
-            if snp == '':# Empty cell;
-                to_write.append(['','','',''])
-                continue
-
-            if snp == '-': # Feature not present in this strain;
-                to_write.append(['Allele missing','','',''])
-                continue
-
-            if snp_feat == -1:# Intergenic;
-                if snp == ref_base.lower():
-                    to_write.append(['']*4)
-                else:
-                    to_write.append(['',snp,'',''])
-                    straininfo[l]['intergenic'] += 1
-                    straininfo[l]['snps'] += 1
-                continue
-
-            if props['strand'] == -1:
-                snp = compl_bases[snp]
-
-            if snp == ref_base.lower():
-                to_write.append(['']*4)
-            else:
-                to_write.append(['',snp,'',''])
-                straininfo[l]['snps'] += 1
-                if props['type'] != 'CDS':
-                    straininfo[l][props['type']] += 1
-
-
-
-        if props['type'] in ['CDS','gene','pseudogene'] and in_feat:
-            codon[pos_in_cod] = to_write
-        else:
-            write_output(to_write)
-
-        if firstsnp: firstsnp = False
-        prev_snp = pos+1
-        i += 1
-
-
-if codon != ['','','','','']: codon_process(codon)
-
-file_out.close()
-
-end = time.clock()
-
-file_summary.write("\n")
-file_summary.write(intro_message)
-file_summary.write('\n'+strains_found+'.\n')
-
-file_summary.write("\nFinished annotation. Total time: %s s\n\n" % round(end-start,1))
-
-
-file_overlap.write('SNP start\tSNP end\tFeature 1\tLocus tag\tProduct\t\tFeature 2\tLocus tag\tProduct')
-for overlap in feat_overlap:
-    if overlap[4] != [0,0]:
-        overlap[4][0]+=1
-        overlap[4][1]+=1
-        if overlap[4][0] == overlap[4][1]: overlap[4][1] = ''
-        write_output([[str(overlap[4][0])],[str(overlap[4][1]),feat_props[overlap[2]]['type'],feat_props[overlap[2]]['locus_tag'],feat_props[overlap[2]]['product']],
-                        [feat_props[overlap[3]]['type'],feat_props[overlap[3]]['locus_tag'],feat_props[overlap[3]]['product']]],
-                        file_overlap)
-
-
-for i,strain in enumerate(headers[2:]):
-    file_summary.write("\n")
-    info = straininfo[i]
-    file_summary.write("+ Strain %s:\n" % strain)
-    file_summary.write("  %s SNPs found\n" % info['snps'])
-    file_summary.write("  Number of SNPs found CDS features: %s\n" % (info['mnps']+info['nonstart']+info['nonstop']+info['nonsense']+
-                                                     info['synonymous']+info['nonsynonymous']))
-    file_summary.write("  (of which in pseudogenes: %s)\n" % info['pseudogene'])
-    file_summary.write("  - MNPs:          %s\n" % info['mnps'])
-    file_summary.write("  - Synonymous:    %s\n" % info['synonymous'])
-    file_summary.write("  - Nonsynonymous: %s\n" % info['nonsynonymous'])
-    file_summary.write("  - Nonsense:      %s\n" % info['nonsense'])
-    file_summary.write("  - Nonstart:      %s\n" % info['nonstart'])
-    file_summary.write("  - Nonstop:       %s\n" % info['nonstop'])
-    file_summary.write("  Intergenic: %s\n" % info['intergenic'])
-
-    for typ in feature_types:
-        if typ not in  ['intergenic','pseudogene'] and info[typ] != 0:
-            file_summary.write("  %s: %s\n" % (typ,info[typ]))
-    file_summary.flush()
-
-file_overlap.close()
-file_summary.close()
--- a/trams.xml	Wed Dec 12 06:26:42 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-<tool id="trams" name="TRAMS">
-  <description>Tool for Rapid Annotation of Microbial SNPs</description>
-  <command interpreter="python">trams.py $input1 $input2 $ref_format $annot $overl $sum</command>
-  <inputs>
-    <param name="input1" type="data" format="tabular" label="SNP table" />
-    <param name="input2" type="data" format="txt" label="Reference genome" help="Reference genome file, should be genbank or embl file." />
-    <param name="ref_format" type="select">
-     <label>Reference file type</label>
-      <option value="gb">Genbank</option>
-      <option value="embl">EMBL</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data name="annot" format="tabular" label="${tool.name} on ${on_string}: SNP annotation file" />
-    <data name="overl" format="tabular" label="${tool.name} on ${on_string}: Overlap file" />
-    <data name="sum" format="txt" label="${tool.name} on ${on_string}: Summary file" />
-  </outputs>
-  <help>
-
-This tool annotates SNPs from a SNP table and a genbank file.
-
-  </help>
-</tool>