changeset 2:43710158b0fa draft

Uploaded
author cschu
date Fri, 22 May 2015 13:34:55 -0400
parents ceb1abb5633e
children 55a28255359a
files candisnp.py candisnp.xml tool_data_table_conf.xml.sample
diffstat 3 files changed, 266 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/candisnp.py	Fri May 22 13:34:55 2015 -0400
@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import argparse
+import subprocess
+import struct
+import shutil
+import tempfile
+
+import urllib2
+import csv
+import json
+
+    
+CANDISNP_SERVER = 'http://candisnp.tsl.ac.uk' #:8080' 
+
+CANDI_TESTDATA = {
+    "ref": "athalianaTair10",
+    "data": [{
+               "is_synonymous": "FALSE",
+               "change": "E/K",
+               "effect": "NON_SYNONYMOUS_CODING",
+               "gene": "AT1G01320",
+               "allele_freq": 0.291666667,
+               "reference_base": "C",
+               "position": 126483,
+               "chromosome": "1",
+               "is_ctga": "TRUE",
+               "in_cds": "TRUE",
+               "alternate_base": "T",
+              },
+            ],
+}
+"""
+'is_ctga', 
+'effect', 
+'change', 
+'gene', 
+
+"""
+
+
+SNP_DATA_HEADERS = {                    
+                    0: 'chromosome', 
+                    1: 'position', 
+                    3: 'reference_base',  
+                    4: 'alternate_base',
+                    7: 'info'               
+}
+
+contentHeaders = {
+    'Content-Type': 'application/json',
+    'Accept': 'text/plain'
+}
+
+class SNPEffect(object):
+    def __init__(self, string):
+        effectAndData = string.strip(')').split('(') 
+        # sys.stderr.write(str(effectAndData)+'\n')
+        self.effect, effectData = effectAndData[0], effectAndData[1]
+        effectData = effectData.split('|')
+        assert len(effectData) >= 11, 'Invalid effect field %s' % string
+        self.impact, self.fClass, changeDist, self.aaChange, self.aaLength, self.gene, self.transcriptBioType, isCoding, self.transcriptID, exonIntronRank, genotypeNumber, warning = (effectData + [''])[:12]
+        if self.effect in ('UPSTREAM', 'DOWNSTREAM'):
+            self.transcriptDist = changeDist
+            self.codonChange = None
+        else:
+            self.transcriptDist = None
+            self.codonChange = changeDist
+        self.isCoding = (isCoding == 'CODING')
+        self.exonIntronRank = int(exonIntronRank) if exonIntronRank else 'NA'
+        self.genotypeNumber = int(genotypeNumber)
+        self.warning = warning if warning else None
+
+        self.is_synonymous = not self.effect == 'NON_SYNONYMOUS_CODING'
+        if self.effect in ('INTRON', 'INTERGENIC'):
+            self.in_cds, self.is_synonymous = False, False
+        else:
+            self.in_cds = True
+        try:
+            self.change = '%s/%s' % (self.aaChange[0], self.aaChange[-1])
+        except:
+            self.change = ''
+
+
+        pass
+    def toDict(self):
+        def is_ctga(ref, alt):
+            return (ref == 'C' and alt == 'T') or (ref == 'G' and alt == 'A')
+        assert hasattr(self, 'is_synonymous'), 'Missing attribute: is_synonymous'
+        assert hasattr(self, 'allele_frequency'), 'Missing attribute: allele_frequency'
+        assert hasattr(self, 'reference_base'), 'Missing attribute: reference_base' 
+        assert hasattr(self, 'alternate_base'), 'Missing attribute: alternate_base' 
+        return { "is_synonymous": str(self.is_synonymous).upper(),
+                 "change": self.change if self.change else 'NA',
+                 "effect": self.effect if self.effect else 'NA',
+                 "gene": self.gene if self.gene else 'NA',
+                 "allele_freq": self.allele_frequency,
+                 "reference_base": self.reference_base,
+                 "position": self.position,
+                 "chromosome": self.chromosome,
+                 "is_ctga": str(is_ctga(self.reference_base, self.alternate_base)).upper(),
+                 "in_cds": str(self.in_cds).upper(),
+                 "alternate_base": self.alternate_base, } 
+
+        
+    pass
+        
+        
+
+class AnnotatedSNPEffectFactory(object):
+    def __init__(self, *args, **kwargs):
+        assert len(args) >= 7, 'Not enough values in args (%s)' % args
+        self.log = kwargs['log']
+        self.chromosome = args[0]
+        self.position = int(args[1])
+        self.reference_base = args[3]
+        self.alternate_base = args[4]
+     
+        # info = dict([field.split('=') for field in args[7].strip().split(';')])
+        info = dict(field.split('=') for field in args[7].strip().split(';'))
+        # self.log.write('***'+str(info.get('EFF', '@@@')+'***\n'))
+        
+        self.allele_frequency = float(info.get('AF', '0.0'))
+        self.effects = [effect 
+                        for effect in info.get('EFF', '').split(',')
+                        if effect]
+        # assert self.effects, 'No effects in info-string %s' % info.get('EF', '')
+        pass
+
+    def getEffects(self, notWanted=('UPSTREAM', 'DOWNSTREAM', 'FRAME_SHIFT')):
+        for effectString in self.effects:            
+            effect = SNPEffect(effectString)
+            if effect.effect not in notWanted:
+                for attr in ('chromosome', 'position', 'reference_base', 'alternate_base', 'allele_frequency'):
+                    setattr(effect, attr, getattr(self, attr))
+                yield effect.toDict()
+                
+
+        pass
+    pass
+
+def extractCandiDataFromSnpEffVCF(snpEffVCF, fo):
+    snps = []
+    for line in snpEffVCF:
+        if not line.startswith('#'):
+            #fo.write(line)
+            asf = AnnotatedSNPEffectFactory(*line.strip().split('\t'), log=fo)
+            snpEffects = list(asf.getEffects())                        
+            #fo.write(str(snpEffects)+'\n')
+            snps.extend(snpEffects)
+    return snps
+
+
+
+def main(argv):
+
+    descr = ''
+    parser = argparse.ArgumentParser(description=descr)
+    parser.add_argument('--ref', help='The snpEff genome reference.')
+    parser.add_argument('snpEff_output', type=str, help='The input file.')
+    parser.add_argument('candisnp_html', type=str, help='The output file.')
+    args = parser.parse_args()
+
+    fo = open(args.candisnp_html, 'wb')
+    """
+    import subprocess
+    import urllib2
+    response = urllib2.urlopen('http://ruup.xyz:8080/monitors')
+    html = response.read()
+
+
+    # p = subprocess.Popen(['wget http://ruup.xyz:8080/monitors'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    # fo.write(p.communicate()[0])
+    fo.write(html)
+    fo.close()
+
+    return None
+    """
+    """ 
+    # This works.
+    candiData = CANDI_TESTDATA 
+    candiMessage = json.dumps(candiData)
+    """
+    candiData = extractCandiDataFromSnpEffVCF(open(args.snpEff_output), fo)
+    candiMessage = json.dumps({'ref': args.ref, 'data': candiData})
+
+    request = urllib2.Request(CANDISNP_SERVER + ':8080', 
+                              headers=contentHeaders,
+                              data=candiMessage)
+
+
+    try:
+        response = urllib2.urlopen(request)
+        candiURL = response.read()
+    except urllib2.URLError, e:
+        candiURL = ''
+        sys.stderr.write(str(e.reason) + '\n')
+   
+    if candiURL:
+        body = urllib2.urlopen(candiURL)
+        fo.write(body.read())
+        #fo.write('<iframe src="%s"></iframe>\n' % candiURL)
+    else:
+        fo.write('I am sorry. CandiSNP does not pick up. Maybe (<a href="%s" target="_blank">try it manually?</a>)\n' % CANDISNP_SERVER)
+     
+    fo.close()
+    pass
+
+# main(sys.argv[1:])
+
+if __name__ == '__main__': main(sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/candisnp.xml	Fri May 22 13:34:55 2015 -0400
@@ -0,0 +1,40 @@
+<tool id="candisnp" name="candisnp">
+	<description></description>
+	<requirements>
+	  <requirement type="package" version="2.7.4">python</requirement>
+	  <requirement type="package" version="4.0">snpEff</requirement>
+	</requirements>
+	<command interpreter="python">candisnp.py 
+		#if $snpDb.genomeSrc == 'cached':
+		 --ref="${snpDb.genomeVersion.fields.key}"
+		#end if
+                $input
+                $candisnp_html
+	</command>
+
+
+	<inputs>
+		<param format="vcf" name="input" type="data" label="snpEff output"/>
+		<conditional name="snpDb">
+			<param name="genomeSrc" type="select" label="Genome source">
+				<option value="cached">Locally installed reference genome</option>
+			</param>
+			<when value="cached">
+				<param name="genomeVersion" type="select" label="Genome">
+					<!-- GENOME DESCRIPTION -->
+					<options from_data_table="snpeffv_genomedb">
+						<!-- <filter type="static_value" name="snpeff_version" value="@SNPEFF_VERSION@" column="1"/> -->
+						<filter type="unique_value" column="2"/>
+					</options>
+				</param>
+			</when>
+		</conditional>			
+	</inputs>
+	<outputs>
+		<data format="html" name="candisnp_html" label="${tool.name}" />
+	</outputs>
+
+	<help>
+	This tool is a testtool to issue POST requests from Galaxy.
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri May 22 13:34:55 2015 -0400
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tables>
+<table comment_char="#" name="snpeffv_genomedb">
+        <columns>key, version, value, name, path</columns>
+        <file path="/tsl/services/galaxy/dist/galaxy-dist/tool-data/toolshed.g2.bx.psu.edu/repos/iuc/snpeff/500832f27cbc/snpeffv_genomedb.loc" />
+        <tool_shed_repository>
+            <tool_shed>toolshed.g2.bx.psu.edu</tool_shed>
+            <repository_name>snpeff</repository_name>
+            <repository_owner>iuc</repository_owner>
+            <installed_changeset_revision>500832f27cbc</installed_changeset_revision>
+            </tool_shed_repository>
+    </table>
+</tables>