annotate bim/bim.py @ 1:0e9c114c748f draft default tip

planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
author sanbi-uwc
date Thu, 30 Aug 2018 07:41:55 -0400
parents 57bd7c6a72cd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
1 # -*- coding: utf-8 -*-
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
2 from galaxy import eggs
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
3
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
4
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
5 import pkg_resources
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
6 pkg_resources.require( "bx-python" )
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
7
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
8
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
9 import logging, os, sys, time, sets, tempfile, shutil
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
10 import data
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
11 from galaxy import util
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
12 from galaxy.datatypes.sniff import *
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
13 from cgi import escape
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
14 import urllib
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
15 from bx.intervals.io import *
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
16 from galaxy.datatypes import metadata
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
17 from galaxy.datatypes.metadata import MetadataElement
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
18 from galaxy.datatypes.tabular import Tabular
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
19
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
20
57bd7c6a72cd planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit 2bf82b5426ec792b408104250a875585cbf7347f
sanbi-uwc
parents:
diff changeset
class Bim( Tabular ):
    """Tab delimited data in bim format.

    Registers the ``bim`` file extension and provides a content sniffer
    used to recognise candidate files.
    """
    file_ext = "bim"

    MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True )

    def __init__( self, **kwd ):
        """Initialize bim datatype.

        All keyword arguments are forwarded unchanged to ``Tabular.__init__``.
        """
        # Only the parent initialiser is invoked: this class defines no extra
        # setup hook, so there is nothing else to call here.
        Tabular.__init__( self, **kwd )

    def init_meta( self, dataset, copy_from=None ):
        """Initialise metadata for ``dataset`` by delegating to ``Tabular``.

        ``copy_from`` is passed straight through to ``Tabular.init_meta``.
        No bim-specific metadata is derived here; no header row is available
        in this method to inspect.
        """
        Tabular.init_meta( self, dataset, copy_from=copy_from )

    def sniff( self, filename ):
        """Return True if ``filename`` looks like this datatype, else False.

        The tab-split header lines returned by ``get_headers`` are examined.
        The file is rejected when fewer than two lines are returned, or when
        any non-blank, non-comment line does not have exactly 8 fields with
        integer values in fields 7 and 8 (indices 6 and 7).

        Errors raised while inspecting the lines are reported as a failed
        match (False); errors from ``get_headers`` itself still propagate.
        """
        # NOTE(review): PLINK .bim files are documented as having 6 columns;
        # the 8-column / integer-field rule below looks inherited from a
        # template -- confirm against real data before relying on it.
        headers = get_headers( filename, '\t' )
        try:
            if len( headers ) < 2:
                return False
            for hdr in headers:
                # Lines with a single field, an empty first field or a
                # leading '#' are skipped rather than validated.
                if len( hdr ) > 1 and hdr[0] and not hdr[0].startswith( '#' ):
                    if len( hdr ) != 8:
                        return False
                    try:
                        # Convert explicitly: a lazy map() would never run
                        # int() and so would never detect bad values.
                        int( hdr[6] )
                        int( hdr[7] )
                    except ( TypeError, ValueError ):
                        return False
        except Exception:
            # Any unexpected problem while inspecting the lines is treated
            # as "not this format" instead of being raised to the caller.
            return False
        # If we haven't yet returned False, then the file matched.
        return True