annotate fam/fam.py @ 1:0e9c114c748f draft default tip

planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
author sanbi-uwc
date Thu, 30 Aug 2018 07:41:55 -0400
parents 57bd7c6a72cd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
1 # -*- coding: utf-8 -*-
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
2 from galaxy import eggs
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
3
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
4
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
5 import pkg_resources
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
6 pkg_resources.require( "bx-python" )
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
7
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
8
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
9 import logging, os, sys, time, sets, tempfile, shutil
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
10 import data
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
11 from galaxy import util
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
12 from galaxy.datatypes.sniff import *
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
13 from cgi import escape
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
14 import urllib
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
15 from bx.intervals.io import *
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
16 from galaxy.datatypes import metadata
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
17 from galaxy.datatypes.metadata import MetadataElement
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
18 from galaxy.datatypes.tabular import Tabular
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
19
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
20
0e9c114c748f planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry/tree/master/datatypes/plink_datatypes commit d0354c6155e1a44aab9c2fecf7a1a7f9e4bb22de
sanbi-uwc
parents: 0
diff changeset
class Fam( Tabular ):
    """Tab delimited data in PLINK fam (sample information) format.

    Per the PLINK file format reference a .fam file has no header line and
    one line per sample with six fields: family ID, within-family ID,
    father's ID, mother's ID, sex code and phenotype value.
    """
    file_ext = "fam"

    # Number of fields on every record of a .fam file.
    FAM_COLUMN_COUNT = 6
    # Zero-based index of the sex code field ('1', '2' or '0' per the spec).
    SEX_COLUMN_INDEX = 4

    MetadataElement( name="columns", default=6, desc="Number of columns", readonly=True )

    def __init__( self, **kwd ):
        """Initialize fam datatype.

        All keyword arguments are forwarded unchanged to Tabular.__init__.
        """
        # The original also called self.do_something_else(), a placeholder
        # that this class never defines; it has been removed.
        Tabular.__init__( self, **kwd )

    def init_meta( self, dataset, copy_from=None ):
        """Initialize the metadata of `dataset`.

        Delegates to Tabular.init_meta; `copy_from` is passed through as is.
        """
        # The original followed this call with a check on `elems_len` and
        # `hdr`, neither of which exists in this scope (NameError on every
        # call). That fragment only set an unused local, so it is dropped.
        Tabular.init_meta( self, dataset, copy_from=copy_from )

    def sniff( self, filename ):
        """Return True if the file at `filename` looks like a fam file.

        The leading lines are obtained from get_headers() (imported from
        galaxy.datatypes.sniff) using a tab separator.  Lines that are
        blank, hold a single field, or start with '#' are skipped.  Every
        remaining line must have exactly FAM_COLUMN_COUNT fields and an
        integer sex code, and at least one such line must be present.
        """
        headers = get_headers( filename, '\t' )
        if len( headers ) < 2:
            return False
        records_seen = 0
        for hdr in headers:
            if len( hdr ) <= 1 or not hdr[0] or hdr[0].startswith( '#' ):
                continue
            if len( hdr ) != self.FAM_COLUMN_COUNT:
                return False
            try:
                int( hdr[self.SEX_COLUMN_INDEX] )
            except ValueError:
                return False
            # The phenotype column is not validated: the format allows
            # non-numeric values there (treated as missing).
            records_seen += 1
        # Without at least one validated record there is no evidence that
        # this is a fam file (e.g. a plain single-column text file).
        return records_seen > 0