annotate compressed.py @ 1:70d78b7ce093 draft default tip

planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
author tiagoantao
date Sun, 13 Mar 2016 13:23:44 -0400
parents b0ed47130f0d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
1
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
2 """
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
3 Compressed classes
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
4 """
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
5
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
6 import mimetypes, logging, os, os.path, sys, time, tempfile, shutil, string, glob, re, zipfile, tarfile
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
7
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
8 from galaxy.datatypes.data import Data
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
9 from galaxy.datatypes.sniff import *
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
10 from cgi import escape
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
11 from inspect import isclass
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
12 from galaxy import util
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
13 from galaxy.datatypes.metadata import MetadataElement #import directly to maintain ease of use in Datatype class definitions
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
14 from galaxy.util import inflector
1
70d78b7ce093 planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents: 0
diff changeset
15 from galaxy.util.checkers import *
0
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
16 from galaxy.util.bunch import Bunch
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
17 from galaxy.util.odict import odict
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
18 from galaxy.util.sanitize_html import sanitize_html
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
19 from galaxy.datatypes import data
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
20
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
21
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
22 log = logging.getLogger(__name__)
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
23
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
class Zip(Data):
    """Datatype for zip archives (``file_ext`` is ``"zip"``)."""

    file_ext = "zip"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the dataset's peek, or flag the dataset as purged.

        For a purged dataset both ``peek`` and ``blurb`` are replaced by fixed
        placeholder messages.  Otherwise only ``peek`` is assigned, using
        whatever ``data.get_file_peek`` returns for the dataset's file;
        ``blurb`` is left untouched in that case.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)

    def sniff(self, filename):
        """Return True when ``check_zip`` accepts *filename*, False otherwise."""
        return True if check_zip(filename) else False
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
39
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
40
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
class Tgz(Data):
    """Datatype for gzip-compressed tar archives (``file_ext`` is ``"tar.gz"``)."""

    file_ext = "tar.gz"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the dataset's peek, or flag the dataset as purged.

        For a purged dataset both ``peek`` and ``blurb`` are replaced by fixed
        placeholder messages.  Otherwise only ``peek`` is assigned, using
        whatever ``data.get_file_peek`` returns for the dataset's file;
        ``blurb`` is left untouched in that case.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)

    def sniff(self, filename):
        """Report whether *filename* looks like a gzipped tar archive.

        The result is truthy only when both values returned by ``check_gzip``
        are truthy and ``tarfile.is_tarfile`` recognises the file as well.
        """
        # Both probes always run, gzip first, before the results are combined.
        gzip_seen, gzip_ok = check_gzip(filename)
        tar_seen = tarfile.is_tarfile(filename)
        return gzip_seen and gzip_ok and tar_seen
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
56
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
57
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
class Tbz2(Data):
    """Datatype for bzip2-compressed tar archives (``file_ext`` is ``"tar.bz2"``)."""

    file_ext = "tar.bz2"

    def set_peek(self, dataset, is_multi_byte=False):
        """Fill in the dataset's peek, or flag the dataset as purged.

        For a purged dataset both ``peek`` and ``blurb`` are replaced by fixed
        placeholder messages.  Otherwise only ``peek`` is assigned, using
        whatever ``data.get_file_peek`` returns for the dataset's file;
        ``blurb`` is left untouched in that case.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)

    def sniff(self, filename):
        """Report whether *filename* looks like a bzip2-compressed tar archive.

        The result is truthy only when both values returned by ``check_bz2``
        are truthy and ``tarfile.is_tarfile`` recognises the file as well.
        """
        # Both probes always run, bzip2 first, before the results are combined.
        bz2_seen, bz2_ok = check_bz2(filename)
        tar_seen = tarfile.is_tarfile(filename)
        return bz2_seen and bz2_ok and tar_seen
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
73
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
74
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
class Fastqgz(Data):
    """Datatype for gzip-compressed FASTQ files (``file_ext`` is ``"fastq.gz"``)."""

    file_ext = "fastq.gz"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek text, or the peek and blurb text for a purged dataset.

        For a purged dataset both ``peek`` and ``blurb`` are replaced by fixed
        placeholder messages.  Otherwise only ``peek`` is assigned, using
        whatever ``data.get_file_peek`` returns for the dataset's file.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Return True when *filename* is a gzipped, non-tar file holding FASTQ data.

        The file must pass ``check_gzip`` (both returned flags truthy), must
        not be recognised by ``tarfile.is_tarfile``, and its decompressed
        content must start with ``@``, the character that opens every FASTQ
        record.  Without the content check any plain gzip file would be
        reported as ``fastq.gz``.

        Returns False (rather than raising) if the payload cannot be
        decompressed.
        """
        # Imported locally so this block does not depend on the module header.
        import gzip
        import zlib

        is_gzipped, is_valid = check_gzip(filename)
        if not (is_gzipped and is_valid):
            return False
        if tarfile.is_tarfile(filename):
            return False
        try:
            handle = gzip.open(filename, 'rb')
            try:
                # One decompressed byte is enough to see the record marker.
                first_byte = handle.read(1)
            finally:
                handle.close()
        except (IOError, OSError, EOFError, zlib.error):
            # Unreadable or corrupt stream: it cannot be a usable FASTQ file.
            return False
        return first_byte == b'@'
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
90
b0ed47130f0d planemo upload commit a978c98767df89e56b192fc450370559a00e47b9-dirty
tiagoantao
parents:
diff changeset
class Fastqbz2(Data):
    """Datatype for bzip2-compressed FASTQ files (``file_ext`` is ``"fastq.bz2"``)."""

    file_ext = "fastq.bz2"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek text, or the peek and blurb text for a purged dataset.

        For a purged dataset both ``peek`` and ``blurb`` are replaced by fixed
        placeholder messages.  Otherwise only ``peek`` is assigned, using
        whatever ``data.get_file_peek`` returns for the dataset's file.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Return True when *filename* is a bzip2, non-tar file holding FASTQ data.

        The file must pass ``check_bz2`` (both returned flags truthy), must
        not be recognised by ``tarfile.is_tarfile``, and its decompressed
        content must start with ``@``, the character that opens every FASTQ
        record.  Without the content check any plain bzip2 file would be
        reported as ``fastq.bz2``.

        Returns False (rather than raising) if the payload cannot be
        decompressed.
        """
        # Imported locally so this block does not depend on the module header.
        import bz2

        is_bzipped, is_valid = check_bz2(filename)
        if not (is_bzipped and is_valid):
            return False
        if tarfile.is_tarfile(filename):
            return False
        try:
            handle = bz2.BZ2File(filename, 'rb')
            try:
                # One decompressed byte is enough to see the record marker.
                first_byte = handle.read(1)
            finally:
                handle.close()
        except (IOError, OSError, EOFError):
            # Unreadable or corrupt stream: it cannot be a usable FASTQ file.
            return False
        return first_byte == b'@'