comparison utils.py @ 31:d8049deb0c97 draft

planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
author yating-l
date Fri, 17 Mar 2017 12:28:32 -0400
parents a4a54b925c73
children 3e5b6f6a9abe
comparison
equal deleted inserted replaced
30:daf6a1122200 31:d8049deb0c97
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 ''' 3 """
4 This file include common used functions for converting file format to gff3 4 This file include common used functions for converting file format to gff3
5 ''' 5 """
6 from collections import OrderedDict 6 from collections import OrderedDict
7 import json 7 import json
8 import subprocess 8 import subprocess
9 import os 9 import os
10 import tempfile 10 import tempfile
11 11 import string
12 12
13 def write_features(field, attribute, gff3): 13 def write_features(field, attribute, gff3):
14 ''' 14 """
15 The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) 15 The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
16 field, attribute are ordered dictionary 16 field, attribute are ordered dictionary
17 gff3 is the file handler 17 gff3 is the file handler
18 ''' 18 """
19 attr = [] 19 attr = []
20 for v in field.values(): 20 for v in field.values():
21 gff3.write(str(v) + '\t') 21 gff3.write(str(v) + '\t')
22 for k, v in attribute.items(): 22 for k, v in attribute.items():
23 s = str(k) + '=' + str(v) 23 s = str(k) + '=' + str(v)
45 except OSError as err: 45 except OSError as err:
46 print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror) 46 print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror)
47 return chrom_sizes 47 return chrom_sizes
48 48
49 def sequence_region(chrom_sizes): 49 def sequence_region(chrom_sizes):
50 ''' 50 """
51 This function read from a chromatin size file generated by twoBitInfo and write the information to dict 51 This function read from a chromatin size file generated by twoBitInfo and write the information to dict
52 return a dict 52 return a dict
53 ''' 53 """
54 f = open(chrom_sizes, 'r') 54 f = open(chrom_sizes, 'r')
55 sizes = f.readlines() 55 sizes = f.readlines()
56 sizes_dict = {} 56 sizes_dict = {}
57 for line in sizes: 57 for line in sizes:
58 chrom_info = line.rstrip().split('\t') 58 chrom_info = line.rstrip().split('\t')
74 child_attr['Parent'] = parent_attr['ID'] 74 child_attr['Parent'] = parent_attr['ID']
75 write_features(child_field, child_attr, gff3) 75 write_features(child_field, child_attr, gff3)
76 num = num + 1 76 num = num + 1
77 77
78 def add_tracks_to_json(trackList_json, new_tracks, modify_type): 78 def add_tracks_to_json(trackList_json, new_tracks, modify_type):
79 ''' 79 """
80 Add to track configuration (trackList.json) 80 Add to track configuration (trackList.json)
81 # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() 81 # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict()
82 # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) 82 # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict())
83 ''' 83 """
84 with open(trackList_json, 'r+') as f: 84 with open(trackList_json, 'r+') as f:
85 data = json.load(f) 85 data = json.load(f)
86 if modify_type == 'add_tracks': 86 if modify_type == 'add_tracks':
87 data['tracks'].append(new_tracks) 87 data['tracks'].append(new_tracks)
88 elif modify_type == 'add_attr': 88 elif modify_type == 'add_attr':
96 f.write(json.dumps(data, separators=(',' , ':'), indent=4)) 96 f.write(json.dumps(data, separators=(',' , ':'), indent=4))
97 f.truncate() 97 f.truncate()
98 f.close() 98 f.close()
99 99
100 def gtfToGff3(gtf_file, gff3_file, chrom_sizes): 100 def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
101 ''' 101 """
102 Covert gtf file output from StringTie to gff3 format 102 Covert gtf file output from StringTie to gff3 format
103 ''' 103 """
104 gff3 = open(gff3_file, 'w') 104 gff3 = open(gff3_file, 'w')
105 gff3.write("##gff-version 3\n") 105 gff3.write("##gff-version 3\n")
106 sizes_dict = sequence_region(chrom_sizes) 106 sizes_dict = sequence_region(chrom_sizes)
107 seq_regions = dict() 107 seq_regions = dict()
108 parents = dict() 108 parents = dict()
142 attribute['transcript_id'] = attr_li[1].split()[1].strip('"') 142 attribute['transcript_id'] = attr_li[1].split()[1].strip('"')
143 attribute['coverage'] = attr_li[3].split()[1].strip('"') 143 attribute['coverage'] = attr_li[3].split()[1].strip('"')
144 write_features(field, attribute, gff3) 144 write_features(field, attribute, gff3)
145 gff3.close() 145 gff3.close()
146 146
147 def sanitize_name_path(input_path): 147
148 ''' 148 def sanitize_name(input_name):
149 """
149 Galaxy will name all the files and dirs as *.dat, 150 Galaxy will name all the files and dirs as *.dat,
150 the function is simply replacing '.' to '_' for the dirs 151 the function can replace '.' to '_' for the dirs
151 ''' 152 """
152 return input_path.replace('.', '_') 153 validChars = "_-%s%s" % (string.ascii_letters, string.digits)
154 sanitized_name = ''.join([c if c in validChars else '_' for c in input_name])
155 return "gonramp_" + sanitized_name
153 156
154 def createBamIndex(bamfile): 157 def createBamIndex(bamfile):
155 p = subprocess.Popen(['samtools', 'index', bamfile]) 158 p = subprocess.Popen(['samtools', 'index', bamfile])
156 p.communicate() 159 p.communicate()
157 filename = bamfile + '.bai' 160 filename = bamfile + '.bai'