Mercurial > repos > yating-l > jbrowse_hub
comparison utils.py @ 31:d8049deb0c97 draft
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
author | yating-l |
---|---|
date | Fri, 17 Mar 2017 12:28:32 -0400 |
parents | a4a54b925c73 |
children | 3e5b6f6a9abe |
comparison
equal
deleted
inserted
replaced
30:daf6a1122200 | 31:d8049deb0c97 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 ''' | 3 """ |
4 This file includes commonly used functions for converting file formats to gff3 | 4 This file includes commonly used functions for converting file formats to gff3 |
5 ''' | 5 """ |
6 from collections import OrderedDict | 6 from collections import OrderedDict |
7 import json | 7 import json |
8 import subprocess | 8 import subprocess |
9 import os | 9 import os |
10 import tempfile | 10 import tempfile |
11 | 11 import string |
12 | 12 |
13 def write_features(field, attribute, gff3): | 13 def write_features(field, attribute, gff3): |
14 ''' | 14 """ |
15 This function writes the features in gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) | 15 This function writes the features in gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) |
16 field and attribute are ordered dictionaries | 16 field and attribute are ordered dictionaries |
17 gff3 is the file handle | 17 gff3 is the file handle |
18 ''' | 18 """ |
19 attr = [] | 19 attr = [] |
20 for v in field.values(): | 20 for v in field.values(): |
21 gff3.write(str(v) + '\t') | 21 gff3.write(str(v) + '\t') |
22 for k, v in attribute.items(): | 22 for k, v in attribute.items(): |
23 s = str(k) + '=' + str(v) | 23 s = str(k) + '=' + str(v) |
45 except OSError as err: | 45 except OSError as err: |
46 print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror) | 46 print "Cannot generate chrom_sizes from twoBitInfo err({0}): {1}".format(err.errno, err.strerror) |
47 return chrom_sizes | 47 return chrom_sizes |
48 | 48 |
49 def sequence_region(chrom_sizes): | 49 def sequence_region(chrom_sizes): |
50 ''' | 50 """ |
51 This function reads a chromosome sizes file generated by twoBitInfo and stores the information in a dict | 51 This function reads a chromosome sizes file generated by twoBitInfo and stores the information in a dict |
52 return a dict | 52 return a dict |
53 ''' | 53 """ |
54 f = open(chrom_sizes, 'r') | 54 f = open(chrom_sizes, 'r') |
55 sizes = f.readlines() | 55 sizes = f.readlines() |
56 sizes_dict = {} | 56 sizes_dict = {} |
57 for line in sizes: | 57 for line in sizes: |
58 chrom_info = line.rstrip().split('\t') | 58 chrom_info = line.rstrip().split('\t') |
74 child_attr['Parent'] = parent_attr['ID'] | 74 child_attr['Parent'] = parent_attr['ID'] |
75 write_features(child_field, child_attr, gff3) | 75 write_features(child_field, child_attr, gff3) |
76 num = num + 1 | 76 num = num + 1 |
77 | 77 |
78 def add_tracks_to_json(trackList_json, new_tracks, modify_type): | 78 def add_tracks_to_json(trackList_json, new_tracks, modify_type): |
79 ''' | 79 """ |
80 Add to track configuration (trackList.json) | 80 Add to track configuration (trackList.json) |
81 # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() | 81 # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict() |
82 # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) | 82 # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict()) |
83 ''' | 83 """ |
84 with open(trackList_json, 'r+') as f: | 84 with open(trackList_json, 'r+') as f: |
85 data = json.load(f) | 85 data = json.load(f) |
86 if modify_type == 'add_tracks': | 86 if modify_type == 'add_tracks': |
87 data['tracks'].append(new_tracks) | 87 data['tracks'].append(new_tracks) |
88 elif modify_type == 'add_attr': | 88 elif modify_type == 'add_attr': |
96 f.write(json.dumps(data, separators=(',' , ':'), indent=4)) | 96 f.write(json.dumps(data, separators=(',' , ':'), indent=4)) |
97 f.truncate() | 97 f.truncate() |
98 f.close() | 98 f.close() |
99 | 99 |
100 def gtfToGff3(gtf_file, gff3_file, chrom_sizes): | 100 def gtfToGff3(gtf_file, gff3_file, chrom_sizes): |
101 ''' | 101 """ |
102 Convert the gtf file output by StringTie to gff3 format | 102 Convert the gtf file output by StringTie to gff3 format |
103 ''' | 103 """ |
104 gff3 = open(gff3_file, 'w') | 104 gff3 = open(gff3_file, 'w') |
105 gff3.write("##gff-version 3\n") | 105 gff3.write("##gff-version 3\n") |
106 sizes_dict = sequence_region(chrom_sizes) | 106 sizes_dict = sequence_region(chrom_sizes) |
107 seq_regions = dict() | 107 seq_regions = dict() |
108 parents = dict() | 108 parents = dict() |
142 attribute['transcript_id'] = attr_li[1].split()[1].strip('"') | 142 attribute['transcript_id'] = attr_li[1].split()[1].strip('"') |
143 attribute['coverage'] = attr_li[3].split()[1].strip('"') | 143 attribute['coverage'] = attr_li[3].split()[1].strip('"') |
144 write_features(field, attribute, gff3) | 144 write_features(field, attribute, gff3) |
145 gff3.close() | 145 gff3.close() |
146 | 146 |
147 def sanitize_name_path(input_path): | 147 |
148 ''' | 148 def sanitize_name(input_name): |
149 """ | |
149 Galaxy will name all the files and dirs as *.dat, | 150 Galaxy will name all the files and dirs as *.dat, |
150 the function is simply replacing '.' to '_' for the dirs | 151 the function can replace '.' to '_' for the dirs |
151 ''' | 152 """ |
152 return input_path.replace('.', '_') | 153 validChars = "_-%s%s" % (string.ascii_letters, string.digits) |
154 sanitized_name = ''.join([c if c in validChars else '_' for c in input_name]) | |
155 return "gonramp_" + sanitized_name | |
153 | 156 |
154 def createBamIndex(bamfile): | 157 def createBamIndex(bamfile): |
155 p = subprocess.Popen(['samtools', 'index', bamfile]) | 158 p = subprocess.Popen(['samtools', 'index', bamfile]) |
156 p.communicate() | 159 p.communicate() |
157 filename = bamfile + '.bai' | 160 filename = bamfile + '.bai' |