Mercurial > repos > yating-l > jbrowse_hub
annotate utils.py @ 5:e7c80e9b70ae draft
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
author | yating-l |
---|---|
date | Tue, 14 Mar 2017 12:24:37 -0400 |
parents | e4f3f2ed4fa5 |
children | a4a54b925c73 |
rev | line source |
---|---|
0
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
2 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
3 ''' |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
4 This file includes commonly used functions for converting file formats to gff3 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
5 ''' |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
6 from collections import OrderedDict |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
7 import json |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
8 import subprocess |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
9 import os |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
10 import tempfile |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
11 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
12 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
def write_features(field, attribute, gff3):
    '''
    Emit a single feature line in gff3 format
    (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md).

    field     -- ordered dictionary; its values are written in order, each followed by a tab
    attribute -- ordered dictionary; written as key=value pairs separated by ';'
    gff3      -- the output file handler
    '''
    # Every column value is terminated by a tab, the last one included,
    # so the attribute column starts right after it.
    columns = ''.join([str(value) + '\t' for value in field.values()])
    gff3.write(columns)
    pairs = ['='.join((str(key), str(value))) for key, value in attribute.items()]
    gff3.write(';'.join(pairs) + '\n')
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
27 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
def _run_ucsc_tool(command, failure_message):
    '''
    Run an external command (argument list) and wait for it to finish.

    If the command cannot be started, failure_message is printed together with
    the OS error and the OSError is re-raised. A non-zero exit status raises
    subprocess.CalledProcessError.
    '''
    try:
        process = subprocess.Popen(command)
        process.communicate()
    except OSError as err:
        # print(<single string>) behaves the same on Python 2 and Python 3.
        print("{0} err({1}): {2}".format(failure_message, err.errno, err.strerror))
        raise
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)


def getChromSizes(reference, tool_dir):
    '''
    Generate a chromosome sizes file for a reference by running
    faToTwoBit followed by twoBitInfo (both looked up on the PATH).

    reference -- path of the reference passed to faToTwoBit
    tool_dir  -- not used any more (the executables are resolved through the
                 PATH); kept so existing callers keep working
    return the open temporary file object (suffix '.chrom.sizes') that
    twoBitInfo wrote to; it is created with delete=False, so the caller is
    responsible for removing it (the path is its .name attribute)

    Raises IOError/OSError when a temporary file or a tool cannot be
    created/started and subprocess.CalledProcessError when a tool exits with a
    non-zero status. Previously these failures were only printed and the
    function carried on with missing or empty files.
    '''
    #TODO: find a better way instead of shipping the two exec files with the tool
    try:
        # mode and buffering are passed positionally: the keyword is named
        # `bufsize` on Python 2 but `buffering` on Python 3.
        twoBitFile = tempfile.NamedTemporaryFile('w+b', 0)
    except (IOError, OSError) as err:
        print("Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror))
        raise
    try:
        try:
            chrom_sizes = tempfile.NamedTemporaryFile('w+b', 0, suffix='.chrom.sizes', delete=False)
        except (IOError, OSError) as err:
            print("Cannot create tempfile err({0}): {1}".format(err.errno, err.strerror))
            raise
        try:
            _run_ucsc_tool(['faToTwoBit', reference, twoBitFile.name],
                           "Cannot generate twoBitFile from faToTwoBit")
            _run_ucsc_tool(['twoBitInfo', twoBitFile.name, chrom_sizes.name],
                           "Cannot generate chrom_sizes from twoBitInfo")
        except Exception:
            # chrom_sizes was created with delete=False: remove it ourselves so
            # a failed run does not leave an orphaned file behind.
            chrom_sizes.close()
            os.remove(chrom_sizes.name)
            raise
    finally:
        # The intermediate .2bit file is only needed while the tools run;
        # closing the handle deletes it.
        twoBitFile.close()
    return chrom_sizes
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
48 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
def sequence_region(chrom_sizes):
    '''
    Read a chromosome sizes file generated by twoBitInfo into a dict.

    chrom_sizes -- path of a tab-separated file; the first column is used as
                   the sequence name and the second column as its size
    return a dict mapping each sequence name to its size (kept as the string
    read from the file)
    '''
    sizes_dict = {}
    # The context manager closes the file even if a line fails to parse.
    with open(chrom_sizes, 'r') as f:
        for line in f:
            if not line.strip():
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing with IndexError on the missing size column.
                continue
            chrom_info = line.rstrip().split('\t')
            sizes_dict[chrom_info[0]] = chrom_info[1]
    return sizes_dict
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
61 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
def child_blocks(parent_field, parent_attr, gff3):
    '''
    Write one 'exon_junction' child feature per block of a parent feature.

    parent_field -- ordered dictionary with the parent's gff3 columns; its
                    'start' is read and the dictionary itself is not modified
    parent_attr  -- parent's attributes; 'ID', 'blockcount' and the
                    comma-separated 'chromstarts' and 'blocksizes' are read
    gff3         -- the output file handler

    Each child gets the ID '<parent ID>_exon_<n>' (n starting at 1) and the
    parent's ID as its Parent attribute.
    '''
    blockcount = int(parent_attr['blockcount'])
    chromstart = parent_attr['chromstarts'].split(',')
    blocksize = parent_attr['blocksizes'].split(',')
    for num in range(blockcount):
        # Work on a copy: assigning the parent dict directly made every child
        # overwrite the parent's type/start/end, so from the second block on
        # the offset was added to the previous child's start instead of the
        # parent's start.
        child_field = parent_field.copy()
        child_field['type'] = 'exon_junction'
        child_field['start'] = int(chromstart[num]) + int(parent_field['start'])
        child_field['end'] = child_field['start'] + int(blocksize[num]) - 1
        child_attr = OrderedDict()
        child_attr['ID'] = parent_attr['ID'] + '_exon_' + str(num + 1)
        child_attr['Parent'] = parent_attr['ID']
        write_features(child_field, child_attr, gff3)
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
77 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
def add_tracks_to_json(trackList_json, new_tracks, modify_type):
    '''
    Add to track configuration (trackList.json)
    # modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict()
    # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict())

    trackList_json -- path of the JSON file; it is rewritten in place
    new_tracks     -- see modify_type above
    modify_type    -- 'add_tracks' or 'add_attr'; with any other value the
                      file is rewritten with its content unchanged

    For 'add_attr' a track is selected when track_name occurs
    (case-insensitively) in the track's 'urlTemplate'; tracks without a
    'urlTemplate' are left untouched.
    '''
    with open(trackList_json, 'r+') as f:
        data = json.load(f)
        if modify_type == 'add_tracks':
            data['tracks'].append(new_tracks)
        elif modify_type == 'add_attr':
            # Distinct names for the track name and the attribute keys: reusing
            # one loop variable for both made later tracks get compared against
            # an attribute key instead of the track name.
            for track_name, attr in new_tracks.items():
                wanted = track_name.lower()
                for track in data['tracks']:
                    if wanted in track.get('urlTemplate', '').lower():
                        track.update(attr)
        # Overwrite from the beginning and cut off whatever is left of the
        # old content.
        f.seek(0, 0)
        f.write(json.dumps(data, separators=(',', ':'), indent=4))
        f.truncate()
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
99 |
e4f3f2ed4fa5
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
diff
changeset
|
def _gtf_attr_value(attr_token):
    '''
    Return the value of one GTF attribute token such as ' gene_id "X"':
    the second whitespace-separated word with surrounding double quotes removed.
    '''
    return attr_token.split()[1].strip('"')


def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
    '''
    Convert gtf file output from StringTie to gff3 format

    gtf_file    -- path of the input gtf file
    gff3_file   -- path of the gff3 file to write (overwritten)
    chrom_sizes -- path of the chromosome sizes file, read with sequence_region

    A '##sequence-region' directive is written the first time a sequence id is
    seen; a sequence id missing from chrom_sizes raises KeyError.
    '''
    # Sequence ids that already have a ##sequence-region directive.
    seq_regions = set()
    # gene_id -> ID of the most recent transcript feature of that gene;
    # exons are attached to it (assumes each transcript line precedes its
    # exons, as in StringTie output -- TODO confirm for other producers).
    parents = dict()
    # The context managers close both files even when a line fails to parse.
    with open(gff3_file, 'w') as gff3:
        gff3.write("##gff-version 3\n")
        sizes_dict = sequence_region(chrom_sizes)
        with open(gtf_file, 'r') as gtf:
            for line in gtf:
                if line.startswith('#') or not line.strip():
                    # Comments and blank lines carry no feature.
                    continue
                field = OrderedDict()
                attribute = OrderedDict()
                li = line.rstrip().split("\t")
                field['seqid'] = li[0]
                if field['seqid'] not in seq_regions:
                    end_region = sizes_dict[field['seqid']]
                    gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(end_region) + '\n')
                    seq_regions.add(field['seqid'])
                field['source'] = li[1]
                field['type'] = li[2]
                # GTF and GFF3 both use 1-based coordinates, so start and end
                # are copied unchanged.
                field['start'] = li[3]
                field['end'] = li[4]
                field['score'] = li[5]
                field['strand'] = li[6]
                field['phase'] = li[7]
                attr_li = li[8].split(';')
                gene_id = _gtf_attr_value(attr_li[0])
                attribute['ID'] = gene_id + '_' + field['type'] + '_' + str(field['start']) + '_' + str(field['end'])
                # The attribute positions below are fixed; presumably they
                # follow StringTie's attribute order -- verify if the input
                # comes from another tool.
                if field['type'] == 'transcript':
                    parents[gene_id] = attribute['ID']
                    attribute['transcript_id'] = _gtf_attr_value(attr_li[1])
                    attribute['coverage'] = _gtf_attr_value(attr_li[2])
                    attribute['fpkm'] = _gtf_attr_value(attr_li[3])
                    attribute['tpm'] = _gtf_attr_value(attr_li[4])
                elif field['type'] == 'exon':
                    attribute['Parent'] = parents[gene_id]
                    attribute['transcript_id'] = _gtf_attr_value(attr_li[1])
                    attribute['coverage'] = _gtf_attr_value(attr_li[3])
                write_features(field, attribute, gff3)
5
e7c80e9b70ae
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
0
diff
changeset
|
146 |
e7c80e9b70ae
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit f18ea51d27ec7addfa6413716391cfefebc8acbc-dirty
yating-l
parents:
0
diff
changeset
|
def sanitize_name_path(input_path):
    '''
    Galaxy names all the files and dirs as *.dat; this returns input_path
    with every '.' replaced by '_'.
    '''
    pieces = input_path.split('.')
    return '_'.join(pieces)