# HG changeset patch
# User yating-l
# Date 1489768112 14400
# Node ID d8049deb0c97c90b067d5078d1fdee751d4d5691
# Parent daf6a1122200067f044241761bcd3f1088d294fb
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
diff -r daf6a1122200 -r d8049deb0c97 TrackHub.py
--- a/TrackHub.py Wed Mar 15 11:46:38 2017 -0400
+++ b/TrackHub.py Fri Mar 17 12:28:32 2017 -0400
@@ -1,14 +1,10 @@
#!/usr/bin/env python
import os
-import trackObject
+import subprocess
+import shutil
import utils
-import subprocess
-import string
-import shutil
-import tempfile
-#TODO: package JBrowse file conversion .pl files
class TrackHub:
def __init__(self, inputFiles, reference, outputDirect, tool_dir, genome, extra_files_path):
@@ -21,10 +17,6 @@
self.raw = os.path.join(self.out_path, 'raw')
self.json = os.path.join(self.out_path, 'json')
try:
- if not self.out_path:
- raise ValueError('empty output path\n')
- if not os.path.exists(self.out_path):
- raise ValueError('the output folder has not been created')
if os.path.exists(self.json):
shutil.rmtree(self.json)
os.makedirs(self.json)
@@ -39,7 +31,6 @@
self.addTrack(input_file)
self.indexName()
self.makeArchive()
- #shutil.rmtree(self.out_path)
self.outHtml()
print "Success!\n"
@@ -59,7 +50,7 @@
bam_track = dict()
bam_track['type'] = 'JBrowse/View/Track/Alignments2'
bam_track['storeClass'] = 'JBrowse/Store/SeqFeature/BAM'
- bam_track['label'] = track['fileName']
+ bam_track['label'] = track['label']
bam_track['urlTemplate'] = os.path.join('../raw', track['fileName'])
bam_track['baiUrlTemplate'] = os.path.join('../raw', track['index'])
utils.add_tracks_to_json(json_file, bam_track, 'add_tracks')
@@ -68,14 +59,14 @@
self.createTrackList()
json_file = os.path.join(self.json, "trackList.json")
bigwig_track = dict()
- bigwig_track['label'] = track['fileName']
+ bigwig_track['label'] = track['label']
bigwig_track['urlTemplate'] = os.path.join('../raw', track['fileName'])
bigwig_track['type'] = 'JBrowse/View/Track/Wiggle/XYPlot'
bigwig_track['storeClass'] = 'JBrowse/Store/SeqFeature/BigWig'
utils.add_tracks_to_json(json_file, bigwig_track, 'add_tracks')
else:
gff3_file = os.path.join(self.raw, track['fileName'])
- label = track['fileName']
+ label = track['label']
if track['dataType'] == 'bedSpliceJunctions' or track['dataType'] == 'gtf':
p = subprocess.Popen(['flatfile-to-json.pl', '--gff', gff3_file, '--trackType', 'CanvasFeatures', '--trackLabel', label, '--config', '{"glyph": "JBrowse/View/FeatureGlyph/Segments"}', '--out', self.json])
elif track['dataType'] == 'gff3_transcript':
@@ -98,7 +89,6 @@
    #TODO: this lists all zip files in the file dir and sub-dirs. It worked in Galaxy, but
    #listed all zip files in test-data when run locally. May need to be modified.
def outHtml(self):
- #htmloutput = tempfile.NamedTemporaryFile(self.outfile, suffix = '.html', bufsize=0, delete=False)
with open(self.outfile, 'w') as htmlfile:
            htmlstr = 'The JBrowse Hub is created: <br>'
            zipfiles = '<li><a href="%s">Download</a></li>'
@@ -112,15 +102,13 @@
relative_file_path = os.path.join(relative_directory, file)
htmlstr += zipfiles % relative_file_path
- #htmlstr = htmlstr % zipfile
htmlfile.write(htmlstr)
def createTrackList(self):
trackList = os.path.join(self.json, "trackList.json")
if not os.path.exists(trackList):
os.mknod(trackList)
- #open(trackList,'w').close()
-
+
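For orientation, here is a minimal sketch of the trackList.json entry that the BAM branch above ends up registering, assuming the standard JBrowse layout with a top-level "tracks" array; the file names and label below are invented example values, only the keys come from the patch:

# Hypothetical trackList.json content after one addTrack() call for a BAM input;
# 'label' now comes from the per-track metadata instead of the raw file name.
track_list = {
    "tracks": [
        {
            "type": "JBrowse/View/Track/Alignments2",
            "storeClass": "JBrowse/Store/SeqFeature/BAM",
            "label": "Tumor reads",
            "urlTemplate": "../raw/dataset_001.dat",
            "baiUrlTemplate": "../raw/dataset_001.dat.bai"
        }
    ]
}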
diff -r daf6a1122200 -r d8049deb0c97 blastxmlToGff3.py
--- a/blastxmlToGff3.py Wed Mar 15 11:46:38 2017 -0400
+++ b/blastxmlToGff3.py Fri Mar 17 12:28:32 2017 -0400
@@ -7,14 +7,14 @@
def align2cigar(hsp_query, hsp_reference):
- '''
+ """
Build CIGAR representation from an hsp_query
input:
hsp_query
hsp_sbjct
output:
CIGAR string
- '''
+ """
query = hsp_query
ref = hsp_reference
# preType, curType:
@@ -98,6 +98,8 @@
attribute['ID'] = field['seqid'] + '_' + str(field['start']) + '_' + str(field['end']) + '_' + query_name + '_' + str(target_start) + '_' + str(target_end)
attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end)
attribute['Gap'] = align2cigar(query, ref)
+ #store the query sequence in the file in order to display alignment with BlastAlignment plugin
+ attribute['query'] = hsp.query
# show reading frame attribute only if the frame is not (0, 0)
if hsp.frame[0] != 0 or hsp.frame[1] != 0:
attribute['reading_frame'] = str(hsp.frame[0]) + ", " + str(hsp.frame[1])
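As a rough illustration of the new 'query' attribute, assuming invented HSP coordinates, names and sequence, the attribute dict built for one match now also carries the raw query string that the BlastAlignment plugin can display:

# Illustrative only: all values below are made-up example data.
from collections import OrderedDict

attribute = OrderedDict()
attribute['ID'] = 'chr1_1200_1290_query1_1_30'
attribute['Target'] = 'query1 1 30'
attribute['Gap'] = 'M30'
attribute['query'] = 'ATGGCCATTGTAATGGGCCGCTGAAAGGGT'  # new: hsp.query, read by BlastAlignment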
diff -r daf6a1122200 -r d8049deb0c97 jbrowse_hub.py
--- a/jbrowse_hub.py Wed Mar 15 11:46:38 2017 -0400
+++ b/jbrowse_hub.py Fri Mar 17 12:28:32 2017 -0400
@@ -1,22 +1,19 @@
#!/usr/bin/env python
-import os
import sys
import argparse
-import subprocess
-from bedToGff3 import bedToGff3
-import blastxmlToGff3
+import json
import utils
-import tempfile
import trackObject
import TrackHub
-import shutil
+
+
def main(argv):
parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')
# Reference genome mandatory
- parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')
+ parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')
# Genome name
parser.add_argument('-g', '--genome_name', help='Name of reference genome')
@@ -25,7 +22,10 @@
parser.add_argument('-o', '--out', help='output html')
# Output folder
- parser.add_argument('-e', '--extra_files_path', help="Directory of JBrowse Hub folder")
+ parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')
+
+ #Tool Directory
+    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file conversion scripts and UCSC tools')
# GFF3 structure: gene->transcription->CDS
parser.add_argument('--gff3_transcript', action='append', help='GFF3 format, structure: gene->transcription->CDS')
@@ -51,25 +51,45 @@
# GTF format
parser.add_argument('--gtf', action='append', help='GTF format from StringTie')
+ # Metadata json format
+ parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+
args = parser.parse_args()
all_datatype_dictionary = dict()
+ if not args.fasta:
+ parser.print_help()
+ raise RuntimeError("No reference genome\n")
reference = args.fasta
genome = 'unknown'
- out_path = '.'
+ out_path = 'unknown.html'
extra_files_path = '.'
+ tool_directory = '.'
if args.genome_name:
- genome = utils.sanitize_name_path(args.genome_name)
+ genome = utils.sanitize_name(args.genome_name)
if args.out:
out_path = args.out
if args.extra_files_path:
- extra_files_path = utils.sanitize_name_path(args.extra_files_path)
- cwd = os.getcwd()
+ extra_files_path = utils.sanitize_name(args.extra_files_path)
+
    #tool_directory does not work for a Galaxy tool; all required tools must be on the current PATH, which is handled with tool dependencies
- tool_directory = os.path.join(cwd, 'JBrowse-1.12.1/bin')
+ if args.tool_directory:
+ tool_directory = args.tool_directory
+
+    #Calculate chromosome sizes using the genome reference and UCSC tools
chrom_size = utils.getChromSizes(reference, tool_directory)
- all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) #store converted files in the array: all_tracks.tracks
+
+ #get metadata from json file
+ json_inputs_data = args.data_json
+ if json_inputs_data:
+ inputs_data = json.loads(json_inputs_data)
+ else:
+ inputs_data = {}
+
+    #Initialize the trackObject
+ all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)
+
array_inputs_bam = args.bam
array_inputs_bed_simple_repeats = args.bedSimpleRepeats
array_inputs_bed_splice_junctions = args.bedSpliceJunctions
@@ -78,6 +98,7 @@
array_inputs_gff3_mrna = args.gff3_mrna
array_inputs_gtf = args.gtf
array_inputs_blastxml = args.blastxml
+
if array_inputs_bam:
all_datatype_dictionary['bam'] = array_inputs_bam
if array_inputs_bed_simple_repeats:
@@ -95,7 +116,7 @@
if array_inputs_blastxml:
all_datatype_dictionary['blastxml'] = array_inputs_blastxml
- print all_datatype_dictionary
+ print "input tracks: \n", all_datatype_dictionary
for datatype, inputfiles in all_datatype_dictionary.items():
try:
@@ -105,11 +126,23 @@
print 'Cannot open', datatype
else:
for f in inputfiles:
- all_tracks.addToRaw(f, datatype)
+ metadata = {}
+ if f in inputs_data.keys():
+ metadata = inputs_data[f]
+ #Convert tracks into gff3 format
+ all_tracks.addToRaw(f, datatype, metadata)
jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path)
jbrowseHub.createHub()
-
+
+"""
+def extractMetadata(array_inputs, inputs_data):
+ metadata_dict = {}
+ for input_false_path in array_inputs:
+ for key, data_value in inputs_data.items():
+ if key == input_false_path:
+ metadata_dict[input_false_path]
+"""
if __name__ == "__main__":
main(sys.argv)
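For reference, a sketch of the JSON the new -j/--data_json option is expected to carry; the only behaviour taken from the code is that it maps each input file path (as passed on the command line) to a per-track metadata dict whose 'label' is used downstream in trackObject.addToRaw, so the paths and labels below are illustrative assumptions:

# Hypothetical --data_json payload: keys are the input file paths handed to the
# tool, values are metadata dicts merged into each track.
inputs_data = {
    "/galaxy/files/dataset_001.dat": {"label": "RNA-seq alignments"},
    "/galaxy/files/dataset_002.dat": {"label": "Coverage (bigwig)"}
}
# On the command line this arrives serialized, e.g.:
#   --data_json '{"/galaxy/files/dataset_001.dat": {"label": "RNA-seq alignments"}}'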
diff -r daf6a1122200 -r d8049deb0c97 jbrowse_hub.xml
--- a/jbrowse_hub.xml Wed Mar 15 11:46:38 2017 -0400
+++ b/jbrowse_hub.xml Fri Mar 17 12:28:32 2017 -0400
@@ -17,36 +17,70 @@
@@ -71,6 +105,7 @@
type="data"
label="BAM File"
/>
+
@@ -95,6 +130,7 @@
/>
diff -r daf6a1122200 -r d8049deb0c97 tool_dependencies.xml
--- a/tool_dependencies.xml Wed Mar 15 11:46:38 2017 -0400
+++ b/tool_dependencies.xml Fri Mar 17 12:28:32 2017 -0400
@@ -8,6 +8,9 @@
+This package is based on package_biopython_1_67 owned by biopython.
+https://toolshed.g2.bx.psu.edu/repository?user_id=fd5c6d0f82f315d8
+
This Galaxy Tool Shed package installs Biopython from source, having
first installed NumPy, which is a build-time dependency. This requires
and assumes a standard C compiler is already installed, along with
diff -r daf6a1122200 -r d8049deb0c97 trackObject.py
--- a/trackObject.py Wed Mar 15 11:46:38 2017 -0400
+++ b/trackObject.py Fri Mar 17 12:28:32 2017 -0400
@@ -5,15 +5,14 @@
import utils
import bedToGff3
import blastxmlToGff3
-import tempfile
-import subprocess
+
class trackObject:
def __init__(self, chrom_size, genome, extra_files_path):
self.chrom_size = chrom_size
outputDirect = os.path.join(extra_files_path, genome)
self.raw_folder = os.path.join(outputDirect, 'raw')
- print self.raw_folder
+ #Store metadata of the tracks
self.tracks = []
try:
if os.path.exists(self.raw_folder):
@@ -25,11 +24,11 @@
except OSError as oserror:
print "Cannot create raw folder error({0}): {1}".format(oserror.errno, oserror.strerror)
- def addToRaw(self, dataFile, dataType):
- '''
+ def addToRaw(self, dataFile, dataType, metaData):
+ """
Convert gff3, BED, blastxml and gtf files into gff3 files
and store converted files in folder 'raw'
- '''
+ """
fileName = os.path.basename(dataFile)
des_path = os.path.join(self.raw_folder, fileName)
@@ -61,18 +60,7 @@
utils.gtfToGff3(dataFile, des_path, self.chrom_size)
track['fileName'] = fileName
track['dataType'] = dataType
+ track.update(metaData)
+ if 'label' not in metaData.keys():
+ track['label'] = fileName
self.tracks.append(track)
-
-
-
-'''
- def checkGff3(self, dataFile, dataType):
- with open(dataFile, 'r') as f:
- for line in f:
- if not line.startswith('#'):
- seq_type = line.rstrip().split('\t')[2]
- if seq_type == 'transcript':
- return 'gff3-transcript'
- if seq_type == 'mRNA':
- return 'gff3'
-'''
\ No newline at end of file
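Putting the new metaData argument together, each element appended to self.tracks is a plain dict along these lines; the keys fileName, dataType and label come from the code above, while the values are invented for illustration:

# Illustrative result of one addToRaw(dataFile, 'bigwig', {'label': 'Coverage'}) call:
track = {
    'fileName': 'dataset_002.dat',   # basename of the converted input in the 'raw' folder
    'dataType': 'bigwig',
    'label': 'Coverage'              # from metaData; falls back to fileName when absent
}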
diff -r daf6a1122200 -r d8049deb0c97 utils.py
--- a/utils.py Wed Mar 15 11:46:38 2017 -0400
+++ b/utils.py Fri Mar 17 12:28:32 2017 -0400
@@ -1,21 +1,21 @@
#!/usr/bin/env python
-'''
+"""
This file includes commonly used functions for converting file formats to gff3
-'''
+"""
from collections import OrderedDict
import json
import subprocess
import os
import tempfile
-
+import string
def write_features(field, attribute, gff3):
- '''
+ """
    This function writes the features in gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
    field and attribute are ordered dictionaries
gff3 is the file handler
- '''
+ """
attr = []
for v in field.values():
gff3.write(str(v) + '\t')
@@ -47,10 +47,10 @@
return chrom_sizes
def sequence_region(chrom_sizes):
- '''
+ """
    This function reads a chromosome sizes file generated by twoBitInfo and writes the information to a dict
return a dict
- '''
+ """
f = open(chrom_sizes, 'r')
sizes = f.readlines()
sizes_dict = {}
@@ -76,11 +76,11 @@
num = num + 1
def add_tracks_to_json(trackList_json, new_tracks, modify_type):
- '''
+ """
Add to track configuration (trackList.json)
# modify_type = 'add_tracks': add a new track like bam or bigwig, new_track = dict()
# modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict())
- '''
+ """
with open(trackList_json, 'r+') as f:
data = json.load(f)
if modify_type == 'add_tracks':
@@ -98,9 +98,9 @@
f.close()
def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
- '''
+ """
    Convert gtf output from StringTie to gff3 format
- '''
+ """
gff3 = open(gff3_file, 'w')
gff3.write("##gff-version 3\n")
sizes_dict = sequence_region(chrom_sizes)
@@ -144,12 +144,15 @@
write_features(field, attribute, gff3)
gff3.close()
-def sanitize_name_path(input_path):
- '''
+
+def sanitize_name(input_name):
+ """
Galaxy will name all the files and dirs as *.dat,
- the function is simply replacing '.' to '_' for the dirs
- '''
- return input_path.replace('.', '_')
+    this function replaces any character outside letters, digits, '-' and '_' with '_' and prefixes the name with 'gonramp_'
+ """
+ validChars = "_-%s%s" % (string.ascii_letters, string.digits)
+ sanitized_name = ''.join([c if c in validChars else '_' for c in input_name])
+ return "gonramp_" + sanitized_name
def createBamIndex(bamfile):
p = subprocess.Popen(['samtools', 'index', bamfile])
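For reference, a quick sketch of how the reworked sanitize_name behaves; the input values are arbitrary examples:

# Any character outside letters, digits, '-' and '_' becomes '_', then the
# 'gonramp_' prefix is added.
print sanitize_name('hg19.dat')         # -> gonramp_hg19_dat
print sanitize_name('my genome (v2)')   # -> gonramp_my_genome__v2_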