Mercurial > repos > yating-l > jbrowsearchivecreator
changeset 46:061da5d3a219 draft
planemo upload for repository https://github.com/goeckslab/jbrowse-archive-creator.git commit 3160592f4119e684ab5843dd28a2e6cf11df0121-dirty
line wrap: on
line diff
--- a/README.md Tue May 01 21:52:46 2018 -0400 +++ b/README.md Tue Jun 19 16:28:36 2018 -0400 @@ -1,5 +1,5 @@ # JBrowse Hub Creator -This Galaxy tool permits to prepare your files to be ready for JBrowse visualization. +This Galaxy tool is used to prepare your files to be ready for JBrowse visualization. ## Features 1. Similar interface to Hub Archive Creator. @@ -7,33 +7,36 @@ 3. Group the tracks 4. Set the color for each track 5. Set the label for each track -6. Create workflows within Galaxy to automatize pipeline analysis and get them ready to visualization inside JBrowse...in a few clicks! +6. Support generating Tabix Indexed CanVasFeatures tracks +7. Create workflows within Galaxy to automatize pipeline analysis and get them ready to visualization inside JBrowse...in a few clicks! At the moment, Supported datatypes are: -- Bam -- Bed +- BAM +- BED + - Generic BED - Splice Junctions (BED 12+1) - Simple Repeats (BED 4+12) + - BLAT alignment (BigPsl) + - BLAST alignment (BED 12+12) - BigWig -- Gff3 -- Gtf -- Blastxml -- BigPsl +- GFF3 +- GTF +- Blast XML output ## Installation: -1. You would need to add this tool into your Galaxy. - 1. (strongly preferred) **ToolShed Installation**: Tool is in [testtoolshed](https://testtoolshed.g2.bx.psu.edu/view/yating-l/jbrowse_hub/b7bf45272ab7) - 2. OR **Local Installation**: See https://wiki.galaxyproject.org/Admin/Tools/AddToolTutorial -2. The tool can be used with or without Conda (activate it in your galaxy.ini) -3. If installed without TS (by downloading on GitHub), you need to have all the binaries accessible within Galaxy. 
- You can use the script [install_linux_binaries](util/install_linux_binaries) with a linux x86-64 (64bits) + +**ToolShed Installation**: +- The JBrowse Archive Creator tool is published at [ToolShed Repository](https://toolshed.g2.bx.psu.edu/view/yating-l/jbrowsearchivecreator) + +- Refer to [Installing Tools into Galaxy](https://galaxyproject.org/admin/tools/add-tool-from-toolshed-tutorial) tutorial if you want to learn how to install a tool from ToolShed. + ## Future See [TODO.md](todo.md) for more information ## Contribute -- Source Code: https://github.com/Yating-L/jbrowse_hub +- Source Code: https://github.com/goeckslab/jbrowse-archive-creator.git ## Support @@ -41,7 +44,7 @@ - For more information about how to use G-OnRamp: - [Wilson Leung](wleung@wustl.edu) - Product owner and developer - - [Yating Liu](yliu41@wustl.edu) - Community manager and Developer + - [Yating Liu](yliu41@wustl.edu) - Community manager and developer - For more information about the project vision, or for partnership: - [Elgin, Sarah](selgin@wustl.edu) - PI
--- a/TrackHub.py Tue May 01 21:52:46 2018 -0400 +++ b/TrackHub.py Tue Jun 19 16:28:36 2018 -0400 @@ -51,10 +51,12 @@ if Datatype.trackType == 'HTMLFeatures': self.myTrackStyle = TrackStyles(self.tool_directory, self.mySpecieFolderPath, self.trackList) self.logger = logging.getLogger(__name__) + self.nameIndexTrackList = [] - def addTrack(self, trackDbObject): + if trackDbObject['nameIndex'] == "true": + self.nameIndexTrackList.append(trackDbObject['trackLabel']) if trackDbObject['dataType'].lower() == 'bam': subtools.add_track_json(self.trackList, trackDbObject['options']) elif trackDbObject['dataType'].lower() == 'bigwig': @@ -104,7 +106,7 @@ subtools.prepare_refseqs(self.reference_genome.false_path, self.mySpecieFolderPath) def _indexName(self): - subtools.generate_names(self.mySpecieFolderPath) + subtools.generate_names(self.mySpecieFolderPath, self.nameIndexTrackList) print "finished name index \n" def _outHtml(self):
--- a/datatypes/Datatype.py Tue May 01 21:52:46 2018 -0400 +++ b/datatypes/Datatype.py Tue Jun 19 16:28:36 2018 -0400 @@ -49,6 +49,7 @@ self.track = None self.trackSettings = dict() self.extraSettings = collections.OrderedDict() + self.nameIndex = False @staticmethod @@ -98,6 +99,8 @@ self.extraSettings["category"] = self.trackSettings["group_name"] if "track_color" in self.trackSettings and self.trackSettings["track_color"]: self.extraSettings["color"] = self.trackSettings["track_color"] + #store information of whether to generate name index for the track + self.extraSettings["nameIndex"] = self.trackSettings["nameIndex"] @abc.abstractmethod
--- a/datatypes/converters/blastxmlToGff3.py Tue May 01 21:52:46 2018 -0400 +++ b/datatypes/converters/blastxmlToGff3.py Tue Jun 19 16:28:36 2018 -0400 @@ -65,6 +65,7 @@ group['parent_field']['source'] = source group['parent_field']['type'] = 'match' group['parent_attribute']['ID'] = contig_name + '_' + query_name + group['parent_attribute']['Name'] = query_name group['parent_attribute']['method'] = method group['parent_attribute']['length'] = length if contig_name not in seq_regions:
--- a/datatypes/interval/BedBlastAlignments.py Tue May 01 21:52:46 2018 -0400 +++ b/datatypes/interval/BedBlastAlignments.py Tue Jun 19 16:28:36 2018 -0400 @@ -14,7 +14,7 @@ super(BedBlastAlignments, self).__init__(input_bed_blast_alignments_false_path, data_bed_blast_alignments) #self.seqType = 1 - self.trackType = "G-OnRamp_plugin/BlastAlignment" + #self.trackType = "G-OnRamp_plugin/BlastAlignment" def initSettings(self): super(BedBlastAlignments, self).initSettings()
--- a/datatypes/interval/BigPsl.py Tue May 01 21:52:46 2018 -0400 +++ b/datatypes/interval/BigPsl.py Tue Jun 19 16:28:36 2018 -0400 @@ -26,7 +26,7 @@ def initSettings(self): super(BigPsl, self).initSettings() self.extraSettings["glyph"] = "JBrowse/View/FeatureGlyph/Segments" - #self.extraSettings["subfeatureClasses"] = "match_part" + def validateData(self): self.validateOptions = self.getValidateOptions(tab="True", autoSql=self.autoSql)
--- a/datatypes/validators/GtfValidation.py Tue May 01 21:52:46 2018 -0400 +++ b/datatypes/validators/GtfValidation.py Tue Jun 19 16:28:36 2018 -0400 @@ -74,7 +74,7 @@ start_position = fields[3] end_position = fields[4] - if start_position > 0 and end_position <= scaffold_size: + if int(start_position) > 0 and int(end_position) <= int(scaffold_size): # We are good, so we copy this line tmp.write(line) tmp.write(os.linesep)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jbrowse-archive-creator.iml Tue Jun 19 16:28:36 2018 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module type="PYTHON_MODULE" version="4"> + <component name="NewModuleRootManager" inherit-compiler-output="true"> + <exclude-output /> + <content url="file://$MODULE_DIR$" /> + <orderEntry type="inheritedJdk" /> + <orderEntry type="sourceFolder" forTests="false" /> + </component> +</module> \ No newline at end of file
--- a/jbrowseArchiveCreator.xml Tue May 01 21:52:46 2018 -0400 +++ b/jbrowseArchiveCreator.xml Tue Jun 19 16:28:36 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="2.1.2"> +<tool id="jbrowse_hub" name="JBrowse Archive Creator" version="2.2.0"> <description> This Galaxy tool is used to prepare your files to be ready for displaying on JBrowse with Apollo plugin </description> @@ -81,6 +81,7 @@ #set track_color = str($f.formatChoice.track_color) #set group_name = str($g.group_name) #set longLabel = str($f.formatChoice.longLabel) + #set extra_data_dict = {"track_color": $track_color, "group_name": $group_name, "long_label": $longLabel} @@ -88,11 +89,12 @@ #set bam_index = $f.formatChoice.BAM.metadata.bam_index ## Add Bam format specific fields - #silent $extra_data_dict.update({"index": $bam_index}) + #silent $extra_data_dict.update({"index": $bam_index, "nameIndex": str($f.formatChoice.nameIndex)}) #silent $prepare_json("Bam", $f.formatChoice.BAM, $index_track_final, $extra_data_dict) #end if #if $f.formatChoice.format_select == "bed" + #silent $extra_data_dict.update({"nameIndex": str($f.formatChoice.bedChoice.nameIndex)}) #if $f.formatChoice.bedChoice.bed_select == "bed_generic" #silent $prepare_json("Bed", $f.formatChoice.bedChoice.BED_generic, $index_track_final, $extra_data_dict) @@ -119,18 +121,22 @@ #end if #end if #if $f.formatChoice.format_select == "blastxml" - #silent $prepare_json("BlastXml", $f.formatChoice.BlastXML, $index_track_final, + #silent $extra_data_dict.update({"nameIndex": str($f.formatChoice.nameIndex)}) + #silent $prepare_json("BlastXml", $f.formatChoice.BlastXML, $index_track_final, extra_data_dict) #end if #if $f.formatChoice.format_select == "bigwig" + #silent $extra_data_dict.update({"nameIndex": str($f.formatChoice.nameIndex)}) #silent $prepare_json("BigWig", $f.formatChoice.BIGWIG, $index_track_final, $extra_data_dict) #end if #if $f.formatChoice.format_select == 'gff3' + #silent 
$extra_data_dict.update({"nameIndex": str($f.formatChoice.nameIndex)}) #silent $prepare_json("Gff3", $f.formatChoice.GFF3, $index_track_final, $extra_data_dict) #end if #if $f.formatChoice.format_select == "gtf" + #silent $extra_data_dict.update({"nameIndex": str($f.formatChoice.nameIndex)}) ## Add also GTF from Agustus? See https://github.com/ENCODE-DCC/kentUtils/issues/8 #silent $prepare_json("Gtf", $f.formatChoice.GTF, $index_track_final, $extra_data_dict) @@ -200,11 +206,12 @@ </valid> </sanitizer> </param> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="bed"> <conditional name="bedChoice"> <param name="bed_select" type="select" label="Bed Choice"> - <option value="bed_generic">BED format</option> + <option value="bed_generic">BED Generic</option> <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option> <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option> <option value="bed_blast_alignment_option">Blast alignments (bed12+12 / bigPsl.as)</option> @@ -217,6 +224,7 @@ type="data" label="Bed File" /> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="bed_simple_repeats_option"> <param @@ -225,6 +233,7 @@ type="data" label="Bed Simple Repeats (Bed4+12) File" /> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="bed_splice_junctions_option"> <param @@ -233,6 +242,7 @@ type="data" label="Bed Splice Junctions (Bed12+1) File" /> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when 
value="bed_blast_alignment_option"> <param @@ -241,6 +251,7 @@ type="data" label="Bed Blast Alignments (Bed12+12) File" /> + <param name="nameIndex" type="boolean" value="true" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="bed_blat_alignment_option"> <param @@ -249,6 +260,7 @@ type="data" label="Bed BLAT Alignments (bigPsl) File" /> + <param name="nameIndex" type="boolean" value="true" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> </conditional> <param name="longLabel" type="text" size="30" label="Track label" /> @@ -275,6 +287,7 @@ </valid> </sanitizer> </param> + <param name="nameIndex" type="boolean" value="true" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="bigwig"> <param @@ -291,6 +304,7 @@ </valid> </sanitizer> </param> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="gff3"> <param @@ -307,6 +321,7 @@ </valid> </sanitizer> </param> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> </when> <when value="gtf"> <param @@ -322,8 +337,9 @@ <add value="#"/> </valid> </sanitizer> </param> + <param name="nameIndex" type="boolean" value="false" label="Do you want to builds name indexes for this track to enable search for a feature by name" /> - </when> + </when> </conditional> </repeat> </repeat>
--- a/tracks/CanvasFeatures.py Tue May 01 21:52:46 2018 -0400 +++ b/tracks/CanvasFeatures.py Tue Jun 19 16:28:36 2018 -0400 @@ -26,10 +26,12 @@ if self.dataType == 'gff': # need .gff3.gz extension to index the name of the track with generate-name.pl track['urlTemplate'] = os.path.join('tracks', self.trackName + '.gff3.gz') - # needed to show match_part in Blat and Blast alignment as subfeatures - track['glyph'] = "JBrowse/View/FeatureGlyph/Segments" else: track['urlTemplate'] = os.path.join('tracks', self.trackName) + if 'glyph' in self.extraSettings: + track['glyph'] = self.extraSettings['glyph'] + if 'subfeatureClasses' in self.extraSettings: + track['subfeatureClasses'] = self.extraSettings['subfeatureClasses'] track['label'] = self.trackLabel track['category'] = self.extraSettings['category'] track['style'] = self.extraSettings['style']
--- a/tracks/TrackDb.py Tue May 01 21:52:46 2018 -0400 +++ b/tracks/TrackDb.py Tue Jun 19 16:28:36 2018 -0400 @@ -40,7 +40,8 @@ ("dataType", self.dataType), ("trackType", self.trackType)] ) - + + self.track_db["nameIndex"] = self.extraSettings['nameIndex'] extraConfigs = self.prepareExtraSetting() self.logger.debug("Generate extraConfigs = %s", json.dumps(extraConfigs))
--- a/util/Reader.py Tue May 01 21:52:46 2018 -0400 +++ b/util/Reader.py Tue Jun 19 16:28:36 2018 -0400 @@ -120,11 +120,15 @@ # TODO: Optimize this double loop for input_data in array_inputs: input_false_path = input_data["false_path"] - input_data["name"] = santitizer.sanitize_name_input(input_data["name"]) - extensionObject = ExtensionClass(input_false_path, input_data) - extensionObject.generateCustomTrack() - datatype_dictionary.update({input_data["order_index"]: extensionObject}) - self.logger.debug("%s object: %s has been created", ExtensionClass, input_data["name"]) + # if the file is empty, skip the rest + if os.path.isfile(input_false_path) and os.path.getsize(input_false_path) > 0: + input_data["name"] = santitizer.sanitize_name_input(input_data["name"]) + extensionObject = ExtensionClass(input_false_path, input_data) + extensionObject.generateCustomTrack() + datatype_dictionary.update({input_data["order_index"]: extensionObject}) + self.logger.debug("%s object: %s has been created", ExtensionClass, input_data["name"]) + else: + self.logger.info("The input file: %s is empty, skip creating the track for this data", input_data["name"]) return datatype_dictionary
--- a/util/subtools.py Tue May 01 21:52:46 2018 -0400 +++ b/util/subtools.py Tue Jun 19 16:28:36 2018 -0400 @@ -382,8 +382,11 @@ p = _handleExceptionAndCheckCall(array_call) return p -def generate_names(outputFolder, hashBits=4): - array_call = ['generate-names.pl', '--hashBits', '4', '-v', '--out', outputFolder] +def generate_names(outputFolder, nameIndexTrackList, hashBits=4): + array_call = ['generate-names.pl', '--hashBits', str(hashBits), '-v', '--completionLimit', '0', '--out', outputFolder] + if nameIndexTrackList: + array_call.append('--tracks') + array_call.append(','.join(nameIndexTrackList)) p = _handleExceptionAndCheckCall(array_call) return p