# HG changeset patch
# User bgruening
# Date 1377551638 14400
# Node ID f17bdf6f27bd35aaba9c9293c3c164ae1ff0699c
# Parent  b2e673e1db33e4ab139eefa88205bd426ef2b680
Deleted selected files

diff -r b2e673e1db33 -r f17bdf6f27bd README.rst
--- a/README.rst	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-Galaxy datatypes for HOMER tools
-================================
-
-These HOMER datatypes are copyright 2013 by Björn Grüning.
-
-See the licence text below.
-
-
-History
-=======
-
-======= ======================================================================
-Version Changes
-------- ----------------------------------------------------------------------
-v0.0.1  - First release.
-======= ======================================================================
-
-
-Installation
-============
-
-Installing automatically via the Galaxy Tool Shed is the simplest option.
-
-
-Manual Installation
-===================
-
-Normally you would install this via the Galaxy Tool Shed, which would move
-the provided homer.py file into a suitable location and process the
-datatypes_conf.xml entry to be combined with your local configuration.
-
-However, if you really want to, the following should work for a manual
-install. Add the following lines to the datatypes_conf.xml file in the
-Galaxy main folder::
-
-
-
-Also create the file lib/galaxy/datatypes/homer.py by moving, copying or
-linking the homer.py file provided in this tar-ball. Finally, add 'import
-homer' near the start of the file lib/galaxy/datatypes/registry.py (after
-the other import lines).
-
-
-Bug Reports
-===========
-
-You can file an issue at https://github.com/bgruening/galaxytools/issues or
-ask us on the Galaxy development list: http://lists.bx.psu.edu/listinfo/galaxy-dev
-
-
-Developers
-==========
-
-Development is happening here:
-
-    https://github.com/bgruening/galaxytools/
-
-
-Licence (MIT)
-=============
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-NOTE: This is the licence for the Galaxy HOMER datatypes **only**. HOMER
-and associated data files are available and licenced separately.
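
The datatypes_conf.xml entry that the manual-installation section above refers to registers the TagDirectory class from homer.py (included further down in this changeset) under the extension homer_tagdir. A minimal sketch of such an entry (the registration wrapper and the display_in_upload attribute are assumptions; only the extension and type come from homer.py) could look like::

    <registration>
        <!-- hypothetical entry: extension and type taken from homer.py, everything else assumed -->
        <datatype extension="homer_tagdir" type="galaxy.datatypes.homer:TagDirectory" display_in_upload="true"/>
    </registration>
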
diff -r b2e673e1db33 -r f17bdf6f27bd datatypes_conf.xml
--- a/datatypes_conf.xml	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
diff -r b2e673e1db33 -r f17bdf6f27bd homer.py
--- a/homer.py	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-"""
-HOMER special datatypes
-"""
-import os
-from galaxy.datatypes.data import get_file_peek
-from galaxy.datatypes.data import Text, Data
-from galaxy.datatypes.metadata import MetadataElement
-from galaxy.datatypes.images import Html
-
-
-class TagDirectory( Html ):
-    """Base class for HOMER's Tag Directory datatype."""
-
-    file_ext = 'homer_tagdir'
-    composite_type = 'auto_primary_file'
-    allow_datatype_change = False
-
-    def __init__(self, **kwd):
-        Html.__init__( self, **kwd )
-        #self.add_composite_file('tagInfo.txt', description = 'basic configuration information', mimetype = 'text/html')  # Basic configuration information.
-        self.add_composite_file('tagLengthDistribution.txt', description = 'histogram of read lengths used for alignment', mimetype = 'text/html')  # Histogram of read lengths used for alignment.
-        self.add_composite_file('tagCountDistribution.txt', description = 'histogram of clonal read depth, showing the number of reads per unique position', mimetype = 'text/html')  # Histogram of clonal read depth (reads per unique position).
-        self.add_composite_file('tagAutocorrelation.txt', description = 'distribution of distances between adjacent reads in the genome', mimetype = 'text/html')  # Distribution of distances between adjacent reads in the genome.
-        self.add_composite_file('tagFreq.txt', description = "nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads", mimetype = 'text/html', optional=True)  # Nucleotide and dinucleotide frequencies by distance from the 5' end of all reads.
-        self.add_composite_file('tagFreqUniq.txt', description = "nucleotide and dinucleotide frequencies as a function of distance from the 5' end of all reads (counted only once)", mimetype = 'text/html', optional=True)  # Same as tagFreq.txt, but each genomic position is counted only once.
-        self.add_composite_file('tagGCcontent.txt', description = 'Distribution of fragment GC%-content', mimetype = 'text/html', optional=True)  # Distribution of fragment GC%-content.
-        self.add_composite_file('genomeGCcontent.txt', description = 'Distribution of fragment GC%-content at each location in the genome', mimetype = 'text/html', optional=True)  # Distribution of GC%-content at each location in the genome.
-
-    def regenerate_primary_file(self, dataset):
-        """
-        Regenerate the index (primary) file after metadata generation.
-        """
-        rval = ['HOMER database files']
-        rval.append('')
-        rval.append('HOMER Outputs:')
-        f = file( dataset.file_name, 'w' )
-        f.write( '%s\n' % '\n'.join( rval ) )
-        f.close()
-        if not dataset.info:
-            dataset.info = 'HOMER datatype object'
-        if not dataset.blurb:
-            dataset.blurb = 'Composite file - HOMER'
-        return True
-
-    def generate_primary_file( self, dataset = None ):
-        rval = ['HOMER database files']
-        return "\n".join( rval )
-
-    def set_meta( self, dataset, **kwd ):
-        Html.set_meta( self, dataset, **kwd )
-        self.regenerate_primary_file(dataset)
-
-    def display_data(self, trans, data, preview=False, filename=None,
-                     to_ext=None, size=None, offset=None, **kwd):
-        """Apparently an old display method, but it still gets called.
-
-        This allows us to format the data shown in the central pane via the "eye" icon.
-        """
-        return "This is a HOMER database."
-
-    def set_peek( self, dataset, is_multi_byte=False ):
-        """Set the peek and blurb text."""
-        if not dataset.dataset.purged:
-            dataset.peek = "HOMER database (multiple files)"
-            dataset.blurb = "HOMER database (multiple files)"
-        else:
-            dataset.peek = 'file does not exist'
-            dataset.blurb = 'file purged from disk'
-
-    def display_peek( self, dataset ):
-        """Create HTML content, used for displaying peek."""
-        try:
-            return dataset.peek
-        except:
-            return "HOMER database (multiple files)"
-
-    def get_mime(self):
-        """Return the mime type of the datatype (pretend it is text for peek)."""
-        return 'text/plain'
-
-    def merge(split_files, output_file):
-        """Merge HOMER databases (not implemented)."""
-        raise NotImplementedError("Merging HOMER databases is not supported")
-
-    def split( cls, input_datasets, subdir_generator_function, split_params):
-        """Split a HOMER database (not implemented)."""
-        if split_params is None:
-            return None
-        raise NotImplementedError("Can't split HOMER databases")
-
diff -r b2e673e1db33 -r f17bdf6f27bd tool-data/homer.loc.sample
--- a/tool-data/homer.loc.sample	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#This is a sample file distributed with Galaxy that is used to define a
-#list of HOMER installations with associated databases, using three
-#tab-separated columns (the longer whitespace runs are TAB characters):
-#
-#This file should enable the administrator to establish some kind of
-#reproducibility of HOMER data. It is recommended to install HOMER from scratch
-#in a new folder and define the location here. If you want to update your HOMER
-#installation, consider checking out a completely new HOMER version in a new PATH.
-#
-#The entries are as follows:
-#
-#
-#
-#Your homer.loc file should include an entry per line for each "base name"
-#you have stored. For example:
-#
-#homer_08_Aug_2013	HOMER 4.2 08 Aug 2013	/data/0/galaxy_data/homer/08_08_2013/
-#homer_02_July_2013	HOMER 4.2 02 July 2013	/data/0/galaxy_data/homer/02_07_2013/
-#homer_1_Jan_2013	HOMER 4.1 1 Jan 2013	/data/0/galaxy_data/homer/01_01_2013
-
-#...etc...
-#
-#You can install and populate HOMER with all relevant data by following the instructions here:
-#http://biowhat.ucsd.edu/homer/introduction/install.html
-#
-#The Galaxy Tool Shed will take care of installing all requirements, but they are only
-#accessible at execution time, so feel free to ignore warnings during the installation
-#about missing weblogo, blat and co.
-
-
diff -r b2e673e1db33 -r f17bdf6f27bd tool-data/homer_available_genomes.loc.sample
--- a/tool-data/homer_available_genomes.loc.sample	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-hg18
-hg19
-mm9
-mm10
diff -r b2e673e1db33 -r f17bdf6f27bd tool_dependencies.xml
--- a/tool_dependencies.xml	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-
-
-
-
-
-
-
-                ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-x64-linux.tar.gz
-                ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-ia32-linux.tar.gz
-                ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-ia32-linux.tar.gz
-                ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-universal-macosx.tar.gz
-                ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-universal-macosx.tar.gz
-                ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.26/ncbi-blast-2.2.26+-universal-macosx.tar.gz
-
-
-
-                tar -zxvf $INSTALL_DIR/ncbi-blast-2.2.26+-x64-linux.tar.gz ;
-                tar -zxvf $INSTALL_DIR/ncbi-blast-2.2.26+-ia32-linux.tar.gz ;
-                tar -zxvf $INSTALL_DIR/ncbi-blast-2.2.26+-universal-macosx.tar.gz ;
-
-
-                $INSTALL_DIR
-
-
-
-Downloads the precompiled 32-bit Linux, 64-bit Linux, or Mac OS X BLAST+
-binaries from the NCBI, which is faster than performing a local compilation,
-avoids any issues with build dependencies, and is more reproducible between
-installations as there is no variability from the compiler or library versions.
-
-For more details, see:
-http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
-
-
diff -r b2e673e1db33 -r f17bdf6f27bd tools/findMotifsGenome.xml
--- a/tools/findMotifsGenome.xml	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-
-
-
-        blat
-        weblogo
-        ghostscript
-
-
-        #import os
-        #import tempfile
-
-        #set $tmpdir = os.path.abspath( tempfile.mkdtemp() )
-        export PATH=\$PATH:$database.fields.path;
-
-        findMotifsGenome.pl $infile ${infile.metadata.dbkey} $tmpdir
-
-        -p 4
-        $mask
-        -size $size
-        -len $motif_len
-        -mis $mismatches
-        -S $number_of_motifs
-        $noweight
-        $cpg
-        -nlen $nlen
-        -olen $olen
-        $hypergeometric
-        $norevopp
-        $rna
-
-        #if $bg_infile:
-            -bg $bg_infile
-        #end if
-
-        #if $logfile_output:
-            2> $out_logfile
-        #else:
-            2>&1
-        #end if
-
-        ;
-        cp $tmpdir/knownResults.txt $known_results_tabular;
-
-        #if $concat_motifs_output:
-            cp $tmpdir/homerMotifs.all.motifs $out_concat_motifs;
-        #end if
-
-        #if $html_output:
-            #set $go_path = os.path.join($tmpdir, 'geneOntology.html')
-
-            mkdir $denovo_results_html.files_path;
-            cp $tmpdir/homerResults.html $denovo_results_html;
-            cp $tmpdir/homerResults.html "$denovo_results_html.files_path";
-            cp -r $tmpdir/homerResults/ "$denovo_results_html.files_path";
-
-            mkdir "$known_results_html.files_path";
-            cp $tmpdir/knownResults.html $known_results_html;
-            cp $tmpdir/knownResults.html "$known_results_html.files_path";
-            cp $tmpdir/homerResults.html "$known_results_html.files_path";
-            cp -r $tmpdir/knownResults/ "$known_results_html.files_path";
-
-            #if os.path.exists( $go_path ):
-                cp $go_path "$denovo_results_html.files_path";
-                cp $go_path "$known_results_html.files_path";
-            #end if
-
-        #end if
-
-        ##rm -rf $tmpdir
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            html_output is True
-
-            html_output is True
-
-            concat_motifs_output is True
-
-            logfile_output is True
-
-
-
-
-
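
The findMotifsGenome.pl wrapper above copies its results into five datasets (known_results_tabular, out_concat_motifs, denovo_results_html, known_results_html and out_logfile), and the surviving filter conditions show that everything except the tabular output is optional. In a Galaxy tool wrapper this is expressed as <data> outputs carrying <filter> tags; a minimal sketch using those names (the formats and labels are assumptions) could look like::

    <outputs>
        <data format="tabular" name="known_results_tabular" label="known motif enrichment (tabular)" />
        <data format="txt" name="out_concat_motifs" label="homerMotifs.all.motifs">
            <filter>concat_motifs_output is True</filter>
        </data>
        <data format="html" name="denovo_results_html" label="de novo motif results (html)">
            <filter>html_output is True</filter>
        </data>
        <data format="html" name="known_results_html" label="known motif results (html)">
            <filter>html_output is True</filter>
        </data>
        <data format="txt" name="out_logfile" label="log file">
            <filter>logfile_output is True</filter>
        </data>
    </outputs>
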
-
-
-
-        .. class:: infomark
-
-        **Homer findMotifsGenome**
-
-Autonormalization attempts to remove sequence bias from lower-order oligos (1-mers, 2-mers, ... up to #).
-Region-level autonormalization (-nlen #), which covers 1/2/3-mers by default, attempts to normalize background regions by adjusting their weights.
-If this is not getting the job done (autonormalization is not guaranteed to remove all sequence bias), you can try the more aggressive motif-level autonormalization (-olen #).
-This performs the autonormalization routine on the oligo table during de novo motif discovery.
-
-
-
diff -r b2e673e1db33 -r f17bdf6f27bd tools/findPeaks.xml
--- a/tools/findPeaks.xml	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-
-
-
-        blat
-        weblogo
-        ghostscript
-
-
-
-        export PATH=\$PATH:$database.fields.path;
-
-        findPeaks $affected_tag_dir.extra_files_path -o $outputPeakFile
-
-        #if $control_tag_dir:
-            -i $control_tag_dir.extra_files_path
-        #end if
-
-        #if $logfile_output:
-            2> $out_logfile
-        #else:
-            2>&1
-        #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        .. class:: infomark
-
-        **Homer findPeaks**
-
-Requires tag directories (see makeTagDirectory).
-
-
-
diff -r b2e673e1db33 -r f17bdf6f27bd tools/homer_macros.xml
--- a/tools/homer_macros.xml	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
diff -r b2e673e1db33 -r f17bdf6f27bd tools/makeTagDirectory.xml
--- a/tools/makeTagDirectory.xml	Mon Aug 26 17:13:45 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,138 +0,0 @@
-
-
-        blat
-        weblogo
-        ghostscript
-
-    (TagDirectory)
-
-        #set $HOMER_PATH = str($database.fields.path)
-        export PATH=\$PATH:$database.fields.path;
-
-        makeTagDirectory $tag_dir.extra_files_path
-        #for $infile in $alignment_files:
-            $infile.file
-        #end for
-
-        #if $logfile_output:
-            2> $out_logfile
-        #else:
-            2>&1
-        #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        .. class:: infomark
-
-        **Homer makeTagDirectory**
-
-        For more options, look under "Command line options" at:
-
-        http://biowhat.ucsd.edu/homer/ngs/tagDir.html
-
-**Parameter list**
-
-Command line options (not all of them are supported)::
-
-    Usage: makeTagDirectory <directory> <alignment file 1> [file 2] ... [options]
-
-    Creates a platform-independent 'tag directory' for later analysis.
-    Currently BED, eland, bowtie, and sam files are accepted. The program will try to
-    automatically detect the alignment format if it is not specified. The program will also
-    unzip *.gz, *.bz2, and *.zip files and convert *.bam to sam files on the fly.
-    Existing tag directories can be added or combined to make a new one using -d/-t.
-    If more than one format is needed and the program cannot auto-detect it properly,
-    make separate tag directories by running the program separately, then combine them.
-    To perform QC/manipulations on an existing tag directory, add "-update"
-
-    Options:
-        -fragLength <# | given> (Set estimated fragment length - given: use read lengths)
-            By default treats the sample as a single read ChIP-Seq experiment
-        -format <X> where X can be: (with column specifications underneath)
-            bed - BED format files:
-                (1:chr,2:start,3:end,4:+/- or read name,5:# tags,6:+/-)
-                -force5th (5th column of BED file contains # of reads mapping to position)
-            sam - SAM formatted files (use samTools to convert BAMs into SAM if you have BAM)
-                -unique (keep if there is a single best alignment based on mapq)
-                    -mapq <#> (Minimum mapq for -unique, default: 10, set negative to use AS:i:/XS:i:)
-                -keepOne (keep one of the best alignments even if others exist)
-                -keepAll (include all alignments in SAM file)
-                -mis (Maximum allowed mismatches, default: no limit, uses MD:Z: tag)
-            bowtie - output from bowtie (run with --best -k 2 options)
-                (1:read name,2:+/-,3:chr,4:position,5:seq,6:quality,7:NA,8:misInfo)
-            eland_result - output from basic eland
-                (1:read name,2:seq,3:code,4:#zeroMM,5:#oneMM,6:#twoMM,7:chr,
-                 8:position,9:F/R,10-:mismatches
-            eland_export - output from illumina pipeline (22 columns total)
-                (1-5:read name info,9:sequence,10:quality,11:chr,13:position,14:strand)
-            eland_extended - output from illumina pipeline (4 columns total)
-                (1:read name,2:sequence,3:match stats,4:positions[,])
-            mCpGbed - encode style mCpG reporting in extended BED format, no auto-detect
-                (1:chr,2:start,3:end,4:name,5:,6:+/-,7:,8:,9:,10:#C,11:#mC)
-            allC - Lister style output files detailing the read information about all cytosines
-                (1:chr,2:pos,3:strand,4:context,#mC,#totalC,#C
-                -minCounts <#> (minimum number of reads to report mC/C ratios, default: 10)
-                -mCcontext <CG|CHG|CHH|all> (only use C's in this context, default: CG)
-            HiCsummary - minimal paired-end read mapping information
-                (1:readname,2:chr1,3:5'pos1,4:strand1,5:chr2,6:5'pos2,7:strand2)
-        -force5th (5th column of BED file contains # of reads mapping to position)
-        -d <tag directory> [tag directory 2] ... (add Tag directory to new tag directory)
-        -t <tag file> [tag file 2] ... (add tag file i.e. *.tags.tsv to new tag directory)
-        -single (Create a single tags.tsv file for all "chromosomes" - i.e. if >100 chromosomes)
-        -update (Use current tag directory for QC/processing, do not parse new alignment files)
-        -tbp <#> (Maximum tags per bp, default: no maximum)
-        -precision <1|2|3> (number of decimal places to use for tag totals, default: 1)
-
-        GC-bias options:
-        -genome <genome version> (To see available genomes, use "-genome list")
-            -or- (for custom genomes):
-        -genome <path-to-FASTA file or directory of FASTA files>
-
-        -checkGC (check sequence bias, requires "-genome")
-            -freqStart <#> (offset to start calculating frequency, default: -50)
-            -freqEnd <#> (distance past fragment length to calculate frequency, default: +50)
-            -oligoStart <#> (oligo bias start)
-            -oligoEnd <#> (oligo bias end)
-        -normGC <target GC profile file> (i.e. tagGCcontent.txt file from control experiment)
-            Use "-normGC default" to match the genomic GC distribution
-        -normFixedOligo <oligoFreqFile> (normalize 5' end bias, "-normFixedOligo default" ok)
-        -minNormRatio <#> (Minimum deflation ratio of tag counts, default: 0.25)
-        -maxNormRatio <#> (Maximum inflation ratio of tag counts, default: 2.0)
-        -iterNorm <#> (Sets -max/minNormRatio to 1 and 0, iteratively normalizes such that the
-            resulting distribution is no more than #% different than target, i.e. 0.1, default: off)
-
-        Paired-end/HiC options
-        -illuminaPE (when matching PE reads, assumes last character of read name is 0 or 1)
-        -removePEbg (remove paired end tags within 1.5x fragment length on same chr)
-            -PEbgLength <#> (remove PE reads facing one another within this distance, default: 1.5x fragLen)
-        -restrictionSite <seq> (i.e. AAGCTT for HindIII, assign data < 1.5x fragment length to sites)
-            Must specify genome sequence directory too. (-rsmis <#> to specify mismatches, def: 0)
-            -both, -one, -onlyOne, -none (Keeps reads near restriction sites, default: keep all)
-            -removeSelfLigation (removes reads linking same restriction fragment)
-            -removeRestrictionEnds (removes reads starting on a restriction fragment)
-            -assignMidPoint (will place reads in the middle of HindIII fragments)
-            -restrictionSiteLength <#> (maximum distance from restriction site, default: 1.5x fragLen)
-        -removeSpikes <size bp> <#> (remove tags from regions with > than # times
-            the average tags per size bp, suggest "-removeSpikes 10000 5")
-
-
-
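
The makeTagDirectory command block above loops over $alignment_files and passes each $infile.file to the program, which implies a repeat block of data parameters in the wrapper's <inputs> section. A minimal sketch of such a repeat (the title, label and accepted formats are assumptions, the formats being guessed from the help text) could look like::

    <inputs>
        <repeat name="alignment_files" title="Alignment file" min="1">
            <!-- hypothetical parameter: only the names are taken from the command block above -->
            <param name="file" type="data" format="sam,bed" label="Aligned reads (SAM or BED)" />
        </repeat>
    </inputs>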