HOMER database files

Files created by makeTagDirectory

\n'] + + flist.sort() + for i,f in enumerate(flist): + if not(os.path.isdir(f)): + fn = os.path.split(f)[-1] + res.append('\n' % (fn,getFileString(fn, dr))) + + res.append('

\n') + + return res + +if __name__ == '__main__': + op = optparse.OptionParser() + op.add_option('-e', '--executable', default='makeTagDirectory') + op.add_option('-o', '--htmloutput', default=None) + op.add_option('-f', '--format', default="sam") + opts, args = op.parse_args() + #assert os.path.isfile(opts.executable),'## makeTagDirectory.py error - cannot find executable %s' % opts.executable + + #if not os.path.exists(opts.outputdir): + #os.makedirs(opts.outputdir) + f = makeTagDirectory(opts, args) + + html,retval = f.run_makeTagDirectory() + f = open(opts.htmloutput, 'w') + f.write(''.join(html)) + f.close() + if retval <> 0: + print >> sys.stderr, serr # indicate failure + + + diff -r a281b5931ffb -r 675d25a0b9d4 tools/makeTagDirectory.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/makeTagDirectory.xml Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,139 @@ + + + blat + weblogo + ghostscript + + (TagDirectory). Used by findPeaks + + + + export PATH=$PATH:$database.fields.path; + + makeTagDirectory $tagdir + #for $infile in alignment_files: + $infile + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + + .. class:: infomark + + **Homer makeTagDirectory** + + For more options, look under: "Command line options" + + http://biowhat.ucsd.edu/homer/ngs/tagDir.html + +**Parameter list** + +Command line options (not all of them are supported):: + + Usage: makeTagDirectory <directory> <alignment file 1> [file 2] ... [options] + + Creates a platform-independent 'tag directory' for later analysis. + Currently BED, eland, bowtie, and sam files are accepted. The program will try to + automatically detect the alignment format if not specified. Program will also + unzip *.gz, *.bz2, and *.zip files and convert *.bam to sam files on the fly + Existing tag directories can be added or combined to make a new one using -d/-t + If more than one format is needed and the program cannot auto-detect it properly, + make separate tag directories by running the program separately, then combine them. + To perform QC/manipulations on an existing tag directory, add "-update" + + Options: + -fragLength <# | given> (Set estimated fragment length - given: use read lengths) + By default treats the sample as a single read ChIP-Seq experiment + -format <X> where X can be: (with column specifications underneath) + bed - BED format files: + (1:chr,2:start,3:end,4:+/- or read name,5:# tags,6:+/-) + -force5th (5th column of BED file contains # of reads mapping to position) + sam - SAM formatted files (use samTools to covert BAMs into SAM if you have BAM) + -unique (keep if there is a single best alignment based on mapq) + -mapq <#> (Minimum mapq for -unique, default: 10, set negative to use AS:i:/XS:i:) + -keepOne (keep one of the best alignments even if others exist) + -keepAll (include all alignments in SAM file) + -mis (Maximum allowed mismatches, default: no limit, uses MD:Z: tag) + bowtie - output from bowtie (run with --best -k 2 options) + (1:read name,2:+/-,3:chr,4:position,5:seq,6:quality,7:NA,8:misInfo) + eland_result - output from basic eland + (1:read name,2:seq,3:code,4:#zeroMM,5:#oneMM,6:#twoMM,7:chr, + 8:position,9:F/R,10-:mismatches + eland_export - output from illumina pipeline (22 columns total) + (1-5:read name info,9:sequence,10:quality,11:chr,13:position,14:strand) + eland_extended - output from illumina pipeline (4 columns total) + (1:read name,2:sequence,3:match stats,4:positions[,]) + mCpGbed - encode style mCpG reporting in extended BED format, no auto-detect + (1:chr,2:start,3:end,4:name,5:,6:+/-,7:,8:,9:,10:#C,11:#mC) + allC - Lister style output files detailing the read information about all cytosines + (1:chr,2:pos,3:strand,4:context,#mC,#totalC,#C + -minCounts <#> (minimum number of reads to report mC/C ratios, default: 10) + -mCcontext <CG|CHG|CHH|all> (only use C's in this context, default: CG) + HiCsummary - minimal paired-end read mapping information + (1:readname,2:chr1,3:5'pos1,4:strand1,5:chr2,6:5'pos2,7:strand2) + -force5th (5th column of BED file contains # of reads mapping to position) + -d <tag directory> [tag directory 2] ... (add Tag directory to new tag directory) + -t <tag file> [tag file 2] ... (add tag file i.e. *.tags.tsv to new tag directory) + -single (Create a single tags.tsv file for all "chromosomes" - i.e. if >100 chromosomes) + -update (Use current tag directory for QC/processing, do not parse new alignment files) + -tbp <#> (Maximum tags per bp, default: no maximum) + -precision <1|2|3> (number of decimal places to use for tag totals, default: 1) + + GC-bias options: + -genome <genome version> (To see available genomes, use "-genome list") + -or- (for custom genomes): + -genome <path-to-FASTA file or directory of FASTA files> + + -checkGC (check Sequence bias, requires "-genome") + -freqStart <#> (offset to start calculating frequency, default: -50) + -freqEnd <#> (distance past fragment length to calculate frequency, default: +50) + -oligoStart <#> (oligo bias start) + -oligoEnd <#> (oligo bias end) + -normGC <target GC profile file> (i.e. tagGCcontent.txt file from control experiment) + Use "-normGC default" to match the genomic GC distribution + -normFixedOligo <oligoFreqFile> (normalize 5' end bias, "-normFixedOligo default" ok) + -minNormRatio <#> (Minimum deflation ratio of tag counts, default: 0.25) + -maxNormRatio <#> (Maximum inflation ratio of tag counts, default: 2.0) + -iterNorm <#> (Sets -max/minNormRatio to 1 and 0, iteratively normalizes such that the + resulting distrubtion is no more than #% different than target, i.e. 0.1,default: off) + + Paired-end/HiC options + -illuminaPE (when matching PE reads, assumes last character of read name is 0 or 1) + -removePEbg (remove paired end tags within 1.5x fragment length on same chr) + -PEbgLength <#> (remove PE reads facing on another within this distance, default: 1.5x fragLen) + -restrictionSite <seq> (i.e. AAGCTT for HindIII, assign data < 1.5x fragment length to sites) + Must specify genome sequence directory too. (-rsmis <#> to specify mismatches, def: 0) + -both, -one, -onlyOne, -none (Keeps reads near restriction sites, default: keep all) + -removeSelfLigation (removes reads linking same restriction fragment) + -removeRestrictionEnds (removes reads starting on a restriction fragment) + -assignMidPoint (will place reads in the middle of HindIII fragments) + -restrictionSiteLength <#> (maximum distance from restriction site, default: 1.5x fragLen) + -removeSpikes <size bp> <#> (remove tags from regions with > than # times + the average tags per size bp, suggest "-removeSpikes 10000 5") + + + + + diff -r a281b5931ffb -r 675d25a0b9d4 tools/pos2bed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/pos2bed.xml Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,37 @@ + + + homer + + + + + pos2bed.pl $input_peak 1> $out_bed + 2> $out_log || echo "Error running pos2bed." >&2 + + + + + + + + + + + + + + + + + + + .. class:: infomark + + Converts: homer peak positions -(to)-> BED format + + **Homer pos2bed.pl** + + http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html + + + diff -r a281b5931ffb -r 675d25a0b9d4 tools/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/tool_dependencies.xml Mon Aug 12 08:16:21 2013 -0400 @@ -0,0 +1,26 @@ + + + + + + http://biowhat.ucsd.edu/homer/configureHomer.pl + + $INSTALL_DIR/homer/ + + configureHomer.pl + $INSTALL_DIR/homer/ + + perl .$INSTALL_DIR/homer/configureHomer.pl -install + + + $INSTALL_DIR/homer/bin + + + + + I'm sorry but this may not work + + + + +