# HG changeset patch # User sauria # Date 1433527385 14400 # Node ID 5e3de32a8688dc163c53c91e147613833a1a399c # Parent 5d96a89cd82093272349bb43faaac79109a54b70 planemo upload for repository https://github.com/bxlab/galaxy_tools/suites/suite_hifive commit abaefa638db82abe90f335d783c9503dce28944f-dirty diff -r 5d96a89cd820 -r 5e3de32a8688 hifive.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hifive.xml Fri Jun 05 14:03:05 2015 -0400 @@ -0,0 +1,388 @@ + + + manipulate, analyze, and plot HiC and 5C chromatin interaction data + + hifive_macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (command["command_select"] == "hic-complete") + + + (command["command_select"] == "hic-complete") + + + (command["command_select"] == "hic-complete") + + + (command["command_select"] == "fends") + + + (command["command_select"] == "hic-data") + + + (command["command_select"] == "hic-project") + + + (command["command_select"] == "hic-normalize") + + + (command["command_select"] == "hic-heatmap") + + + (command["command_select"] == "hic-heatmap") + (command["image"]["generate"] == "yes") + + + (command["command_select"] == "hic-interval") + + + (command["command_select"] == "hic-interval") + (command["image"]["generate"] == "yes") + + + (command["command_select"] == "5c-complete") + + + (command["command_select"] == "5c-complete") + + + (command["command_select"] == "5c-complete") + + + (command["command_select"] == "fragments") + + + (command["command_select"] == "5c-data") + + + (command["command_select"] == "5c-project") + + + (command["command_select"] == "5c-normalize") + + + (command["command_select"] == "5c-heatmap") + + + (command["command_select"] == 
"5c-heatmap") + (command["image"]["generate"] == "yes") + + + (command["command_select"] == "5c-interval") + + + (command["command_select"] == "5c-interval") + (command["image"]["generate"] == "yes") + + + + diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/complete_fivec_project.xml --- a/hifive/hifive/complete_fivec_project.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,326 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - from start to finish, including creating a fragmentset, dataset, and corrected HiFive 5C project - - hifive 5c-complete $analysis.algorithm -q - -m $mindist -x $maxdist - - #if str( $datafile.format ) == "bam": - #for $bam_file in $datafile.paired_bam: - -B "${bam_file.readend1}" "${bam_file.readend2}" - #end for - #else: - #for $count_file in $datafile.count_files: - -C "${count_file.count}" - #end for - #end if - #if str( $regions ) != "": - -r $regions - #end if - #if str( $analysis.algorithm ) == "probability" or str( $analysis.algorithm ) == "binning-probability" or str( $analysis.algorithm ) == "probability-binning": - -b $analysis.probiter -l $analysis.step -g $analysis.change $analysis.precalc - #end if - #if str( $analysis.algorithm ) == "express" or str( $analysis.algorithm ) == "binning-express" or str( $analysis.algorithm ) == "probability-express": - $analysis.removedist -w $analysis.expreads - #if str( $analysis.subalgorithm.kr ) == "yes": - -z - #else: - -e $analysis.expiter $analysis.logged - #end if - #end if - #if str( $analysis.algorithm ) == "binning" or str( $analysis.algorithm ) == "binning-express" or str( $analysis.algorithm ) == "binning-probability" or str( $analysis.algorithm ) == "express-binning" or str( $analysis.algorithm ) == "probability-binning": - -i $analysis.biniter -t $analysis.threshold -y $analysis.binreads - -v $analysis.model -n $analysis.modelbins -u $analysis.modeltypes - #end if - - -o $frags $data $project $bed - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool takes a BED file containing probed restriction enzyme fragment boundaries and either a set of paired-end BAM files or a set of tabular counts files containing pairs of fragments and associated counts and creates and analyzes a HiFive 5C project. - -The bed file containing targeted restriction enzyme fragment boundaries is converted into an hdf5-type fragment file of fragment characteristics. In addition to coordinates, strand, and chromosome information, additional columns can be included containing other fragment characteristics, such as GC content. If additional columns are included, they must be labeled in the header with a label containing no spaces or commas. These names can be used with the binning algorithm to include the fragment characteristic in the model to be learned. - -Reads are then loaded and paired with the specified fragment file, creating a HiFive dataset object. Data can be a series of paired-end bam files or a tabular format list of paired fragments and their observed read count (fragment1 fragment2 count). - -Next, fends are filtered in an iterative manner using the minimum interaction cutoff and interaction size parameters specified to ensure that all valid fends have at least the minimum number of interactions with other valid fends. Subsequently, a distance dependence approximation curve is calculated piecewise using the number of bins specified. The first bin encompasses all interactions less than or equal to the minimum bin cutoff value. 
The remaining bins are evenly sized between log(minimum cutoff) and log(max possible interaction size). - -Finally, correction values are learned for either each valid fragment, ranges of fragment characteristics, or both. The 'probability' and 'express' algorithms learn correction values associated with each fragment while the 'binning' algorithm learns fragment characteristic corrections. These can be chained together in either order to produce more robust corrections. - -The probability algorithm assumes non-zero counts to be distributed according to a log-normal distribution with each interaction having a mean equal to the distance-dependence predicted signal times each of the interaction fragment correction parameters and a universal sigma value. Using the probability algorithm, learning is done using a backtracking line search gradient descent approach. Learning proceeds for up to the maximum number of iterations but is terminated early if all of the absolute gradient values fall below the cutoff threshold. At each step, the learning rate is scaled down by the step value if the current learning rate does not produce sufficient improvement as measured by the Armijo criterion. - -The express algorithm is a variant of matrix balancing and approximates the corrections through an iterative norm-2 adjustment to give all fragments a mean ratio of one for valid counts versus predicted signal from distance-dependence. This can be done using intra-regional interactions, inter-regional interactions, or all interactions. - -The binning algorithm divides each model parameter into some number of bins and based on a log-normal distribution, correction values for each bin are learned, maximizing the log-likelihood of the data. Model parameters can be the fragment lengths ('len') and any other characteristics passed as additional columns (with header labels) in the bed file used to create the HiFive fragment file. 
Each parameter has a number of bins specified to divide it into and can be partitioned according to its type to contain approximately equal numbers of fragments ('even'), or to cover equal portions of the range of parameter values ('fixed'). In addition, parameter types can include the '-const' suffix to denote a parameter that should not be optimized after seeding. - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/complete_hic_project.xml --- a/hifive/hifive/complete_hic_project.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,273 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - from start to finish, including creating a fendset, dataset, and corrected HiFive HiC project - - hifive hic-complete $analysis.algorithm -q - -f $minint -m $filtermin -x $filtermax -j $minbin -n $numbin -B $bed - - #if str( $datafile.format ) == "bam": - #for $bam_file in $datafile.paired_bam: - --bam ${bam_file.readend1} ${bam_file.readend2} - #end for - #elif str( $datafile.format ) == "mat": - --mat ${datafile.mat} - #else: - #for raw_file in $datafile.raw_files: - --raw ${raw_file.raw} - #end for - #end if - #if str( $chroms ) != "": - -c $chroms - #end if - #if str( $analysis.algorithm ) == "probability" or str( $analysis.algorithm ) == "binning-probability": - -b $analysis.probiter -l $analysis.step -g $analysis.change $analysis.precalc - #end if - #if str( $analysis.algorithm ) == "express" or str( $analysis.algorithm ) == "binning-express" : - $analysis.removedist -w $analysis.expreads $analysis.binary -g $analysis.change - #if str( $analysis.subalgorithm.kr ) == "yes": - -z - #else - -e $analysis.expiter - #end if - #end if - #if str( $analysis.algorithm ) == "binning" or str( $analysis.algorithm ) == "binning-express" or str( $analysis.algorithm ) == "binning-probability": - -r $analysis.biniter -t $analysis.threshold -y $analysis.binreads - -v $analysis.model -s $analysis.modelbins -u 
$analysis.modeltypes --pseudocounts $analysis.pseudo - #end if - - -o $fends $data $project - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool takes a BED file containing restriction enzyme fragment boundaries or cutsites and a set of paired-end BAM files, a tabular counts file containing pairs of fend indices and associated counts, or a set of tabular raw files containing mapping coordinate pairs for each mapped read and creates and analyzes a HiFive HiC project. - -The bed file containing either restriction enzyme cutopoints or fragment bounds is converted into an hdf5-type fragment file of fragment characteristics. In addition to coordinates, strand, and chromosome information, additional columns can be included containing other fragment characteristics, such as GC content. If additional columns are included, they must be labeled in the header with a label containing no spaces or commas. These names can be used with the binning algorithm to include the fragment characteristic in the model to be learned. Additional characteristics should be comma-separated pairs of values corresponding to the upstream and downstream sides of the cutsite or ends of the fragment, depending on the whether the bed file contains cutsites or fragment coordinates, respectively. - -Reads are then paired with the specified fend file, creating a HiFive dataset object. Data can be a series of paired-end bam files, a tabular format list of paired genomic positions (chromosome1 coordinate1 strand1 chromosome2 coordinate2 strand2), or a HiCPipe-style mat-formatted list of fend-pairs and observed read counts. 
- -Finally, correction values are learned for either each valid fend, ranges of fend characteristics, or both. The 'probability' and 'express' algorithms learn correction values associated with each fend while the 'binning' algorithm learns fend characteristic corrections. These can be chained together in either order to produce more robust corrections. - -Using the probability algorithm, observations of counts are assumed to be distributed according to a binomial distribution with an observation probability for each interaction equal to the product of the distance-dependence signal and the two fend correction parameters. Using the probability algorithm, learning is done using a backtracking line search gradient descent approach. Learning proceeds for up to the maximum number of iterations but is terminated early if all of the absolute gradient values fall below the cutoff threshold. At each step, the learning rate is scaled down by the step value if the current learning rate does not produce sufficient improvement as measured by the Armijo criterion. - -The express algorithm is a variant of matrix balancing and approximates the corrections through an iterative norm-2 adjustment to give all fragments a mean ratio of one for valid counts versus signal predicted from distance-dependence. This can be done using intra-regional interactions, inter-regional interactions, or all interactions. - -The binning algorithm divides each model parameter into some number of bins and based on a binomial distribution, correction values for each bin are learned, maximizing the log-likelihood of the data. Model parameters can be the fend lengths ('len'), fend GC content ('gc'), and any other characteristics passed as additional columns (with header labels) in the bed file used to create the HiFive fend file. 
Each parameter has a number of bins specified to divide it into and can be partitioned according to its type to contain approximately equal numbers of fends ('even'), or to cover equal portions of the range of parameter values ('fixed'). In addition, parameter types can include the '-const' suffix to denote a parameter that should not be optimized after seeding. - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_fivec_dataset.xml --- a/hifive/hifive/create_fivec_dataset.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create a HiFive 5C data object - - hifive 5c-data -q - - #if str( $datafile.format ) == "bam": - #for $bam_file in $datafile.paired_bam: - -B "${bam_file.readend1}" "${bam_file.readend2}" - #end for - #else: - #for $count_file in $datafile.count_files: - -C "${count_file.count}" - #end for - #end if - - $frags $output - - - - - - - - - - - - - - - - - - - - - - - - - -This tool reads 5C data, pairs it with the specified fragment file, and creates a HiFive dataset object. Data can be a series of paired-end bam files or a tabular format list of paired fragments and their observed read count (fragment1 fragment2 count). - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_fivec_fragmentset.xml --- a/hifive/hifive/create_fivec_fragmentset.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create a HiFive fragment object - hifive fragments -q $input $output - - - - - - - - -This tool converts a bed file containing targeted restriction enzyme fragment boundaries into an hdf5-type fragment file of fragment characteristics. In addition to coordinates, strand, and chromosome information, additional columns can be included containing other fragment characteristics, such as GC content. 
If additional columns are included, they must be labeled in the header with a label containing no spaces or commas. These names can be used with the binning algorithm to include the fragment characteristic in the model to be learned. - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_fivec_heatmap.xml --- a/hifive/hifive/create_fivec_heatmap.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,119 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create an hdf5 dictionary of heatmaps for a HiFive 5C project - - hifive 5c-heatmap -q - -d $datatype $trans1 - - #if str( $binning.binned ) == "unbinned": - -b 0 -a $binning.array - #else: - -b $binning.binsize - #end if - #if str( $regions ) != "": - -r $regions - #end if - #if str( $dynamic.binning ) == "yes": - -y -x $dynamic.expbinsize -f $dynamic.minobs -g $dynamic.search $dynamic.failed - #end if - #if str( $image.generate ) == "yes": - -k min_color=$image.mincolor -k mid_color=$image.midcolor -k max_color=$image.maxcolor - -i $image_file -k logged=$image.logged - #if $image.minscore: - -k minscore=$image.minscore - #end if - #if $image.maxscore: - -k maxscore=$image.maxscore - #end if - #end if - - $data $output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (image['generate'] == "yes") - - - - -This tool takes a HiFive 5C project and calculates expected and observed interaction signals for all requested regions (and inter-region pairs, if requested). All signals, along with bin boundaries, are stored in an hdf5 dictionary. 
- - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_fivec_project.xml --- a/hifive/hifive/create_fivec_project.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create a HiFive 5C project object - - hifive 5c-project -q - -f $minint -m $filtermin -x $filtermax - $data $output - - - - - - - - - - - - - - - - - -This tool creates a HiFive 5C project object and pairs a 5C data file with it. Fragments are filtered in an iterative manner using the minimum interaction cutoff parameter specified to ensure that all valid fends have at least the minimum number of interactions with other valid fends. Subsequently, a distance dependence approximation line is calculated using a regression line to approximate the linear relationship between log(# reads) and log(distance). - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_hic_dataset.xml --- a/hifive/hifive/create_hic_dataset.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create a HiFive HiC data object - - hifive hic-data -q - - #if str( $datafile.format ) == "bam": - #for $bam_file in $datafile.paired_bam: - --bam ${bam_file.readend1} ${bam_file.readend2} - #end for - #elif str( $datafile.format ) == "mat": - --mat ${datafile.mat} - #else: - #for raw_file in $datafile.raw_files: - --raw ${raw_file.raw} - #end for - #end if - - -i $insert $fends $output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool reads HiC data, pairs it with the specified fend file, and creates a HiFive dataset object. Data can be a series of paired-end bam files, a tabular format list of paired genomic positions (chromosome1 coordinate1 strand1 chromosome2 coordinate2 strand2), or a HiCPipe-style mat-formatted list of fend-pairs and observed read counts. 
- - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_hic_fendset.xml --- a/hifive/hifive/create_hic_fendset.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create a HiFive fend object - hifive fends -q --bed $input $output - - - - - - - - -This tool converts a bed file containing either restriction enzyme cutpoints or fragment bounds into an hdf5-type fragment file of fragment characteristics. In addition to coordinates, strand, and chromosome information, additional columns can be included containing other fragment characteristics, such as GC content. If additional columns are included, they must be labeled in the header with a label containing no spaces or commas. These names can be used with the binning algorithm to include the fragment characteristic in the model to be learned. Additional characteristics should be comma-separated pairs of values corresponding to the upstream and downstream sides of the cutsite or ends of the fragment, depending on whether the bed file contains cutsites or fragment coordinates, respectively. 
- - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_hic_heatmap.xml --- a/hifive/hifive/create_hic_heatmap.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create an hdf5 dictionary of heatmaps for a HiFive HiC project - - hifive hic-heatmap -q - -b $binsize -d $datatype $trans1 - - #if str( $chroms ) != "": - -c $chroms - #end if - #if str( $dynamic.binning ) == "yes": - -y -x $dynamic.expbinsize -f $dynamic.minobs -a $dynamic.search $dynamic.failed - #end if - #if str( $image.generate ) == "yes": - -k min_color=$image.mincolor -k mid_color=$image.midcolor -k max_color=$image.maxcolor - -i $image_file -k logged=$image.logged - #if $image.minscore: - -k minscore=$image.minscore - #end if - #if $image.maxscore: - -k maxscore=$image.maxscore - #end if - #end if - - $data $output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (image['generate'] == "yes") - - - - - - - -This tool takes a HiFive HiC project and calculates expected and observed interaction signals for all requested chromosomes (and inter-chromosome pairs, if requested). All signals, along with bin boundaries, are stored in an hdf5 dictionary. - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/create_hic_project.xml --- a/hifive/hifive/create_hic_project.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create a HiFive HiC project object - - hifive hic-project -q - -f $minint -m $filtermin -x $filtermax -j $minbin -n $numbin - $data $output - - - - - - - - - - - - - - - - - - - - - - - -This tool creates a HiFive HiC project object and pairs a HiC data file with it. 
Fends are filtered in an iterative manner using the minimum interaction cutoff and interaction size parameters specified to ensure that all valid fends have at least the minimum number of interactions with other valid fends. Subsequently, a distance dependence approximation curve is calculated piecewise using the number of bins specified. The first bin encompasses all interactions less than or equal to the minimum bin cutoff value. The remaining bins are evenly sized between log(minimum cutoff) and log(max possible interaction size). - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/get_interval_fivec_data.xml --- a/hifive/hifive/get_interval_fivec_data.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,114 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create an interval file containing 5C data - - hifive 5c-interval -q - -c $region -d $datatype -b $binsize - - #if $start: - -s $start - #end if - #if $stop: - -e $stop - #end if - #if str( $dynamic.binning ) == "yes": - -y -x $dynamic.expbinsize -f $dynamic.minobs -g $dynamic.search $dynamic.failed - #end if - #if str( $image.generate ) == "yes": - -k min_color=$image.mincolor -k mid_color=$image.midcolor -k max_color=$image.maxcolor - -i $image_file -k logged=$image.logged - #if str( $binsize ) != "0": - $image.rotate - #end if - #if $image.minscore: - -k minscore=$image.minscore - #end if - #if $image.maxscore: - -k maxscore=$image.maxscore - #end if - #end if - - -q $project $output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (image['generate'] == "yes") - - - - -This tool pulls data from a HiFive 5C project file for a specified region and returns a chromatin interaction interval file. Data can be either uncorrected, corrected for distance dependence or fragment bias only, or fully corrected (enrichment). 
- - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/get_interval_hic_data.xml --- a/hifive/hifive/get_interval_hic_data.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - create an interval file containing HiC data - - hifive hic-interval -q - -c $chrom -d $datatype -b $binsize - - #if $start: - -s $start - #end if - #if $stop: - -e $stop - #end if - #if $maxdist: - -m $maxdist - #end if - #if str( $dynamic.binning ) == "yes": - -y -x $dynamic.expbinsize -f $dynamic.minobs -a $dynamic.search $dynamic.failed - #end if - #if str( $image.generate ) == "yes": - -k min_color=$image.mincolor -k mid_color=$image.midcolor -k max_color=$image.maxcolor - -i $image_file -k logged=$image.logged $image.rotate - #if $image.minscore: - -k minscore=$image.minscore - #end if - #if $image.maxscore: - -k maxscore=$image.maxscore - #end if - #end if - - $project $output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (image['generate'] == "yes") - - - - -This tool pulls data from a HiFive HiC project file for a specified region of a chromosome and returns a chromatin interaction interval file. Data can be either uncorrected, corrected for distance dependence or fend bias only, or fully corrected (enrichment). 
- - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/learn_fivec_normalization.xml --- a/hifive/hifive/learn_fivec_normalization.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,283 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - learn fragment normalization values using probablistic approach - - hifive 5c-normalize $analysis.algorithm -q - -m $mindist -x $maxdist -o $output - - #if str( $regions ) != "": - -r $regions - #end if - #if str( $analysis.algorithm ) == "probability" or str( $analysis.algorithm ) == "binning-probability" or str( $analysis.algorithm ) == "probability-binning": - -b $analysis.probiter -l $analysis.step -g $analysis.change $analysis.precalc - #end if - #if str( $analysis.algorithm ) == "express" or str( $analysis.algorithm ) == "binning-express" or str( $analysis.algorithm ) == "probability-express": - $analysis.removedist -w $analysis.expreads - #if str( $analysis.subalgorithm.kr ) == "yes": - -z - #else: - -e $analysis.expiter $analysis.logged - #end if - #end if - #if str( $analysis.algorithm ) == "binning" or str( $analysis.algorithm ) == "binning-express" or str( $analysis.algorithm ) == "binning-probability" or str( $analysis.algorithm ) == "express-binning" or str( $analysis.algorithm ) == "probability-binning": - -i $analysis.biniter -t $analysis.threshold -y $analysis.binreads - -v $analysis.model -n $analysis.modelbins -u $analysis.modeltypes - #end if - - $data - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool takes a HiFive 5C 
project and learns correction values for either each valid fragment, ranges of fragment characteristics, or both. The 'probability' and 'express' algorithms learn correction values associated with each fragment while the 'binning' algorithm learns fragment characteristic corrections. These can be chained together in either order to produce more robust corrections. - -The probability algorithm assumes non-zero counts to be distributed according to a log-normal distribution with each interaction having a mean equal to the distance-dependence predicted signal times each of the interaction fragment correction parameters and a universal sigma value. Using the probability algorithm, learning is done using a backtracking line search gradient descent approach. Learning proceeds for up to the maximum number of iterations but is terminated early if all of the absolute gradient values fall below the cutoff threshold. At each step, the learning rate is scaled down by the step value if the current learning rate does not produce sufficient improvement as measured by the Armijo criterion. - -The express algorithm is a variant of matrix balancing and approximates the corrections through an iterative norm-2 adjustment to give all fragments a mean ratio of one for valid counts versus signal predicted from distance-dependence. This can be done using intra-regional interactions, inter-regional interactions, or all interactions. - -The binning algorithm divides each model parameter into some number of bins and based on a log-normal distribution, correction values for each bin are learned, maximizing the log-likelihood of the data. Model parameters can be the fragment lengths ('len') and any other characteristics passed as additional columns (with header labels) in the bed file used to create the HiFive fragment file. 
Each parameter has a number of bins specified to divide it into and can be partitioned according to its type to contain approximately equal numbers of fragments ('even'), or to cover equal portions of the range of parameter values ('fixed'). In addition, parameter types can include the '-const' suffix to denote a parameter that should not be optimized after seeding. - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/learn_hic_normalization.xml --- a/hifive/hifive/learn_hic_normalization.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,220 +0,0 @@ - - - hifive - numpy - scipy - h5py - cython - - learn fend normalization values using probablistic approach - - hifive hic-normalize $analysis.algorithm -q - -m $mindist -x $maxdist -o $output - - #if str( $chroms ) != "": - -c $chroms - #end if - #if str( $analysis.algorithm ) == "probability" or str( $analysis.algorithm ) == "binning-probability": - -b $analysis.probiter -l $analysis.step -g $analysis.change $analysis.precalc - #end if - #if str( $analysis.algorithm ) == "express" or str( $analysis.algorithm ) == "binning-express" : - $analysis.removedist -w $analysis.expreads $analysis.binary -g $analysis.change - #if str( $analysis.subalgorithm.kr ) == "yes": - -z - #else - -e $analysis.expiter - #end if - #end if - #if str( $analysis.algorithm ) == "binning" or str( $analysis.algorithm ) == "binning-express" or str( $analysis.algorithm ) == "binning-probability": - -r $analysis.biniter -t $analysis.threshold -y $analysis.binreads - -v $analysis.model -s $analysis.modelbins -u $analysis.modeltypes --pseudocounts $analysis.pseudo - #end if - - $data - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - -This tool takes a HiFive HiC project and learns correction values for either each valid fend, ranges of fend characteristics, or both. The 'probability' and 'express' algorithms learn correction values associated with each fend while the 'binning' algorithm learns fend characteristic corrections. These can be chained together in either order to produce more robust corrections. - -Using the probability algorithm, observations of counts are assumed to be distributed according to a binomial distribution with an observation probability for each interaction equal to the product of the distance-dependence signal and the two fend correction parameters. Using the probability algorithm, learning is done using a backtracking line search gradient descent approach. Learning proceeds for up to the maximum number of iterations but is terminated early if all of the absolute gradient values fall below the cutoff threshold. At each step, the learning rate is scaled down by the step value if the current learning rate does not produce sufficient improvement as measured by the Armijo criterion. - -The express algorithm is a variant of matrix balancing and approximates the corrections through an iterative norm-2 adjustment to give all fragments a mean ratio of one for valid counts versus signal predicted from distance-dependence. This can be done using intra-regional interactions, inter-regional interactions, or all interactions. - -The binning algorithm divides each model parameter into some number of bins and based on a binomial distribution, correction values for each bin are learned, maximizing the log-likelihood of the data. Model parameters can be the fend lengths ('len'), fend GC content ('gc'), and any other characteristics passed as additional columns (with header labels) in the bed file used to create the HiFive fend file. 
Each parameter has a number of bins specified to divide it into and can be partitioned according to its type to contain approximately equal numbers of fends ('even'), or to cover equal portions of the range of parameter values ('fixed'). In addition, parameter types can include the '-const' suffix to denote a parameter that should not be optimized after seeding. - - - \ No newline at end of file diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/readme.rst --- a/hifive/hifive/readme.rst Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -============================= -Galaxy wrapper for HiFive -============================= - -This wrapper allows use of the hifive package, a set of tools for handling HiC -and 5C data, on Galaxy. This includes managing data from mapped reads, either -in bam, mat, or raw formats. All stages use hdf5 dictionaries for fast access -and minimal memory and storage usage. - -This toolset includes methods for normalizing data from either HiC or 5C -experiments at the fragment-end, or fragment level resolution, respectively. -Once normalized, data can be used for plotting, binning, or other statistical -tests within the package very quickly. - -Original documentation can be found `here `_. - -Installing HiFive -============================= - -It is recommended to install this wrapper via the `Galaxy Test Tool Shed`. - -.. 
_`Galaxy Tool Shed`: http://testtoolshed.g2.bx.psu.edu/view/msauria/hifive - - -================ -Wrapper Licence -================ - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff -r 5d96a89cd820 -r 5e3de32a8688 hifive/hifive/tool_dependencies.xml --- a/hifive/hifive/tool_dependencies.xml Thu Apr 23 17:56:22 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - - diff -r 5d96a89cd820 -r 5e3de32a8688 hifive_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hifive_macros.xml Fri Jun 05 14:03:05 2015 -0400 @@ -0,0 +1,816 @@ + + + + + hifive + + + + 0.1 + + + + + + + + + + + +-B $command.bed + + + + + + +-i $command.insert +#if str( $command.datafile.format ) == "bam": + #for $bam_file in $command.datafile.paired_bam: + --bam ${bam_file.readend1} ${bam_file.readend2} + #end for +#elif str( $command.datafile.format ) == "mat": + --mat ${command.datafile.mat} +#else: + #for raw_file in $command.datafile.raw_files: + --raw ${raw_file.raw} + #end for +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-f $command.minint +-j $command.minbin +-n $command.numbin + + + + + + + + + + + + + + +-m $command.filtermin +-x $command.filtermax + + + + + + + + + + + +#if str( $command.chroms ) != "": + -c $command.chroms +#end if + + + + + + +$command.analysis.algorithm +#if str( $command.analysis.algorithm ) == "probability" or str( $command.analysis.algorithm ) == "binning-probability": + -b $command.analysis.probiter + -l $command.analysis.step + -g $command.analysis.change + $command.analysis.precalc +#end if +#if str( $command.analysis.algorithm ) == "express" or str( $command.analysis.algorithm ) == "binning-express" : + $command.analysis.removedist + -w $command.analysis.expreads + $command.analysis.binary + -g $command.analysis.change + #if str( $command.analysis.subalgorithm.kr ) == "yes": + -z + #else + -e $command.analysis.expiter + #end if +#end if +#if str( $command.analysis.algorithm ) == "binning" or str( $command.analysis.algorithm ) == "binning-express" or str( $command.analysis.algorithm ) == "binning-probability": + -r $command.analysis.biniter + -t 
$command.analysis.threshold + -y $command.analysis.binreads + -v $command.analysis.model + -s $command.analysis.modelbins + -u $command.analysis.modeltypes + --pseudocounts $command.analysis.pseudo +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-d $command.datatype + + + + + + + + + + + +$command.bed + + + + + + +#if str( $command.datafile.format ) == "bam": + #for $bam_file in $command.datafile.paired_bam: + -B ${bam_file.readend1} ${bam_file.readend2} + #end for +#else: + #for $count_file in $command.datafile.count_files: + -C ${count_file.count} + #end for +#end if + + + + + + + + + + + + + + + + + + + + + + +-f $command.minint + + + + + + + + +-m $command.mindist +-x $command.maxdist + + + + + + + + + + + +#if str( $command.regions ) != "": + -r $command.regions +#end if + + + + + + +$command.analysis.algorithm +#if str( $command.analysis.algorithm ) == "probability" or str( $command.analysis.algorithm ) == "binning-probability" or str( $command.analysis.algorithm ) == "probability-binning": + -b $command.analysis.probiter + -l $command.analysis.step + -g $command.analysis.change + $command.analysis.precalc +#end if +#if str( $command.analysis.algorithm ) == "express" or str( $command.analysis.algorithm ) == "binning-express" or str( $command.analysis.algorithm ) == "probability-express": + $command.analysis.removedist + -w $command.analysis.expreads + #if str( $command.analysis.subalgorithm.kr ) == "yes": + -z + #else: + -e $command.analysis.expiter + $command.analysis.logged + #end if +#end if +#if str( $command.analysis.algorithm ) == "binning" or str( $command.analysis.algorithm ) == "binning-express" or str( $command.analysis.algorithm ) == "binning-probability" or str( 
$command.analysis.algorithm ) == "express-binning" or str( $command.analysis.algorithm ) == "probability-binning": + -i $command.analysis.biniter + -t $command.analysis.threshold + -y $command.analysis.binreads + -v $command.analysis.model + -n $command.analysis.modelbins + -u $command.analysis.modeltypes +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-d $command.datatype + + + + + + + + + + + +-b $command.binsize + + + + + + + + +#if str( $command.dynamic.binning ) == "yes": + -y + -x $command.dynamic.expbinsize + -f $command.dynamic.minobs + -a $command.dynamic.search + $command.dynamic.failed +#end if + + +#if str( $command.dynamic.binning ) == "yes": + -y + -x $command.dynamic.expbinsize + -f $command.dynamic.minobs + -g $command.dynamic.search + $command.dynamic.failed +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + +-k min_color=$command.image.mincolor +-k mid_color=$command.image.midcolor +-k max_color=$command.image.maxcolor +-k logged=$command.image.logged +#if $command.image.minscore: + -k minscore=$command.image.minscore +#end if +#if $command.image.maxscore: + -k maxscore=$command.image.maxscore +#end if + + + + + + + + + + + + + + +$command.image.rotate + + + + + + + + + + + + diff -r 5d96a89cd820 -r 5e3de32a8688 readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Fri Jun 05 14:03:05 2015 -0400 @@ -0,0 +1,68 @@ +============================= +Galaxy wrapper for HiFive +============================= + +This wrapper allows use of the hifive package, a set of tools for handling HiC +and 5C data, on Galaxy. 
This includes managing data from mapped reads, either +in bam, mat, or raw formats. All stages use hdf5 dictionaries for fast access +and minimal memory and storage usage. + +This toolset includes methods for normalizing data from either HiC or 5C +experiments at the fragment-end, or fragment level resolution, respectively. +Once normalized, data can be used for plotting, binning, or other statistical +tests within the package very quickly. + +Original documentation can be found `here `_. + +Automated Installation +====================== + +Galaxy should be able to automatically install the dependencies, i.e. the +``h5py`` and ``hifive`` tool dependency packages. + +Manual Installation +=================== + +For those not using Galaxy's automated installation from the Tool Shed, put +the XML and Python files in the ``tools/hifive/`` folder and add the +XML files to your ``tool_conf.xml`` as normal. For example, use:: + +
+ + + + + + + + + + + + + + +
+ + +================ +Wrapper Licence +================ + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff -r 5d96a89cd820 -r 5e3de32a8688 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jun 05 14:03:05 2015 -0400 @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +