Mercurial > repos > jbrayet > filterpeaks_1_0_docker
changeset 2:a60039e19ab6 draft
Uploaded
author | jbrayet |
---|---|
date | Tue, 09 Feb 2016 08:43:23 -0500 |
parents | 5ee836825323 |
children | 5053f15bf925 |
files | peak2bed.pl |
diffstat | 1 files changed, 128 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl

# peak2bed.pl
#
# Convert a tab-separated peak table into a BED track, keeping only the
# peaks whose height column (index 5) reaches the requested minimum.
#
# Input handling, as implemented below:
#   * lines containing "max" or "track", and lines starting with "#", are
#     skipped (presumably header/comment lines -- verify against the peak
#     callers that feed this script);
#   * a line containing "summit" is treated as a header announcing a layout
#     whose chromosome is in the first column; until such a line is seen the
#     chromosome is expected in the second column (column offset 1);
#   * lines with fewer than six columns (e.g. blank lines) are ignored.
#
# Output: a "track" header line followed by one 12-column BED row per
# selected peak.  A short summary is printed to STDOUT.

use strict;
use warnings;
use diagnostics;

my $usage = qq{
    $0

    -----------------------------
    mandatory parameters:

    -f peaks
    -t min peak height
    -o output file

    -----------------------------
    optional parameters:
    -n name
    -g chromosome lengths file
    -h print this help
};

# Parse the command line.
#   @_      : the argument list (normally @ARGV)
#   returns : a hash with keys filename, output_fname, min_peak_height,
#             chrom_lengths_file and exp_name
# Prints the usage and exits on -h; dies when a flag that needs a value is
# the last argument.  Unknown flags are reported on STDOUT and skipped.
sub parse_args {
    my @args = @_;

    my %opt = (
        filename           => "",
        output_fname       => "",
        min_peak_height    => 0,
        chrom_lengths_file => "",
        exp_name           => "User Track",
    );
    my %key_for = (
        '-f' => 'filename',
        '-t' => 'min_peak_height',
        '-o' => 'output_fname',
        '-g' => 'chrom_lengths_file',
        '-n' => 'exp_name',
    );

    while (scalar(@args) > 0) {
        my $this_arg = shift @args;
        if ($this_arg eq '-h') {
            print "$usage\n";
            exit;
        }
        elsif (exists $key_for{$this_arg}) {
            die "missing value for $this_arg\n" unless @args;
            $opt{ $key_for{$this_arg} } = shift @args;
        }
        elsif ($this_arg =~ m/^-/) {
            print "unknown flag: $this_arg\n";
        }
    }
    return %opt;
}

# Read chromosome lengths from a file.
#   $path   : file name, or "" when no file was given
#   returns : hash mapping chromosome name => length; empty when $path is ""
# Only lines matching "chr<name><whitespace><digits>" contribute an entry.
sub read_chrom_lengths {
    my ($path) = @_;
    my %length_of;
    return %length_of if $path eq "";

    open my $fh, '<', $path or die "$path : $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /(chr\S+)\s(\d+)/) {
            $length_of{$1} = $2;
        }
    }
    close $fh;
    return %length_of;
}

# Clamp coordinates to a chromosome length.
#   $limit  : chromosome length, or undef when the chromosome is unknown
#   @values : coordinates to clamp
#   returns : the coordinates, each replaced by min(value, $limit); returned
#             untouched when $limit is undef, so chromosomes absent from the
#             lengths file keep their coordinates instead of losing them
sub clamp_to_chrom_length {
    my ($limit, @values) = @_;
    return @values unless defined $limit;
    return map { min($_, $limit) } @values;
}

# Build one BED row (12 columns) for a peak.
#   returns : the tab-joined row, terminated by "\t\n"
# NOTE(review): the trailing tab before the newline is inherited from the
# original script and kept so that output stays byte-identical; confirm
# whether downstream consumers would prefer it removed.
sub bed_line {
    my ($chrom, $start, $end, $name, $score) = @_;
    return join("\t",
        $chrom, $start, $end, $name, $score,
        "+",                # strand
        $start, $end,       # thickStart, thickEnd
        "255,120,11",       # itemRgb
        "1",                # blockCount
        $end - $start,      # blockSizes
        0,                  # blockStarts
        "\n");
}

# Script entry point.
#   @_ : the command-line arguments
sub main {
    my @args = @_;

    if (scalar(@args) == 0) {
        print $usage;
        exit(0);
    }

    my %opt = parse_args(@args);

    if ($opt{filename} eq "") {
        die "you should specify chip file\n";
    }
    if ($opt{output_fname} eq "") {
        die "you should specify output filename\n";
    }

    # The threshold is lowered by 0.5 unless the value given on the command
    # line textually contains "0.5".
    # NOTE(review): presumably this makes integer thresholds inclusive of
    # half-step heights -- confirm the intent with the tool author.
    my $min_peak_height = $opt{min_peak_height};
    $min_peak_height -= 0.5 unless ($min_peak_height =~ m/0\.5/);

    my $clamp     = $opt{chrom_lengths_file} ne "";
    my %length_of = read_chrom_lengths($opt{chrom_lengths_file});

    print "\n-----------------\n\n";

    open my $in,  '<', $opt{filename}     or die "$opt{filename} : $!\n";
    open my $out, '>', $opt{output_fname} or die "$opt{output_fname}: $!\n";
    print {$out} "track name='$opt{exp_name}' description='$opt{exp_name}'\n";

    my $count      = 0;    # peaks read
    my $scount     = 0;    # peaks selected
    my $correction = 1;    # column offset of the chromosome field

    while (my $line = <$in>) {
        chomp $line;
        next if ($line =~ /max/);
        next if ($line =~ /track/);
        if ($line =~ /summit/) {
            $correction = 0;
            next;
        }
        next if ($line =~ /^\#/);

        my @fields = split(/\t/, $line);

        # Blank or truncated lines cannot carry a peak; skipping them keeps
        # them out of both the counters and the output.
        next if @fields < 6;

        $count++;

        # NOTE(review): the height is always read from index 5, regardless
        # of $correction -- confirm this is right for the "summit" layout.
        next unless $fields[5] >= $min_peak_height;
        $scount++;

        my ($chrom, $start, $end, $name, $score)
            = @fields[ $correction .. $correction + 4 ];
        $chrom = "chr" . $chrom unless ($chrom =~ m/chr/);

        if ($clamp) {
            ($start, $end, $name)
                = clamp_to_chrom_length($length_of{$chrom}, $start, $end, $name);
        }

        print {$out} bed_line($chrom, $start, $end, $name, $score);
    }

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "$opt{output_fname}: $!\n";

    print "read: $count peaks; selected: $scount\n";
    return;
}

# Return the numerically smaller of two values (the second one on a tie).
sub min {
    my ($first, $second) = @_;
    if ($first < $second) {
        return $first;
    }
    return $second;
}

# Run only when executed as a script, so the helpers can be loaded with
# require/do without side effects.
main(@ARGV) unless caller;

1;