# HG changeset patch
# User grau
# Date 1383855371 18000
# Node ID 85fd336b5b458d1b80a1a736ec8a4b2a861b97da
# Parent dec223357d6b97442727afae30558ae9d449f706
Uploaded
diff -r dec223357d6b -r 85fd336b5b45 ._DimontWeb.jar
Binary file ._DimontWeb.jar has changed
diff -r dec223357d6b -r 85fd336b5b45 DimontDataExtractor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DimontDataExtractor.xml Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,70 @@
+
+
diff -r dec223357d6b -r 85fd336b5b45 DimontWeb.xml
--- a/DimontWeb.xml Wed Nov 06 17:10:20 2013 -0500
+++ b/DimontWeb.xml Thu Nov 07 15:16:11 2013 -0500
@@ -40,6 +40,7 @@
JAR_PATH
+ java
diff -r dec223357d6b -r 85fd336b5b45 extract_data_single_galaxy.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_data_single_galaxy.pl Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,128 @@
+use strict;
+
+# Abort with the usage text unless all nine positional arguments are given.
+# The nine values are read from @ARGV[0..8] right below; with fewer of them
+# the script would run with undefined columns or an undefined output path.
+if(@ARGV < 9){
+# Quoted heredoc tag: the usage text is printed verbatim, nothing interpolated.
+die <<'USAGE';
+Usage: perl extract_data_single_galaxy.pl <chromFa> <bed> <chromcol> <startcol> <seccolm> <seccol> <width> <statcol> <outfile>
+
+ <chromFa> : the chromosome FastA containing all chromosome sequences
+ <bed> : the file containing the peaks in tabular format,
+ e.g., bed, gff, narrowPeak
+ <chromcol> : the column of <bed> containing the chromosome
+ <startcol> : the column of <bed> containing the start position relative to
+ the chromosome start
+ <seccolm> : center: "Center of peak (relative to start)", end: "End of peak (global coordinates)"
+ <seccol> : the column of <bed> containing the peak center position (center) relative to
+ <startcol> or the column of <bed> containing the end position (end)
+ <width> : fixed width of all regions
+ <statcol> : the column of <bed> containing the peak statistic
+ or a similar measure of confidence
+ <outfile> : the path to the output file, written as FastA
+USAGE
+}
+
+
+# Unpack the nine positional command-line arguments.
+# Column arguments are decremented by one so they can be used directly as
+# 0-based indexes into the tab-split fields of a peak line.
+my ($chromFa, $bed) = @ARGV[0, 1];
+my ($chromcol, $startcol) = map { $_ - 1 } @ARGV[2, 3];
+my $seccolm = $ARGV[4];
+my $seccol = $ARGV[5] - 1;
+my ($width, $outfile) = @ARGV[6, 8];
+my $statcol = $ARGV[7] - 1;
+
+# When true, the peaks are ordered by chromosome name before extraction.
+my $sort = 1;
+
+
+# Load the sequence of one chromosome from the FastA file named by $chromFa.
+#
+# Argument: the chromosome name as it appears in the peak file. A FastA
+#   header matches when the first token after '>' (optional whitespace
+#   allowed) equals that name, or "chr" followed by that name, ignoring case.
+# Returns: the sequence lines of the first matching record, lower-cased and
+#   joined without line breaks; the empty string if no record matches.
+# Dies when the FastA file cannot be opened.
+# Prints the chromosome name and "loaded" to STDOUT as progress output.
+sub loadSeq{
+    my $prefix = shift;
+    print $prefix," ";
+    # Three-argument open with a lexical handle; fail loudly instead of
+    # silently returning an empty sequence for an unreadable file.
+    open(my $fa, '<', $chromFa) or die "Cannot open chromosome FastA '$chromFa': $!\n";
+    my $head = "";
+    my @lines = ();
+    while(my $line = <$fa>){
+        chomp($line);
+        if($line =~ /^>/){
+            if($head){
+                # Header of the record after the wanted one: nothing more to read.
+                last;
+            }
+            # \Q...\E: the chromosome name is literal text, not a pattern,
+            # so names containing '.', '|', '+' etc. match only themselves.
+            if($line =~ /^>\s*(?:chr)?\Q$prefix\E(?:\s|$)/i){
+                $head = $line;
+            }
+        }elsif($head){
+            push(@lines,lc($line));
+        }
+    }
+    close($fa);
+    my $str = join("",@lines);
+    print "loaded\n";
+    return $str;
+}
+
+
+
+# Read the tab-separated peak table into @lines. Each entry is an array
+# reference [chromosome, peak center, peak statistic, region width].
+open(my $in, '<', $bed) or die "Cannot open peak file '$bed': $!\n";
+
+my @lines = ();
+
+while(my $row = <$in>){
+    chomp($row);
+    # Blank lines and '#' comment lines carry no peak; without this guard
+    # they would become records with an empty chromosome name.
+    next if $row =~ /^\s*(?:#|$)/;
+    my @parts = split("\t",$row);
+    # Rewrite every "chr0" to "chr" in the chromosome field (e.g. chr01 -> chr1).
+    $parts[$chromcol] =~ s/chr0/chr/g;
+    my $center;
+    if($seccolm eq "center"){
+        # Second column holds the peak center as an offset from the start.
+        $center = $parts[$startcol]+$parts[$seccol];
+    }else{
+        # Second column holds the end position: use the midpoint of start and end.
+        $center = int(($parts[$startcol]+$parts[$seccol])/2);
+    }
+    push(@lines,[$parts[$chromcol],$center,$parts[$statcol],$width]);
+}
+
+close($in);
+print "Read input file ".$bed."\n";
+
+
+# When $sort is set, order the peak records by their chromosome name
+# (element 0, compared as strings).
+@lines = sort { $a->[0] cmp $b->[0] } @lines if $sort;
+
+# Write one FastA record per peak: a region of the requested width around
+# the peak center, cut from the chromosome sequence returned by loadSeq().
+# Regions with a non-positive width, regions not fully inside the loaded
+# chromosome, and regions containing characters other than A/C/G/T are
+# reported on STDOUT and skipped.
+open(my $out, '>', $outfile) or die "Cannot open output file '$outfile': $!\n";
+
+print "Extracting sequences...\n\n";
+
+my $oldchr = "";
+my $sequence = "";
+for my $line (@lines){
+    my ($chr, $center, $signal, $len) = @{$line};
+    # Reload only when the chromosome changes between consecutive peaks.
+    unless($chr eq $oldchr){
+        $sequence = loadSeq($chr);
+    }
+    $oldchr = $chr;
+    if($len <= 0){
+        print $len," -> next\n";
+        next;
+    }
+    # Number of bases on the left of the center: len/2 for even widths,
+    # (len-1)/2 for odd widths.
+    my $half = int($len/2);
+
+    # 0-based offset of the region within the chromosome string.
+    my $start = $center-$half-1;
+
+    my $head = "> chr: ".$chr."; start: ".$start."; peak: ".($center-$start)."; signal: ".$signal."\n";
+    # A negative offset would make substr() count from the END of the
+    # chromosome, and a region past the end would come out shorter than
+    # the fixed width (or empty when the chromosome was not found).
+    if($start < 0 || $start+$len > length($sequence)){
+        print "Sequence for\n\t",substr($head,1),"omitted because the region is not fully contained in the loaded chromosome sequence.\n\n";
+        next;
+    }
+    my $curr = substr($sequence,$start,$len);
+    if($curr =~ /[^ACGTacgt]/){
+        print "Sequence for\n\t",substr($head,1),"omitted due to ambiguous nucleotides.\n\n";
+    }else{
+        print {$out} $head,$curr,"\n";
+    }
+}
+
+# Buffered write errors only surface at close, so check it.
+close($out) or die "Cannot close output file '$outfile': $!\n";
+print "\nDone.\n";
\ No newline at end of file
diff -r dec223357d6b -r 85fd336b5b45 test-data/.DS_Store
Binary file test-data/.DS_Store has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/._.DS_Store
Binary file test-data/._.DS_Store has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/._Test
Binary file test-data/._Test has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/._dimont_test.fasta
Binary file test-data/._dimont_test.fasta has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/Test/._Motif_(rc)1.png
Binary file test-data/Test/._Motif_(rc)1.png has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/Test/._Motif_(rc)3.png
Binary file test-data/Test/._Motif_(rc)3.png has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/Test/._Motif_0.png
Binary file test-data/Test/._Motif_0.png has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/Test/._Motif_2.png
Binary file test-data/Test/._Motif_2.png has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/Test/._Test_html.html
Binary file test-data/Test/._Test_html.html has changed
diff -r dec223357d6b -r 85fd336b5b45 test-data/mini.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mini.bed Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,3 @@
+chr1 20 250 id1 0 . 12.3
+chr1 374 450 id2 0 . 11.1
+chr2 53 273 id3 0 . 3.45
diff -r dec223357d6b -r 85fd336b5b45 test-data/mini2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mini2.bed Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,3 @@
+chr1 90 70 id1 0 . 12.3
+chr1 374 74 id2 0 . 11.1
+chr2 53 120 id3 0 . 3.45
diff -r dec223357d6b -r 85fd336b5b45 test-data/mini2_extracted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mini2_extracted.fa Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,6 @@
+> chr: chr1; start: 59; peak: 101; signal: 12.3
+catattatagggagaaatatgatcgcgtatgcgagagtagtgccaacatattgtgctctttgattttttggcaacccaaaatggtggcggatgaacgagatgataatatattcaagttgccgctaatcagaaataaattcattgcaacgttaaatacagcacaatatatgatcgcgtatgcgagagtagtgccaacatat
+> chr: chr1; start: 347; peak: 101; signal: 11.1
+atttagattgcctattaaatatgatcgcgtatgcgagagtagtgccaacatattgtgctctctatataatgactgcctctcattctgtcttattttaccgcaaacccaaatcgacaatgcacgacagaggaagcagaacagatatttagattgcctctcattttctctcccatattatagggagaaatatgatcgcgtat
+> chr: chr2; start: 72; peak: 101; signal: 3.45
+gccaacatattgtgctctttgattttttggcaacccaaaatggtggcggatgaacgagatgataatatattcaagttgccgctaatcagaaataaattcattgcaacgttaaatacagcacaatatatgatcgcgtatgcgagagtagtgccaacatattgtgctaatgagtgcctctcgttctctgtcttatattaccg
diff -r dec223357d6b -r 85fd336b5b45 test-data/mini_extracted.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mini_extracted.fa Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,6 @@
+> chr: chr1; start: 34; peak: 101; signal: 12.3
+ttagattgcctctcattttctctcccatattatagggagaaatatgatcgcgtatgcgagagtagtgccaacatattgtgctctttgattttttggcaacccaaaatggtggcggatgaacgagatgataatatattcaagttgccgctaatcagaaataaattcattgcaacgttaaatacagcacaatatatgatcgc
+> chr: chr1; start: 311; peak: 101; signal: 11.1
+agacaatacacgacagagagagagagcagcggagatatttagattgcctattaaatatgatcgcgtatgcgagagtagtgccaacatattgtgctctctatataatgactgcctctcattctgtcttattttaccgcaaacccaaatcgacaatgcacgacagaggaagcagaacagatatttagattgcctctcatttt
+> chr: chr2; start: 62; peak: 101; signal: 3.45
+cgagagtagtgccaacatattgtgctctttgattttttggcaacccaaaatggtggcggatgaacgagatgataatatattcaagttgccgctaatcagaaataaattcattgcaacgttaaatacagcacaatatatgatcgcgtatgcgagagtagtgccaacatattgtgctaatgagtgcctctcgttctctgtct
diff -r dec223357d6b -r 85fd336b5b45 test-data/minigenome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/minigenome.fa Thu Nov 07 15:16:11 2013 -0500
@@ -0,0 +1,44 @@
+> chr1
+Cgacaatgcacgacagaggaagcagaacagatatttagattgcctctcat
+tttctctcccatattatagggagaaatatgatcgcgtatgcgagagtagt
+gccaacatattgtgctctttgattttttggcaacccaaaatggtggcgga
+tgaaCGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCA
+TTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGAGAGTAGTG
+CCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCG
+CAAACCCAAAAAgacaatacacgacagagagagagagcagcggagatatt
+tagattgcctattaaatatgatcgcgtatgcgagagtagtgccaacatat
+tgtgctctCTATATAATGACTGCCTCTCATTCTGTCTTATTTTACCGCAA
+ACCCAAatcgacaatgcacgacagaggaagcagaacagatatttagattg
+cctctcattttctctcccatattatagggagaaatatgatcgcgtatgcg
+agagtagtgccaacatattgtgctctttgattttttggcaacccaaaatg
+gtggcggatgaaCGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAA
+TAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGA
+GAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTA
+TATTACCGCAAACCCAAAAAgacaatacacgacagagagagagagcagcg
+gagatatttagattgcctattaaatatgatcgcgtatgcgagagtagtgc
+caacatattgtgctctCTATATAATGACTGCCTCTCATTCTGTCTTATTT
+TACCGCAAACCCAAatcgacaatgcacgacagaggaagcagaacagatat
+ttagattgcctctcattttctctcccatattatagggagaaatatgatcg
+cgtatgcgagagtagtgccaacatattgtgctctttgattttttggcaac
+ccaaaatggtggcggatgaaCGAGATGATAATATATTCAAGTTGCCGCTA
+ATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGC
+GTATGCGAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCT
+CTGTCTTATATTACCGCAAACCCAAAAAgacaatacacgacagagagaga
+gagcagcggagatatttagattgcctattaaatatgatcgcgtatgcgag
+> chr2
+agatatttagattgcctctcattttctctcccatattatagggagaaata
+tgatcgcgtatgcgagagtagtgccaacatattgtgctctttgatttttt
+ggcaacccaaaatggtggcggatgaaCGAGATGATAATATATTCAAGTTG
+CCGCTAATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATATAT
+GATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCT
+CGTTCTCTGTCTTATATTACCGCAAACCCAAAAAgacaatacacgacaga
+gagagagagcagcggagatatttagattgcctattaaatatgatcgcgta
+tgcgagagtagtgccaacatattgtgctctCTATATAATGACTGCCTCTC
+ATTCTGTCTTATTTTACCGCAAACCCAAatcgacaatgcacgacagagga
+agcagaacagatatttagattgcctctcattttctctcccatattatagg
+gagaaatatgatcgcgtatgcgagagtagtgccaacatattgtgctcttt
+gattttttggcaacccaaaatggtggcggatgaaCGAGATGATAATATAT
+TCAAGTTGCCGCTAATCAGAAATAAATTCATTGCAACGTTAAATACAGCA
+CAATATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTAATGA
+GTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACCCAAAAAgacaatac
+acgacagagagagagagcagcggagatatttagattgcctattaaatatg
\ No newline at end of file