Mercurial > repos > erinija > dnp_mapping
changeset 0:b4bef5178d86 draft default tip
"planemo upload commit 4846fbc43d4c7437de1ce996392fd13a71abd9c7"
author | erinija |
---|---|
date | Tue, 07 Sep 2021 15:03:57 +0000 |
parents | |
children | |
files | dnp-mapping.sh dnp_mapping.xml test-data/601 test-data/cf test-data/class1_mtr test-data/pos |
diffstat | 6 files changed, 501 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/sh
# dnp-mapping.sh
#
# For every sequence of a tabular FASTA file (id <TAB> ... <TAB> sequence):
#   1. keep `input.length` characters starting `input.trimstart` characters
#      from the start of the sequence,
#   2. run `dnp-mapping` on the trimmed sequence with the given pattern file,
#   3. append the per-position correlations, tagged with the sequence id,
#      to output.file1,
#   4. average the correlations per position and append the detected peak
#      position(s) to output.file2 as: id <TAB> patternfile <TAB> "pos value".
#
# The script is plain POSIX sh (no bash-only expansions), so it runs the same
# under `sh`, `dash` and `bash`.

usage() {
    echo ""
    echo " CALL "
    echo " sh dnp-mapping.sh input.fasta input.pattern input.trimstart input.length output.file1 output.file2"
    echo ""
    echo " INPUT"
    echo " input.fasta - input fasta file "
    echo " input.pattern - 'one or more columns with dinucleotide frequency pattern'"
    echo " input.trimstart - 'number of positions to trim from the start of the sequence'"
    echo " input.length - 'sequence length to retain past trimming from start'"
    echo ""
    echo " OUTPUT"
    echo " output.file1 - tabular file with correlations "
    echo " output.file2 - file to store the max correlation position "
    echo ""
    echo " DESCRIPTION"
    echo " Each sequence in the fasta file is reduced by trimming "
    echo " and retaining a given number of positions, but no less than 147."
    echo " Correlation of the nucleosome's sequence with the patterns"
    echo " is computed within the sliding window. Correlation coefficients "
    echo " of the patterns with the sequence starting at a position 73 - dyad "
    echo " are computed and saved in output.file1. The maximum correlation position"
    echo " is saved in output.file2." 
    echo ""
    echo " REQUIREMENT"
    echo " dnp-mapping installed"
    echo " conda install -c bioconda dnp-mapping"
    echo ""
}

if test "$#" -ne 6; then
    usage
    exit 1
fi

faseqfile=$1
patternfile=$2
seqstart=$3
seqlength=$4

outfile1=$5
outfile2=$6

call=dnp-mapping

# Fail early with a clear message instead of silently producing empty outputs.
if ! command -v "$call" > /dev/null 2>&1; then
    echo "dnp-mapping.sh: required program '$call' not found in PATH" >&2
    exit 127
fi

for required in "$faseqfile" "$patternfile"; do
    if [ ! -r "$required" ]; then
        echo "dnp-mapping.sh: cannot read input file: $required" >&2
        exit 1
    fi
done

# The trim offset and length are used as substr() arguments below; anything
# other than a non-negative integer is a caller error.
for value in "$seqstart" "$seqlength"; do
    case $value in
        ''|*[!0-9]*)
            echo "dnp-mapping.sh: input.trimstart and input.length must be non-negative integers" >&2
            exit 1
            ;;
    esac
done

awk_program=$( cat << 'EOF'
###################################################################
# position of maximum
# parameters: window=W (minimal distance between two peaks)
#             buffer=N (size of buffer)
#
# Input records: "position value". Values are stored in arr[] relative
# to the start of the current buffer (num_buf * buffer).
###################################################################
# Scan arr[min_pos..max_pos] for peaks. For each candidate centre i the
# largest value within i-window..i+window (not past max_pos) is located.
# When that value is positive and not smaller than both neighbours:
#   - if it sits exactly at i it is printed as "position value" and the
#     scan jumps 2*window+1 positions ahead;
#   - otherwise the scan is re-centred on it, unless it lies before
#     start_position or at min_pos, in which case the scan skips ahead.
function max_pos_funct(min_pos, max_pos)
{
    sum=0;
    start_position=min_pos;
    for(i=min_pos+window;i<=max_pos&&sum<1000;)
    {
        sum++;
        max=arr[i];
        pos=i;
        for(j=i-window;j<=i+window&&j<=max_pos;j++)
        {
            if(arr[j]>max)
            {
                max=arr[j]
                pos=j
            }
        }
        if(arr[pos]>=arr[pos-1]&&arr[pos]>=arr[pos+1]&&arr[pos]>0)
        {
            if(pos==i)
            {
                start_position=pos+window+1
                printf("%d %f\n", pos+buffer*num_buf, arr[pos]);
                i=pos+window*2+1;
            }
            else
            {
                if(pos>=start_position&&pos>min_pos)
                {
                    i=pos;
                }
                else
                {
                    i+=window*2+1;
                }
            }
        }
        else
        {
            i+=window+1;
        }
        # iteration guard kept from the original implementation
        if(sum==999)
        {
            i+=window*3;
            sum=1;
        }
    }
}
{
    if(FNR==1)
        num_buf=0;
    pos_buf=int($1/buffer);
    if(pos_buf>num_buf)
    {
        # input moved into the next buffer: flush peaks of the current one
        max_pos_funct(1,buffer);
        num_buf=pos_buf;
    }
    last_rel=$1-num_buf*buffer;
    arr[last_rel]=$2;
}
END{
    # last_rel is remembered explicitly so that END does not depend on the
    # last record still being available as $1
    max_pos_funct(1,last_rel);
}

EOF
)

# Private scratch directory inside the current working directory; removed on
# exit so concurrent runs never share or leave behind temporary files.
workdir=$(mktemp -d "./dnp-mapping.XXXXXX") || exit 1
trap 'rm -rf "$workdir"' EXIT
trap 'exit 1' HUP INT TERM

seqfile=$workdir/dseq
rawfile=$workdir/raw
tagged=$workdir/tagged
avgfile=$workdir/avgc

tab=$(printf '\t')

: > "$outfile2"
: > "$outfile1"

# The input is read on fd 3 so that commands inside the loop cannot consume it.
while IFS= read -r line <&3 || [ -n "$line" ]; do
    # skip empty lines
    [ -n "$line" ] || continue

    # first column is the id, last column is the sequence
    id=${line%%"$tab"*}
    dseq=${line##*"$tab"}

    # trim: drop `seqstart` leading characters, keep at most `seqlength`
    printf '%s\n' "$dseq" |
        awk -v start="$seqstart" -v len="$seqlength" \
            '{ print substr($0, start + 1, len) }' > "$seqfile"

    printf '%s\n' "$id"
    cat "$seqfile"

    if ! "$call" -m "$patternfile" -s "$seqfile" > "$rawfile" < /dev/null; then
        echo "dnp-mapping.sh: warning: $call failed for sequence '$id'" >&2
    fi

    # correlations of this sequence, tagged with its id in the last column
    awk -v id="$id" '{print $0 "\t" id}' "$rawfile" > "$tagged"
    cat "$tagged" >> "$outfile1"

    # compute average correlation for THIS sequence only.
    # Only the correlation columns (2 .. NF-1) are summed, so an id that
    # starts with digits cannot leak into the sum.
    # NOTE(review): the divisor NF-1 also counts the id column; it is kept
    # unchanged because the reference outputs were generated with it and the
    # peak positions do not depend on a constant scale factor.
    awk 'NF > 1 { sum = 0; for (i = 2; i < NF; i++) sum += $i; print $1, sum / (NF - 1); }' \
        "$tagged" > "$avgfile"

    # compute most likely position of the nucleosome
    awk -v window=73 -v buffer=10000 "$awk_program" "$avgfile" |
        awk -v num="$patternfile" -v id="$id" '{print id "\t" num "\t" $0}' >> "$outfile2"
done 3< "$faseqfile"

exit 0
<tool id="dnp_mapping" name="Mapping nucleosome position" version="0.1.0">
    <!--
        dnp_mapping.xml: Galaxy wrapper around dnp-mapping.sh.
        Inputs: a tabular FASTA file, a pattern matrix, a trim offset and a
        retained sequence length. Outputs: the per-position correlation table
        and the detected peak position(s).
    -->
    <requirements>
        <requirement type="package" version="1.0">dnp-mapping</requirement>
    </requirements>
    <!-- The script is located through $__tool_directory__ and run with bash
         explicitly; the `interpreter` attribute is deprecated. -->
    <command detect_errors="exit_code"><![CDATA[
        bash '$__tool_directory__/dnp-mapping.sh' '$input1' '$input2' '$input3' '$input4' '$output1' '$output2'
    ]]></command>
    <inputs>
        <param type="data" name="input1" format="tabular" label="Tabular fasta" />
        <param type="data" name="input2" format="tabular" label="Patterns matrix" />
        <param name="input3" type="integer" value="0" min="0" label="Trim from start of sequences" />
        <param name="input4" type="integer" value="400" label="Sequence length" />
    </inputs>
    <outputs>
        <data name="output1" format="tabular" />
        <data name="output2" format="tabular" />
    </outputs>
    <tests>
        <test>
            <param name="input1" value="601"/>
            <param name="input2" value="class1_mtr"/>
            <param name="input3" value="0"/>
            <param name="input4" value="400"/>
            <output name="output1" file="cf"/>
            <output name="output2" file="pos"/>
        </test>
    </tests>
    <help><![CDATA[

Description::


  Maps nucleosome position in sequence given a pattern
  of dinucleotide frequencies along the sequence by
  computing a correlation between the sequence and the pattern.

  Pattern matrix should contain a header line identifying
  dinucleotides of which the pattern is. Trim from start
  number tells how many positions to trim from the start
  of the sequence (default 0). Sequence length is how many
  positions in sequence to retain after trimming
  (default 400).


Example::

  Input tabular fasta:
  Widom601Seq CGGGATCCTAATGACCAAGGAAAGCATGATTCTTCA...

  Input pattern matrix file:
  YY CC
  0.285353 0.056062
  0.281269 0.055327
  0.280065 0.054823


  Output tabular position file:
  Widom601Seq test-data/class1_mtr 153 0.140767

    ]]></help>
    <citations>
        <citation type="bibtex">
@article{ioshikhes2011,
  title={Variety of genomic DNA patterns for nucleosome positioning},
  author={Ioshikhes, Ilya and Hosid, Sergey and Pugh, B. Franklin},
  journal={Genome Research},
  volume={21},
  number={11},
  pages={1863-1871},
  year={2011},
  publisher={CSH Press},
  url = {https://genome.cshlp.org/content/21/11/1863.full}
}</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/601 Tue Sep 07 15:03:57 2021 +0000 @@ -0,0 +1,1 @@ +Widom601Seq CGGGATCCTAATGACCAAGGAAAGCATGATTCTTCACACCGAGTTCATCCCTTATGTGATGGACCCTATACGCGGCCGCCCTGGAGAATCCCGGTGCCGAGGCCGCTCAATTGGTCGTAGACAGCTCTAGCACCGCTTAAACGCACGTACGCGCTGTCCCCCGCGTTTTAACCGCCAAGGGGATTACTCCCTAGTCTCCAGGCACGTGTCAGATATATACATCCTGTGCATGTATTGAACAGCGACCTTGCCGGTGCCAGTCGGATAGTGTTCCGAGCTCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cf Tue Sep 07 15:03:57 2021 +0000 @@ -0,0 +1,139 @@ +72 -0.0849 -0.0974 Widom601Seq +73 -0.0985 -0.1190 Widom601Seq +74 -0.1263 -0.1206 Widom601Seq +75 -0.1516 -0.0997 Widom601Seq +76 -0.1499 -0.0784 Widom601Seq +77 -0.1057 -0.0630 Widom601Seq +78 -0.0613 -0.0340 Widom601Seq +79 -0.0107 -0.0558 Widom601Seq +80 0.0212 -0.0425 Widom601Seq +81 0.0513 -0.0393 Widom601Seq +82 0.0527 -0.0389 Widom601Seq +83 0.0376 -0.0211 Widom601Seq +84 0.0259 -0.0036 Widom601Seq +85 0.0167 0.0005 Widom601Seq +86 0.0236 -0.0116 Widom601Seq +87 0.0248 -0.0839 Widom601Seq +88 0.0538 -0.1019 Widom601Seq +89 0.0776 -0.1201 Widom601Seq +90 0.0833 -0.1269 Widom601Seq +91 0.0764 -0.1304 Widom601Seq +92 0.0587 -0.1331 Widom601Seq +93 0.0489 -0.1097 Widom601Seq +94 0.0483 -0.0620 Widom601Seq +95 0.0506 -0.0013 Widom601Seq +96 0.0563 0.0362 Widom601Seq +97 0.0552 0.0429 Widom601Seq +98 0.0403 0.0188 Widom601Seq +99 0.0031 -0.0339 Widom601Seq +100 -0.0283 -0.1138 Widom601Seq +101 -0.0275 -0.1887 Widom601Seq +102 -0.0191 -0.2230 Widom601Seq +103 -0.0163 -0.2255 Widom601Seq +104 -0.0223 -0.1856 Widom601Seq +105 -0.0296 -0.1246 Widom601Seq +106 -0.0595 -0.0688 Widom601Seq +107 -0.0678 -0.0254 Widom601Seq +108 -0.0661 0.0001 Widom601Seq +109 -0.0465 0.0030 Widom601Seq +110 -0.0015 0.0158 Widom601Seq +111 0.0251 -0.0130 Widom601Seq +112 0.0636 0.0049 Widom601Seq +113 0.0868 0.0235 Widom601Seq +114 0.0978 0.0313 Widom601Seq +115 0.0875 0.0278 Widom601Seq +116 0.0330 0.0219 Widom601Seq +117 -0.0224 0.0189 Widom601Seq +118 -0.0191 0.0365 Widom601Seq +119 0.0265 0.0520 Widom601Seq +120 0.0615 0.0677 Widom601Seq +121 0.0921 0.0423 Widom601Seq +122 0.1053 0.0220 Widom601Seq +123 0.0969 0.0468 Widom601Seq +124 0.0490 0.0887 Widom601Seq +125 0.0108 0.1433 Widom601Seq +126 -0.0242 0.1851 Widom601Seq +127 -0.0467 0.2068 Widom601Seq +128 -0.0610 0.1764 Widom601Seq +129 -0.0814 0.1096 Widom601Seq +130 -0.0965 0.0176 Widom601Seq +131 -0.1027 -0.0517 
Widom601Seq +132 -0.1074 -0.0601 Widom601Seq +133 -0.1193 -0.0182 Widom601Seq +134 -0.1373 0.0494 Widom601Seq +135 -0.1378 0.1276 Widom601Seq +136 -0.1541 0.1706 Widom601Seq +137 -0.1680 0.1883 Widom601Seq +138 -0.1836 0.1874 Widom601Seq +139 -0.1437 0.1451 Widom601Seq +140 -0.0848 0.0791 Widom601Seq +141 -0.0136 0.0240 Widom601Seq +142 0.0538 0.0116 Widom601Seq +143 0.1108 0.0363 Widom601Seq +144 0.1461 0.0842 Widom601Seq +145 0.1407 0.1203 Widom601Seq +146 0.0992 0.1517 Widom601Seq +147 0.0596 0.1786 Widom601Seq +148 0.0267 0.1623 Widom601Seq +149 0.0619 0.1720 Widom601Seq +150 0.1307 0.1915 Widom601Seq +151 0.1883 0.1782 Widom601Seq +152 0.2398 0.1561 Widom601Seq +153 0.2593 0.1630 Widom601Seq +154 0.2497 0.1666 Widom601Seq +155 0.1948 0.1520 Widom601Seq +156 0.1235 0.1111 Widom601Seq +157 0.0747 0.0692 Widom601Seq +158 0.0566 0.0527 Widom601Seq +159 0.0661 0.0647 Widom601Seq +160 0.0967 0.0803 Widom601Seq +161 0.1069 0.0867 Widom601Seq +162 0.0956 0.0400 Widom601Seq +163 0.0388 -0.0282 Widom601Seq +164 -0.0345 -0.0577 Widom601Seq +165 -0.1090 -0.0748 Widom601Seq +166 -0.1796 -0.0807 Widom601Seq +167 -0.2238 -0.0842 Widom601Seq +168 -0.2311 -0.1111 Widom601Seq +169 -0.2329 -0.2170 Widom601Seq +170 -0.1875 -0.2650 Widom601Seq +171 -0.1329 -0.2811 Widom601Seq +172 -0.0894 -0.2670 Widom601Seq +173 -0.0536 -0.2268 Widom601Seq +174 -0.0169 -0.1451 Widom601Seq +175 -0.0199 -0.1091 Widom601Seq +176 -0.0119 -0.0397 Widom601Seq +177 -0.0052 -0.0147 Widom601Seq +178 -0.0092 -0.0246 Widom601Seq +179 -0.0071 -0.0487 Widom601Seq +180 0.0376 -0.0776 Widom601Seq +181 0.1085 -0.1024 Widom601Seq +182 0.1782 -0.1237 Widom601Seq +183 0.1917 -0.1221 Widom601Seq +184 0.1737 -0.1093 Widom601Seq +185 0.1261 -0.0950 Widom601Seq +186 0.0817 -0.0895 Widom601Seq +187 0.0252 -0.0797 Widom601Seq +188 -0.0040 -0.0788 Widom601Seq +189 -0.0127 -0.0650 Widom601Seq +190 -0.0007 -0.0475 Widom601Seq +191 0.0304 -0.0395 Widom601Seq +192 0.0427 -0.0339 Widom601Seq +193 0.0210 -0.0284 Widom601Seq 
+194 -0.0324 -0.0314 Widom601Seq +195 -0.0966 -0.0550 Widom601Seq +196 -0.1432 -0.0977 Widom601Seq +197 -0.1964 -0.1449 Widom601Seq +198 -0.2331 -0.1806 Widom601Seq +199 -0.2504 -0.1936 Widom601Seq +200 -0.2244 -0.1792 Widom601Seq +201 -0.1906 -0.1427 Widom601Seq +202 -0.1732 -0.1017 Widom601Seq +203 -0.1600 -0.0490 Widom601Seq +204 -0.1390 0.0124 Widom601Seq +205 -0.1350 0.0343 Widom601Seq +206 -0.0944 0.0873 Widom601Seq +207 -0.0339 0.1284 Widom601Seq +208 0.0093 0.1498 Widom601Seq +209 0.0260 0.1531 Widom601Seq +210 0.0413 0.1215 Widom601Seq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/class1_mtr Tue Sep 07 15:03:57 2021 +0000 @@ -0,0 +1,143 @@ +YY CC +0.285353 0.056062 +0.281269 0.055327 +0.280065 0.054823 +0.280892 0.054075 +0.282842 0.053391 +0.285205 0.053584 +0.286759 0.054226 +0.286997 0.054783 +0.285956 0.054777 +0.285134 0.054833 +0.285523 0.055256 +0.287184 0.055801 +0.289618 0.056153 +0.292037 0.05629 +0.293477 0.056174 +0.29298 0.055816 +0.291153 0.055393 +0.289209 0.055261 +0.287625 0.055462 +0.286558 0.055835 +0.285957 0.056413 +0.286008 0.056942 +0.286348 0.057504 +0.286082 0.057413 +0.284528 0.056795 +0.281906 0.055761 +0.279152 0.054631 +0.277372 0.053876 +0.276062 0.053256 +0.275021 0.052943 +0.274373 0.052792 +0.274964 0.052781 +0.276158 0.05303 +0.277534 0.053199 +0.278591 0.053694 +0.279602 0.053979 +0.280454 0.053916 +0.280561 0.053614 +0.279716 0.053129 +0.278889 0.053204 +0.27896 0.053273 +0.280242 0.05358 +0.281833 0.053727 +0.283059 0.053724 +0.28401 0.053575 +0.284503 0.053343 +0.284104 0.052835 +0.282933 0.052243 +0.281079 0.051729 +0.279659 0.051733 +0.27883 0.052169 +0.279389 0.052569 +0.280668 0.052759 +0.281807 0.052686 +0.281911 0.052575 +0.280647 0.052256 +0.278369 0.051516 +0.275605 0.050446 +0.273386 0.049735 +0.271897 0.049303 +0.271456 0.049327 +0.272129 0.049507 +0.273622 0.050103 +0.27503 0.050714 +0.276287 0.051524 +0.277737 0.052287 +0.279112 0.053071 +0.279509 0.053409 +0.278812 0.053487 +0.277934 0.053391 +0.277342 0.053302 +0.277525 0.053246 +0.278172 0.053421 +0.27916 0.053804 +0.279953 0.054343 +0.280412 0.054698 +0.280062 0.054656 +0.279717 0.054386 +0.279621 0.054019 +0.280257 0.05366 +0.280957 0.053388 +0.281606 0.053433 +0.282718 0.053819 +0.284282 0.054281 +0.286042 0.054529 +0.286818 0.054337 +0.285802 0.053871 +0.282839 0.053156 +0.279115 0.05272 +0.275711 0.052583 +0.273545 0.052788 +0.273119 0.053152 +0.274227 0.053684 +0.275852 0.053944 +0.277329 0.053804 +0.278456 0.053259 +0.27897 0.052542 +0.279043 0.05209 +0.278276 0.051703 
+0.277741 0.05164 +0.277598 0.051866 +0.278316 0.052347 +0.279527 0.052915 +0.280741 0.05336 +0.281966 0.053694 +0.282732 0.053794 +0.282982 0.053419 +0.282956 0.052962 +0.28274 0.052648 +0.282668 0.05283 +0.282385 0.052865 +0.282652 0.053137 +0.283331 0.053645 +0.284376 0.054408 +0.285144 0.054957 +0.285582 0.055012 +0.285563 0.054912 +0.28496 0.054419 +0.283141 0.053636 +0.28038 0.052967 +0.277661 0.052561 +0.276195 0.052939 +0.276278 0.053514 +0.276958 0.053978 +0.277169 0.053919 +0.276803 0.053513 +0.275698 0.052945 +0.2739 0.052028 +0.271708 0.051056 +0.270081 0.050598 +0.26973 0.050928 +0.270201 0.051638 +0.2713 0.052568 +0.273233 0.053273 +0.27526 0.053542 +0.276416 0.053264 +0.276098 0.052712 +0.275468 0.052339 +0.276155 0.052314 +0.278355 0.052531 +0.280732 0.052755 +0.282022 0.053338