changeset 0:b4bef5178d86 draft default tip

"planemo upload commit 4846fbc43d4c7437de1ce996392fd13a71abd9c7"
author erinija
date Tue, 07 Sep 2021 15:03:57 +0000
parents
children
files dnp-mapping.sh dnp_mapping.xml test-data/601 test-data/cf test-data/class1_mtr test-data/pos
diffstat 6 files changed, 501 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnp-mapping.sh	Tue Sep 07 15:03:57 2021 +0000
@@ -0,0 +1,142 @@
+#!/bin/sh
+if test "$#" -ne 6; then
+
+echo ""
+echo " CALL  "
+echo "   sh dnp-mapping.sh input.fasta input.pattern input.trimstart input.length output.file1 output.file2"
+echo ""
+echo " INPUT" 
+echo "   input.fasta     - input fasta file "
+echo "   input.pattern   - 'one or more columns with dinucleotide frequency pattern'"
+echo "   input.trimstart - 'number of positions to trim from the start of the sequence'"
+echo "   input.length    - 'sequence length to retain past trimming from start'"
+echo ""
+echo " OUTPUT"
+echo "   output.file1   - tabular file with correlations  "
+echo "   output.file2   - file to store the max correlation position  "
+
+echo ""
+echo " DESCRIPTION"
+echo "   Each sequence in the fasta file is reduced by trimming  "
+echo "   and retaining a given number of positions, but no less than 147." 
+echo "   Correlation of the nucleosome's sequence with the patterns" 
+echo "   is computed within the sliding window. Correlation coefficients "
+echo "   of the patterns with the sequence starting at a position 73 - dyad " 
+echo "   are computed and saved in output.file1. The maximum correlation position"
+echo "   is saved in output.file2."
+echo ""
+echo " REQUIREMENT"
+echo "   dnp-mapping installed"
+echo "   conda install -c bioconda dnp-mapping"
+echo ""
+  exit 1
+fi
+
+faseqfile=$1
+patternfile=$2
+seqstart=$3
+seqlength=$4
+
+outfile1=$5
+outfile2=$6
+
+call=dnp-mapping
+
+
+awk_program=$( cat << 'EOF' 
+###################################################################
+# max_pos_funct(min_pos, max_pos): print one line, position then value, for each
+# maximum found in arr[] between min_pos and max_pos. Variables set by the caller:
+#   window=W (minimal distance between two peaks), buffer=N (size of buffer)
+###################################################################
+function max_pos_funct(min_pos, max_pos)
+{
+  sum=0;  # iteration counter, checked against 999 at the end of each pass
+    start_position=min_pos;  # re-centring is only allowed at or beyond this position
+    for(i=min_pos+window;i<=max_pos&&sum<1000;)  # i is the centre of the current search window
+    {
+      sum++;
+      max=arr[i];  # start with the centre as the best candidate
+      pos=i;
+      for(j=i-window;j<=i+window&&j<=max_pos;j++)  # search i-window..i+window, never past max_pos
+      {
+        if(arr[j]>max)
+        {
+          max=arr[j]
+          pos=j
+        }
+      }
+      if(arr[pos]>=arr[pos-1]&&arr[pos]>=arr[pos+1]&&arr[pos]>0)  # candidate is positive and not below either neighbour
+      {
+        if(pos==i)  # the centre itself is the window maximum: report it
+        {
+          start_position=pos+window+1  # later re-centring must lie past the window of this peak
+          printf("%d %f\n", pos+buffer*num_buf, arr[pos]);  # add the chunk offset back to get the input position
+          i=pos+window*2+1;  # continue with the adjacent, non-overlapping window
+        }
+        else
+        {
+          if(pos>=start_position&&pos>min_pos)  # maximum is off-centre: re-centre on it when allowed
+          {
+            i=pos;
+          }
+          else
+          {
+            i+=window*2+1;  # otherwise move on to the next non-overlapping window
+          }
+        }
+      }
+      else
+      {
+        i+=window+1;  # no acceptable maximum: advance the centre by window+1
+      }
+      if(sum==999)  # counter reached 999: jump ahead and restart it. NOTE(review): presumably a guard against a stalled scan - confirm
+      {
+        i+=window*3;
+        sum=1;
+      }
+    }
+#  printf("\n");
+}
+{  # main rule: field 1 is used as the position, field 2 as the value
+  if(FNR==1)  # first input line: start with chunk 0
+    num_buf=0;
+  pos_buf=int($1/buffer);  # chunk index of this position
+  if(pos_buf>num_buf)  # position belongs to a later chunk: scan what was collected so far
+  {
+    max_pos_funct(1,buffer);
+    num_buf=pos_buf;
+  }
+  arr[$1-num_buf*buffer]=$2;  # store the value at its offset within the current chunk
+}
+END{  # scan the last chunk; field 1 is read here, after the final input line
+  max_pos_funct(1,$1-num_buf*buffer);
+}
+
+EOF
+)
+
+> ${outfile2}
+> ${outfile1}
+
+for seq in `cat ${faseqfile} | tr "\t" "="`; 
+  do 
+    echo $seq | sed 's/=.*$//' > id; 
+    echo $seq | sed 's/^.*=//' > dseq; 
+    dseq=`cat dseq`;
+    echo ${dseq:${seqstart}:${seqlength}} > dseq 
+    id=`cat id`;
+    echo ${id}
+    cat dseq
+    ${call} -m ${patternfile} -s dseq | awk -v id=${id} '{print $0 "\t" id}'  >>  ${outfile1}
+
+    #compute average correlation    
+    cat ${outfile1} | gawk '{sum=0; for(i=2;i<=NF;i++) sum+=$i; print $1, sum/(NF-1);}' > avgc
+    
+    # compute most likely position of the nucleosome 
+    cat avgc | awk "$awk_program" window=73 buffer=10000 | awk -v num=$patternfile -v id=$id '{print id "\t" num "\t" $0}' >> ${outfile2} ; 
+ done 
+
+rm id dseq avgc 
+exit 0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnp_mapping.xml	Tue Sep 07 15:03:57 2021 +0000
@@ -0,0 +1,75 @@
+<tool id="dnp_mapping" name="Mapping nucleosome position" version="0.1.0"> <!-- Galaxy wrapper around dnp-mapping.sh -->
+    <requirements>
+        <requirement type="package" version="1.0">dnp-mapping</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        bash '$__tool_directory__/dnp-mapping.sh' '$input1' '$input2' '$input3' '$input4' '$output1' '$output2'
+    ]]></command>
+    <inputs>
+      <param type="data" name="input1" format="tabular" label="Tabular fasta" />
+      <param type="data" name="input2" format="tabular" label="Patterns matrix" />
+      <param name="input3" type="integer" value="0" label="Trim from start of sequences" help="Number of positions to trim from the start of each sequence" />
+      <param name="input4" type="integer" value="400" label="Sequence length" help="Number of positions to retain after trimming" />
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" label="${tool.name} on ${on_string}: correlations" />
+        <data name="output2" format="tabular" label="${tool.name} on ${on_string}: positions" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="601"/>
+            <param name="input2" value="class1_mtr"/>
+            <param name="input3" value="0"/>
+            <param name="input4" value="400"/>
+            <output name="output1" file="cf"/>
+            <output name="output2" file="pos"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Description::
+
+
+   Maps nucleosome position in sequence given a pattern 
+   of dinucleotide frequencies along the sequence by 
+   computing a correlation between the sequence and the pattern.
+
+   The pattern matrix must start with a header line naming
+   the dinucleotides each column describes. Trim from start
+   is the number of positions to remove from the start
+   of the sequence (default 0). Sequence length is how many 
+   positions in sequence to retain after trimming 
+   (default 400).
+
+
+Example::
+
+   Input tabular fasta:
+	Widom601Seq     CGGGATCCTAATGACCAAGGAAAGCATGATTCTTCA...
+
+   Input pattern matrix file:
+      	YY CC
+	0.285353 0.056062
+	0.281269 0.055327
+	0.280065 0.054823
+      
+   
+   Output tabular position file:
+	Widom601Seq     test-data/class1_mtr    153 0.140767
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@article{ioshikhes2011,
+  title={Variety of genomic DNA patterns for nucleosome positioning},
+  author={Ioshikhes, Ilya  and Hosid, Sergey and Pugh, Franklin},
+  journal={Genome Research},
+  volume={21},
+  number={11},
+  pages={1863-1871},
+  year={2011},
+  publisher={CSH Press},
+  url = {https://genome.cshlp.org/content/21/11/1863.full}
+}</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/601	Tue Sep 07 15:03:57 2021 +0000
@@ -0,0 +1,1 @@
+Widom601Seq	CGGGATCCTAATGACCAAGGAAAGCATGATTCTTCACACCGAGTTCATCCCTTATGTGATGGACCCTATACGCGGCCGCCCTGGAGAATCCCGGTGCCGAGGCCGCTCAATTGGTCGTAGACAGCTCTAGCACCGCTTAAACGCACGTACGCGCTGTCCCCCGCGTTTTAACCGCCAAGGGGATTACTCCCTAGTCTCCAGGCACGTGTCAGATATATACATCCTGTGCATGTATTGAACAGCGACCTTGCCGGTGCCAGTCGGATAGTGTTCCGAGCTCCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cf	Tue Sep 07 15:03:57 2021 +0000
@@ -0,0 +1,139 @@
+72  -0.0849 -0.0974	Widom601Seq
+73  -0.0985 -0.1190	Widom601Seq
+74  -0.1263 -0.1206	Widom601Seq
+75  -0.1516 -0.0997	Widom601Seq
+76  -0.1499 -0.0784	Widom601Seq
+77  -0.1057 -0.0630	Widom601Seq
+78  -0.0613 -0.0340	Widom601Seq
+79  -0.0107 -0.0558	Widom601Seq
+80   0.0212 -0.0425	Widom601Seq
+81   0.0513 -0.0393	Widom601Seq
+82   0.0527 -0.0389	Widom601Seq
+83   0.0376 -0.0211	Widom601Seq
+84   0.0259 -0.0036	Widom601Seq
+85   0.0167  0.0005	Widom601Seq
+86   0.0236 -0.0116	Widom601Seq
+87   0.0248 -0.0839	Widom601Seq
+88   0.0538 -0.1019	Widom601Seq
+89   0.0776 -0.1201	Widom601Seq
+90   0.0833 -0.1269	Widom601Seq
+91   0.0764 -0.1304	Widom601Seq
+92   0.0587 -0.1331	Widom601Seq
+93   0.0489 -0.1097	Widom601Seq
+94   0.0483 -0.0620	Widom601Seq
+95   0.0506 -0.0013	Widom601Seq
+96   0.0563  0.0362	Widom601Seq
+97   0.0552  0.0429	Widom601Seq
+98   0.0403  0.0188	Widom601Seq
+99   0.0031 -0.0339	Widom601Seq
+100  -0.0283 -0.1138	Widom601Seq
+101  -0.0275 -0.1887	Widom601Seq
+102  -0.0191 -0.2230	Widom601Seq
+103  -0.0163 -0.2255	Widom601Seq
+104  -0.0223 -0.1856	Widom601Seq
+105  -0.0296 -0.1246	Widom601Seq
+106  -0.0595 -0.0688	Widom601Seq
+107  -0.0678 -0.0254	Widom601Seq
+108  -0.0661  0.0001	Widom601Seq
+109  -0.0465  0.0030	Widom601Seq
+110  -0.0015  0.0158	Widom601Seq
+111   0.0251 -0.0130	Widom601Seq
+112   0.0636  0.0049	Widom601Seq
+113   0.0868  0.0235	Widom601Seq
+114   0.0978  0.0313	Widom601Seq
+115   0.0875  0.0278	Widom601Seq
+116   0.0330  0.0219	Widom601Seq
+117  -0.0224  0.0189	Widom601Seq
+118  -0.0191  0.0365	Widom601Seq
+119   0.0265  0.0520	Widom601Seq
+120   0.0615  0.0677	Widom601Seq
+121   0.0921  0.0423	Widom601Seq
+122   0.1053  0.0220	Widom601Seq
+123   0.0969  0.0468	Widom601Seq
+124   0.0490  0.0887	Widom601Seq
+125   0.0108  0.1433	Widom601Seq
+126  -0.0242  0.1851	Widom601Seq
+127  -0.0467  0.2068	Widom601Seq
+128  -0.0610  0.1764	Widom601Seq
+129  -0.0814  0.1096	Widom601Seq
+130  -0.0965  0.0176	Widom601Seq
+131  -0.1027 -0.0517	Widom601Seq
+132  -0.1074 -0.0601	Widom601Seq
+133  -0.1193 -0.0182	Widom601Seq
+134  -0.1373  0.0494	Widom601Seq
+135  -0.1378  0.1276	Widom601Seq
+136  -0.1541  0.1706	Widom601Seq
+137  -0.1680  0.1883	Widom601Seq
+138  -0.1836  0.1874	Widom601Seq
+139  -0.1437  0.1451	Widom601Seq
+140  -0.0848  0.0791	Widom601Seq
+141  -0.0136  0.0240	Widom601Seq
+142   0.0538  0.0116	Widom601Seq
+143   0.1108  0.0363	Widom601Seq
+144   0.1461  0.0842	Widom601Seq
+145   0.1407  0.1203	Widom601Seq
+146   0.0992  0.1517	Widom601Seq
+147   0.0596  0.1786	Widom601Seq
+148   0.0267  0.1623	Widom601Seq
+149   0.0619  0.1720	Widom601Seq
+150   0.1307  0.1915	Widom601Seq
+151   0.1883  0.1782	Widom601Seq
+152   0.2398  0.1561	Widom601Seq
+153   0.2593  0.1630	Widom601Seq
+154   0.2497  0.1666	Widom601Seq
+155   0.1948  0.1520	Widom601Seq
+156   0.1235  0.1111	Widom601Seq
+157   0.0747  0.0692	Widom601Seq
+158   0.0566  0.0527	Widom601Seq
+159   0.0661  0.0647	Widom601Seq
+160   0.0967  0.0803	Widom601Seq
+161   0.1069  0.0867	Widom601Seq
+162   0.0956  0.0400	Widom601Seq
+163   0.0388 -0.0282	Widom601Seq
+164  -0.0345 -0.0577	Widom601Seq
+165  -0.1090 -0.0748	Widom601Seq
+166  -0.1796 -0.0807	Widom601Seq
+167  -0.2238 -0.0842	Widom601Seq
+168  -0.2311 -0.1111	Widom601Seq
+169  -0.2329 -0.2170	Widom601Seq
+170  -0.1875 -0.2650	Widom601Seq
+171  -0.1329 -0.2811	Widom601Seq
+172  -0.0894 -0.2670	Widom601Seq
+173  -0.0536 -0.2268	Widom601Seq
+174  -0.0169 -0.1451	Widom601Seq
+175  -0.0199 -0.1091	Widom601Seq
+176  -0.0119 -0.0397	Widom601Seq
+177  -0.0052 -0.0147	Widom601Seq
+178  -0.0092 -0.0246	Widom601Seq
+179  -0.0071 -0.0487	Widom601Seq
+180   0.0376 -0.0776	Widom601Seq
+181   0.1085 -0.1024	Widom601Seq
+182   0.1782 -0.1237	Widom601Seq
+183   0.1917 -0.1221	Widom601Seq
+184   0.1737 -0.1093	Widom601Seq
+185   0.1261 -0.0950	Widom601Seq
+186   0.0817 -0.0895	Widom601Seq
+187   0.0252 -0.0797	Widom601Seq
+188  -0.0040 -0.0788	Widom601Seq
+189  -0.0127 -0.0650	Widom601Seq
+190  -0.0007 -0.0475	Widom601Seq
+191   0.0304 -0.0395	Widom601Seq
+192   0.0427 -0.0339	Widom601Seq
+193   0.0210 -0.0284	Widom601Seq
+194  -0.0324 -0.0314	Widom601Seq
+195  -0.0966 -0.0550	Widom601Seq
+196  -0.1432 -0.0977	Widom601Seq
+197  -0.1964 -0.1449	Widom601Seq
+198  -0.2331 -0.1806	Widom601Seq
+199  -0.2504 -0.1936	Widom601Seq
+200  -0.2244 -0.1792	Widom601Seq
+201  -0.1906 -0.1427	Widom601Seq
+202  -0.1732 -0.1017	Widom601Seq
+203  -0.1600 -0.0490	Widom601Seq
+204  -0.1390  0.0124	Widom601Seq
+205  -0.1350  0.0343	Widom601Seq
+206  -0.0944  0.0873	Widom601Seq
+207  -0.0339  0.1284	Widom601Seq
+208   0.0093  0.1498	Widom601Seq
+209   0.0260  0.1531	Widom601Seq
+210   0.0413  0.1215	Widom601Seq
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/class1_mtr	Tue Sep 07 15:03:57 2021 +0000
@@ -0,0 +1,143 @@
+YY CC
+0.285353 0.056062
+0.281269 0.055327
+0.280065 0.054823
+0.280892 0.054075
+0.282842 0.053391
+0.285205 0.053584
+0.286759 0.054226
+0.286997 0.054783
+0.285956 0.054777
+0.285134 0.054833
+0.285523 0.055256
+0.287184 0.055801
+0.289618 0.056153
+0.292037 0.05629
+0.293477 0.056174
+0.29298 0.055816
+0.291153 0.055393
+0.289209 0.055261
+0.287625 0.055462
+0.286558 0.055835
+0.285957 0.056413
+0.286008 0.056942
+0.286348 0.057504
+0.286082 0.057413
+0.284528 0.056795
+0.281906 0.055761
+0.279152 0.054631
+0.277372 0.053876
+0.276062 0.053256
+0.275021 0.052943
+0.274373 0.052792
+0.274964 0.052781
+0.276158 0.05303
+0.277534 0.053199
+0.278591 0.053694
+0.279602 0.053979
+0.280454 0.053916
+0.280561 0.053614
+0.279716 0.053129
+0.278889 0.053204
+0.27896 0.053273
+0.280242 0.05358
+0.281833 0.053727
+0.283059 0.053724
+0.28401 0.053575
+0.284503 0.053343
+0.284104 0.052835
+0.282933 0.052243
+0.281079 0.051729
+0.279659 0.051733
+0.27883 0.052169
+0.279389 0.052569
+0.280668 0.052759
+0.281807 0.052686
+0.281911 0.052575
+0.280647 0.052256
+0.278369 0.051516
+0.275605 0.050446
+0.273386 0.049735
+0.271897 0.049303
+0.271456 0.049327
+0.272129 0.049507
+0.273622 0.050103
+0.27503 0.050714
+0.276287 0.051524
+0.277737 0.052287
+0.279112 0.053071
+0.279509 0.053409
+0.278812 0.053487
+0.277934 0.053391
+0.277342 0.053302
+0.277525 0.053246
+0.278172 0.053421
+0.27916 0.053804
+0.279953 0.054343
+0.280412 0.054698
+0.280062 0.054656
+0.279717 0.054386
+0.279621 0.054019
+0.280257 0.05366
+0.280957 0.053388
+0.281606 0.053433
+0.282718 0.053819
+0.284282 0.054281
+0.286042 0.054529
+0.286818 0.054337
+0.285802 0.053871
+0.282839 0.053156
+0.279115 0.05272
+0.275711 0.052583
+0.273545 0.052788
+0.273119 0.053152
+0.274227 0.053684
+0.275852 0.053944
+0.277329 0.053804
+0.278456 0.053259
+0.27897 0.052542
+0.279043 0.05209
+0.278276 0.051703
+0.277741 0.05164
+0.277598 0.051866
+0.278316 0.052347
+0.279527 0.052915
+0.280741 0.05336
+0.281966 0.053694
+0.282732 0.053794
+0.282982 0.053419
+0.282956 0.052962
+0.28274 0.052648
+0.282668 0.05283
+0.282385 0.052865
+0.282652 0.053137
+0.283331 0.053645
+0.284376 0.054408
+0.285144 0.054957
+0.285582 0.055012
+0.285563 0.054912
+0.28496 0.054419
+0.283141 0.053636
+0.28038 0.052967
+0.277661 0.052561
+0.276195 0.052939
+0.276278 0.053514
+0.276958 0.053978
+0.277169 0.053919
+0.276803 0.053513
+0.275698 0.052945
+0.2739 0.052028
+0.271708 0.051056
+0.270081 0.050598
+0.26973 0.050928
+0.270201 0.051638
+0.2713 0.052568
+0.273233 0.053273
+0.27526 0.053542
+0.276416 0.053264
+0.276098 0.052712
+0.275468 0.052339
+0.276155 0.052314
+0.278355 0.052531
+0.280732 0.052755
+0.282022 0.053338
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pos	Tue Sep 07 15:03:57 2021 +0000
@@ -0,0 +1,1 @@
+Widom601Seq	test-data/class1_mtr	153 0.140767