Mercurial > repos > erinija > dnp_binary_strings
comparison dnp-compute-composite.sh @ 0:611156829647 draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
| author | erinija |
|---|---|
| date | Fri, 01 May 2020 12:07:46 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:611156829647 |
|---|---|
#!/bin/sh
#
# dnp-compute-composite.sh - append composite dinucleotide profiles to a table.
#
# Usage:
#   sh dnp-compute-composite.sh INPUT.tabular OUTPUT.tabular
#
# INPUT is a tab-separated table whose first line is a header naming the
# dinucleotide columns. OUTPUT is INPUT with four columns appended:
#   WW = AA+AT+TA+TT    SS = CC+CG+GC+GG
#   RR = AA+AG+GA+GG    YY = CC+CT+TC+TT
#
# Exit status: 0 on success; 1 on wrong argument count, unreadable or empty
# input, or when a required dinucleotide column is absent from the header.
# Diagnostics go to stderr; the usage text is printed on stdout.

set -eu

# Print the usage/help text on stdout.
usage() {
  echo ""
  echo " CALL "
  echo " sh dnp-compute-composite.sh compute-composite-input.tabular compute-composite-output.tabular"
  echo ""
  echo " INPUT "
  echo " compute-composite-input.tabular - dinucleotide frequency profiles containing all 16 dinucleotides"
  echo ""
  echo " OUTPUT "
  echo " compute-composite-output.tabular - original input with WW SS RR YY columns added to the output"
  echo ""
  echo " DESCRIPTION"
  echo " Composite dinucleotides weak/weak WW (A or T) , strong/strong SS (G or C), purine/purine RR (A or G), "
  echo " and pyrimidine/pyrimidine YY (C or T) are generalized dinucleotide frequency patterns in nucleosome sequences."
  echo " Given a tabular innput file with all 16 dinucleotides the composite patterns are computed as follows"
  echo " WW=AA+AT+TA+TT, SS=CC+CG+GC+GG, RR=AG+GA+AA+GG, YY=CC+TT+CT+TC and their columns added to the original table."
  echo ""
  echo " Example columns of an input table"
  echo " pos AA AC AG AT ..."
  echo " -73 0.08616 0.08034 0.07146 0.05934 ..."
  echo " -72 0.11976 0.04966 0.03412 0.07274 ..."
  echo " -71 0.07202 0.08882 0.18912 0.0462 ..."
  echo " ... "
  echo " Example columns of computed composites "
  echo " ... WW SS RR"
  echo " ... 0.27644 0.1614 0.29494"
  echo " ... 0.36788 0.1091 0.29428"
  echo " ... 0.21406 0.12566 0.34432"
  echo " ..."
}

if test "$#" -ne 2; then
  usage
  exit 1
fi

name=$1
out=$2

# Check the input before the output redirection below creates/truncates $out.
if test ! -r "$name"; then
  echo "dnp-compute-composite.sh: cannot read input file: $name" >&2
  exit 1
fi

# One awk pass replaces the per-column header scans and the ww/ss/rr/yy
# scratch files: nothing is written to the working directory, and the header
# is split on tabs, the same delimiter used for the data rows.
# The input is fed on stdin so a path containing "=" is never taken for an
# awk variable assignment.
awk '
BEGIN {
  FS = "\t"
  OFS = "\t"
  # The twelve dinucleotides that the four composites are built from.
  n_required = split("AA AG AT CC CG CT GA GC GG TA TC TT", required, " ")
}

# Header line: map column names to indexes and validate before any output.
NR == 1 {
  for (i = 1; i <= NF; i++) {
    label = $i
    gsub(/^[ ]+|[ ]+$/, "", label)      # tolerate padding around a name
    if (!(label in col)) col[label] = i  # first occurrence wins
  }

  missing = ""
  for (k = 1; k <= n_required; k++)
    if (!(required[k] in col)) missing = missing " " required[k]

  if (missing != "") {
    msg = "dnp-compute-composite.sh: missing required column(s):" missing
    print msg | "cat 1>&2"
    failed = 1
    exit 1
  }

  aa = col["AA"]; ag = col["AG"]; at = col["AT"]
  cc = col["CC"]; cg = col["CG"]; ct = col["CT"]
  ga = col["GA"]; gc = col["GC"]; gg = col["GG"]
  ta = col["TA"]; tc = col["TC"]; tt = col["TT"]

  print $0, "WW", "SS", "RR", "YY"
  next
}

# Data lines: the original line is emitted untouched, followed by the sums.
# Terms are listed alphabetically, i.e. in left-to-right column order for an
# alphabetically ordered header.
{
  print $0,
    $aa + $at + $ta + $tt,
    $cc + $cg + $gc + $gg,
    $aa + $ag + $ga + $gg,
    $cc + $ct + $tc + $tt
}

END {
  if (failed) exit 1
  if (NR == 0) {
    print "dnp-compute-composite.sh: input file is empty" | "cat 1>&2"
    exit 1
  }
}
' < "$name" > "$out"
