Mercurial > repos > erinija > dnp_correlation_between_profiles
diff dnp-symmetrize.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
line wrap: on
line diff
#!/bin/sh
#
# dnp-symmetrize.sh - symmetrize dinucleotide frequency profiles.
#
# Usage:
#   sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular
#
# Input:  a whitespace-separated table whose header contains a "pos" column
#         and, for every dinucleotide XX, a forward column "XX.f" and a
#         complementary column "XX.r".
# Output: a tab-separated table with a re-centred "pos" column followed by
#         one column per dinucleotide. Row j of a dinucleotide column is the
#         mean of row j of "XX.f" and row (n - j + 1) of "XX.r", i.e. the
#         complementary profile is reversed before averaging.
#
# Exit status: 0 on success, 1 on wrong argument count or malformed input.
#
# All scratch files live in a private temporary directory that is removed on
# exit, so nothing in the current directory is created or deleted.

set -u

# Print the help text (to stdout, as the tool has always done).
usage() {
  echo ""
  echo " CALL "
  echo " sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular"
  echo ""
  echo " INPUT "
  echo " symmetrize-input.tabular - selected length dinucleotide frequency profiles from forward and complementary sequences"
  echo ""
  echo " OUTPUT "
  echo " symmetrize-output.tabular - symmetrized output"
  echo ""
  echo " DESCRIPTION"
  echo " Symmetrization of dinucleotide profiles reveals patterns with respect to nucleosome's dyad position."
  echo " Frequency profiles of each dinucleotide derived from forward and complementary sequences are superimposed "
  echo " with respect to a center of the dyad by averaging forward and reverse complement profiles at an identified"
  echo " nucleosome's location. A first column contains position number relative to the dyad position."
  echo ""
  echo " Example of an input table"
  echo " pos AA.f AC.f AG.f AT.f CA.f CC.f CG.f CT.f ..."
  echo " 20 0.100200 0.084720 0.077200 0.072480 0.066160 0.044160 0.004560 0.060720"
  echo " 21 0.172440 0.024800 0.002080 0.101240 0.131840 0.007200 0.000320 0.095920"
  echo " 22 0.077160 0.096240 0.314320 0.047360 0.012040 0.028560 0.011840 0.013680"
  echo " ..."
  echo ""
  echo " Example of a few columns of an output table"
  echo " pos AA AC AG AT ..."
  echo " -73 0.08616 0.08034 0.07146 0.05934 ..."
  echo " -72 0.11976 0.04966 0.03412 0.07274 ..."
  echo " -71 0.07202 0.08882 0.18912 0.0462 ..."
  echo " ... "
  echo ""
}

# Report an error on stderr and abort with status 1.
die() {
  printf 'dnp-symmetrize.sh: %s\n' "$*" >&2
  exit 1
}

# Print the 1-based index of the header column named $1 in file $2.
# Prints nothing when the header has no such column.
column_index() {
  awk -v name="$1" '{
    for (i = 1; i <= NF; i++)
      if ($i == name) { print i; exit }
    exit
  }' < "$2"
}

if test "$#" -ne 2; then
  usage
  exit 1
fi

# input file name
name=$1
# output file
out=$2

[ -r "$name" ] || die "cannot read input file: $name"

# The first data row is mapped to 1 - 74 = -73, matching the usage example.
# NOTE(review): presumably this assumes a 147-position window centred on the
# dyad - confirm before using other profile lengths.
centre_shift=74

workdir=$(mktemp -d) || die "cannot create temporary directory"
trap 'rm -rf -- "$workdir"' EXIT
# Make signals go through the EXIT trap so the scratch directory is removed.
trap 'exit 1' HUP INT TERM

# compute centered sequence positions
posi=$(column_index pos "$name")
[ -n "$posi" ] || die "no 'pos' column in the header of $name"

{
  echo "pos"
  # Shift so that the first data row gets position 1 - centre_shift.
  awk -v k="$posi" -v shift="$centre_shift" '
    NR == 2 { first = $k }
    NR > 1  { print $k - (first - 1) - shift }
  ' < "$name"
} > "$workdir/positions" || die "cannot compute positions from $name"

# Get the dinucleotide names from the header: every column ending in the
# literal suffix ".f", listed once each.
awk '{
  for (i = 1; i <= NF; i++)
    if ($i ~ /\.f$/) {
      base = substr($i, 1, length($i) - 2)
      if (base != "" && !(base in seen)) { seen[base] = 1; print base }
    }
  exit
}' < "$name" > "$workdir/names" || die "cannot read the header of $name"
[ -s "$workdir/names" ] || die "no '<dinucleotide>.f' columns in the header of $name"

while IFS= read -r di; do
  # column numbers of the forward and complementary profiles
  i1=$(column_index "$di.f" "$name")
  i2=$(column_index "$di.r" "$name")
  [ -n "$i1" ] || die "missing column '$di.f' in $name"
  [ -n "$i2" ] || die "missing column '$di.r' in $name"

  {
    echo "$di"
    # Average the forward profile with the reversed complementary profile.
    awk -v f="$i1" -v r="$i2" '
      NR > 1 { n++; fwd[n] = $f; rev[n] = $r }
      END    { for (j = 1; j <= n; j++) print (fwd[j] + rev[n - j + 1]) / 2 }
    ' < "$name"
  } > "$workdir/cp.$di" || die "cannot symmetrize '$di'"
done < "$workdir/names"

# Columns are joined in glob order of the cp.* files.
paste "$workdir/positions" "$workdir"/cp.* > "$out" || die "cannot write output file: $out"