Mercurial > repos > erinija > dnp_compute_composite
view dnp-symmetrize.sh @ 0:92b1f9bacad7 draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:12:16 +0000 |
parents | |
children |
line wrap: on
line source
#!/bin/sh if test "$#" -ne 2; then echo "" echo " CALL " echo " sh dnp-symmetrize.sh symmetrize-input.tabular symmetrize-output.tabular" echo "" echo " INPUT " echo " symmetrize-input.tabular - selected length dinucleotide frequency profiles from forward and complementary sequences" echo "" echo " OUTPUT " echo " symmetrize-output.tabular - symmetrized output" echo "" echo " DESCRIPTION" echo " Symmetrization of dinucleotide profiles reveals patterns with respect to nucleosome's dyad position." echo " Frequency profiles of each dinucleotide derived from forward and complementary sequences are superimposed " echo " with respect to a center of the dyad by averaging forward and reverse complement profiles at an identified" echo " nucleosome's location. A first column contains position number relative to the dyad position." echo "" echo " Example of an input table" echo " pos AA.f AC.f AG.f AT.f CA.f CC.f CG.f CT.f ..." echo " 20 0.100200 0.084720 0.077200 0.072480 0.066160 0.044160 0.004560 0.060720" echo " 21 0.172440 0.024800 0.002080 0.101240 0.131840 0.007200 0.000320 0.095920" echo " 22 0.077160 0.096240 0.314320 0.047360 0.012040 0.028560 0.011840 0.013680" echo " ..." echo "" echo " Example of a few columns of an output table" echo " pos AA AC AG AT ..." echo " -73 0.08616 0.08034 0.07146 0.05934 ..." echo " -72 0.11976 0.04966 0.03412 0.07274 ..." echo " -71 0.07202 0.08882 0.18912 0.0462 ..." echo " ... " exit 1 fi # # input file name name=$1 # symmetrize all dinucleotides that are in the table # output file out=$2 # compute centerd sequence position posi=`awk -v name=pos '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}` posnum=`awk -v k=${posi} '{print $k}' $name |head -n2| tail -n1` echo "pos" > positions awk -v k=${posi} '{print $k}' $name | tail -n +2 | awk -v offset=$((posnum-1)) '{print $1-offset-74}'>> positions # get the dinucleotides from the first line dinucleotides=`head -n1 ${name} | sed 's/pos//' | sed 's/.f//g' | sed 's/.r//g'` #cnum=1 for di in ${dinucleotides} do # column number for forward and complementary profile i1=`awk -v name=$di'.f' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}` i2=`awk -v name=$di'.r' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}` awk -v k=${i1} '{print $k}' $name | grep -v ".f" > temp.${i1} awk -v k=${i2} '{print $k}' $name | grep -v ".r" | tac > temp.${i2} echo ${di} > cp.${di} paste temp.${i1} temp.${i2} | awk '{print ($1+$2)/2 }' >> cp.${di} #cnum=$((cnum+1)) done paste positions cp.* > ${out} rm temp* cp* positions