Mercurial > repos > erinija > dnp_binary_strings
comparison dnp-select-range.sh @ 0:611156829647 draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
| author | erinija |
|---|---|
| date | Fri, 01 May 2020 12:07:46 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:611156829647 |
|---|---|
| 1 #!/bin/sh | |
| 2 | |
| 3 if test "$#" -ne 5; then | |
| 4 echo "" | |
| 5 echo " CALL " | |
| 6 echo " sh dnp-select-range.sh select-range-input.tabular start length dinucleotides select-range-output.tabular" | |
| 7 echo "" | |
| 8 echo " INPUT " | |
| 9 echo " select-range-input.tabular - full length dinucleotide frequency profiles" | |
| 10 echo " start - start position of selection - identified start position of nucleosome's sequence" | |
| 11 echo " length - length of selection; default =146 nucleosome's length in base pairs" | |
| 12 echo " dinucleotides - any subset of dinucleotides enclosed by quotes 'AA AC AG AT CA CC ...'" | |
| 13 echo "" | |
| 14 echo " OUTPUT " | |
| 15 echo " select-range-output.tabular - rows of input file selected at a given position spanning a length" | |
| 16 echo "" | |
| 17 echo " DESCRIPTION" | |
| 18 echo " Selects rows from input table within a given range and adds a column with a position number." | |
| 19 echo "" | |
| 20 echo " Example of an input table" | |
| 21 echo " AA.f AA.r AC.f AC.r AG.f AG.r AT.f AT.r ..." | |
| 22 echo " 0.0763 0.067920 0.057800 0.078120 0.081600 0.061960 0.055600 0.044080 ..." | |
| 23 echo " 0.077160 0.073760 0.056000 0.072160 0.079400 0.060720 0.055960 0.047040 ..." | |
| 24 echo " 0.083320 0.071200 0.053840 0.080760 0.084560 0.064880 0.050440 0.048720 ..." | |
| 25 echo " 0.077960 0.068200 0.056040 0.075520 0.080120 0.061680 0.053160 0.047400 ..." | |
| 26 echo " 0.078200 0.069120 0.056880 0.074000 0.084360 0.060840 0.053520 0.046280 ..." | |
| 27 echo " ... " | |
| 28 echo "" | |
| 29 echo " Example of an output table where start=20" | |
| 30 echo " pos AA.f AC.f AG.f AT.f CA.f CC.f CG.f CT.f ..." | |
| 31 echo " 20 0.100200 0.084720 0.077200 0.072480 0.066160 0.044160 0.004560 0.060720 ..." | |
| 32 echo " 21 0.172440 0.024800 0.002080 0.101240 0.131840 0.007200 0.000320 0.095920 ..." | |
| 33 echo " 22 0.077160 0.096240 0.314320 0.047360 0.012040 0.028560 0.011840 0.013680 ..." | |
| 34 echo " ..." | |
| 35 | |
| 36 exit 1 | |
| 37 fi | |
| 38 | |
| 39 | |
| 40 # input file name | |
| 41 name=$1 | |
| 42 | |
| 43 # start position | |
| 44 startpos=$2 | |
| 45 | |
| 46 # length of selection (146bp) | |
| 47 length=$3 | |
| 48 | |
| 49 # list of dinucleotides | |
| 50 dinucleotides=$4 | |
| 51 | |
| 52 # output file | |
| 53 out=$5 | |
| 54 | |
| 55 # compute length of the file | |
| 56 len=`wc ${name} |awk '{print $1}'` | |
| 57 #echo $len | |
| 58 | |
| 59 len=$((len-1)) | |
| 60 | |
| 61 # endpos | |
| 62 endpos=$((startpos+length)) | |
| 63 | |
| 64 cnum=1 | |
| 65 # TODO: should check that endpos is within the range | |
| 66 for di in ${dinucleotides} | |
| 67 do | |
| 68 # column number for forward and complementary profile | |
| 69 i1=`awk -v name=$di'.f' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}` | |
| 70 i2=`awk -v name=$di'.r' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}` | |
| 71 | |
| 72 echo "$di.f" > forward.${di} | |
| 73 awk -v k=${i1} '{print $k}' $name | sed -n "${startpos},${endpos}p" >> forward.${di} | |
| 74 echo "$di.r"> complement.${di} | |
| 75 awk -v k=${i2} '{print $k}' $name | sed -n "${startpos},${endpos}p" >> complement.${di} | |
| 76 cnum=$((cnum+1)) | |
| 77 done | |
| 78 | |
| 79 # sequence positions within range | |
| 80 echo "pos" > seq.pos | |
| 81 seq ${startpos} 1 ${endpos} >> seq.pos | |
| 82 | |
| 83 # paste forward | |
| 84 paste seq.pos forward.* > forward | |
| 85 paste complement.* > complement | |
| 86 | |
| 87 # create output | |
| 88 paste forward complement > ${out} | |
| 89 rm seq.pos forward* complement* |
