annotate dnp-select-range.sh @ 0:611156829647 draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:07:46 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
1 #!/bin/sh
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
2
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
3 if test "$#" -ne 5; then
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
4 echo ""
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
5 echo " CALL "
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
6 echo " sh dnp-select-range.sh select-range-input.tabular start length dinucleotides select-range-output.tabular"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
7 echo ""
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
8 echo " INPUT "
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
9 echo " select-range-input.tabular - full length dinucleotide frequency profiles"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
10 echo " start - start position of selection - identified start position of nucleosome's sequence"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
11 echo " length - length of selection; default =146 nucleosome's length in base pairs"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
12 echo " dinucleotides - any subset of dinucleotides enclosed by quotes 'AA AC AG AT CA CC ...'"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
13 echo ""
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
14 echo " OUTPUT "
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
15 echo " select-range-output.tabular - rows of input file selected at a given position spanning a length"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
16 echo ""
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
17 echo " DESCRIPTION"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
18 echo " Selects rows from input table within a given range and adds a column with a position number."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
19 echo ""
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
20 echo " Example of an input table"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
21 echo " AA.f AA.r AC.f AC.r AG.f AG.r AT.f AT.r ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
22 echo " 0.0763 0.067920 0.057800 0.078120 0.081600 0.061960 0.055600 0.044080 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
23 echo " 0.077160 0.073760 0.056000 0.072160 0.079400 0.060720 0.055960 0.047040 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
24 echo " 0.083320 0.071200 0.053840 0.080760 0.084560 0.064880 0.050440 0.048720 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
25 echo " 0.077960 0.068200 0.056040 0.075520 0.080120 0.061680 0.053160 0.047400 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
26 echo " 0.078200 0.069120 0.056880 0.074000 0.084360 0.060840 0.053520 0.046280 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
27 echo " ... "
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
28 echo ""
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
29 echo " Example of an output table where start=20"
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
30 echo " pos AA.f AC.f AG.f AT.f CA.f CC.f CG.f CT.f ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
31 echo " 20 0.100200 0.084720 0.077200 0.072480 0.066160 0.044160 0.004560 0.060720 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
32 echo " 21 0.172440 0.024800 0.002080 0.101240 0.131840 0.007200 0.000320 0.095920 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
33 echo " 22 0.077160 0.096240 0.314320 0.047360 0.012040 0.028560 0.011840 0.013680 ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
34 echo " ..."
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
35
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
36 exit 1
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
37 fi
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
38
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
39
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
40 # input file name
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
41 name=$1
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
42
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
43 # start position
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
44 startpos=$2
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
45
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
46 # length of selection (146bp)
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
47 length=$3
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
48
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
49 # list of dinucleotides
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
50 dinucleotides=$4
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
51
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
52 # output file
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
53 out=$5
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
54
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
55 # compute length of the file
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
56 len=`wc ${name} |awk '{print $1}'`
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
57 #echo $len
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
58
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
59 len=$((len-1))
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
60
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
61 # endpos
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
62 endpos=$((startpos+length))
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
63
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
64 cnum=1
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
65 # TO DO:should scheck if endpos is within the range
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
66 for di in ${dinucleotides}
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
67 do
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
68 # column number for forward and complementary profile
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
69 i1=`awk -v name=$di'.f' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}`
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
70 i2=`awk -v name=$di'.r' '{ for (i=1; i<=NF; i++) if($i==name) print i; exit}' ${name}`
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
71
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
72 echo "$di.f" > forward.${di}
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
73 awk -v k=${i1} '{print $k}' $name | sed -n "${startpos},${endpos}p" >> forward.${di}
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
74 echo "$di.r"> complement.${di}
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
75 awk -v k=${i2} '{print $k}' $name | sed -n "${startpos},${endpos}p" >> complement.${di}
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
76 cnum=$((cnum+1))
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
77 done
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
78
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
79 # sequence positions within range
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
80 echo "pos" > seq.pos
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
81 seq ${startpos} 1 ${endpos} >> seq.pos
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
82
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
83 # paste forward
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
84 paste seq.pos forward.* > forward
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
85 paste complement.* > complement
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
86
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
87 # create output
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
88 paste forward complement > ${out}
611156829647 "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
89 rm seq.pos forward* complement*