Mercurial > repos > erinija > plot_selected
comparison dnp-subset-dinuc-profile.sh @ 0:448204d12325 draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
| author | erinija |
|---|---|
| date | Fri, 01 May 2020 12:13:00 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:448204d12325 |
|---|---|
#!/bin/sh
#
# subset_dinuc_profile.sh - dinucleotide frequency profiles for a chosen
# subset of dinucleotides.
#
# Usage:
#   sh subset_dinuc_profile.sh input.fasta 'AA AC AG ...' output
#
# Arguments:
#   $1  input fasta file with a batch of DNA sequences
#   $2  whitespace-separated list of dinucleotides, passed as ONE quoted word
#   $3  output file; receives a tab-separated table with one column pair
#       "<di>.f" / "<di>.r" per requested dinucleotide
#
# For every dinucleotide in the list, dnp-diprofile is run twice on the
# input (once as-is, once with -c for the complementary sequence) and the
# two resulting columns are pasted side by side into the output file.
#
# Exit status:
#   0  success, or usage text printed because the argument count was wrong
#   1  any step failed (missing tool, unreadable input, tool error, ...)
#
# Requires: dnp-diprofile (conda install -c bioconda dnp-diprofile)

# Print the help text to stdout.
usage() {
  printf '%s\n' \
    " CALL " \
    " sh subset_dinuc_profile.sh input.fasta dinucleotides output" \
    "" \
    " INPUT" \
    " input.fasta - a batch of nucleosome (or any DNA) DNA sequences " \
    " dinucleotides - any subset of dinucleotides enclosed by quotes as 'AA AC AG AT CA CC' " \
    "" \
    " OUTPUT" \
    " output - file name to write the output in tabular format, columns have names as AA.f AA.r ..." \
    "" \
    " DESCRIPTION" \
    " Compute dinucleotide frequency profiles on forward and its complementary " \
    " sequences from a batch of fasta sequences. Output columns are labelled by AA.f, AA.r ... " \
    "" \
    " Example of input fasta lines" \
    " >chr9:42475963-42476182" \
    " CCAGGCAGACCCCATATTCAAGCTGCTGCCCCAGGGTGGTGTACAGATCTGGGGAGAAGAAGGATGA" \
    " >chr9:42476175-42476394" \
    " TCTGCACTCCAGCATGCCTGAGGAGAGGAGGGAATGCAGGATCCTAGTGGAAAGAGTACCAAGCTGG" \
    "" \
    " Example of output table" \
    " AA.f AA.r AC.f AC.r ..." \
    " 0.076000 0.059000 0.065000 0.078000 ..." \
    " 0.082000 0.060000 0.057000 0.076000 ..." \
    " 0.067000 0.075000 0.049000 0.071000 ..." \
    "" \
    "" \
    " REQUIREMENT" \
    " dnp-diprofile installed" \
    " conda install -c bioconda dnp-diprofile"
}

# Print a diagnostic to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if test "$#" -ne 3; then
  usage
  # Status 0 is kept on purpose: existing callers may invoke the script
  # without arguments just to display the help text.
  exit 0
fi

name=$1
diset=$2
out=$3

call=dnp-diprofile

command -v "$call" >/dev/null 2>&1 \
  || die "$call not found; install it with: conda install -c bioconda dnp-diprofile"
[ -r "$name" ] || die "cannot read input fasta: $name"

# All intermediate files live in a private scratch directory, so nothing in
# the caller's working directory (or next to the input) is created,
# overwritten or deleted, and concurrent runs cannot collide.
tmpdir=$(mktemp -d) || die "cannot create temporary directory"
trap 'rm -rf -- "$tmpdir"' EXIT
# Plain sh does not run the EXIT trap on a signal unless we exit explicitly.
trap 'exit 1' HUP INT TERM

# Galaxy datasets carry no ".fa" extension, which the profiling tool
# requires, so work on a copy that has one.
fasta=$tmpdir/${name##*/}.fa
cp -- "$name" "$fasta" || die "cannot copy $name to $fasta"

# Sequence length = length of the second line of the file, i.e. the first
# sequence line. A carriage return is dropped so a CRLF-terminated file
# does not inflate the length by one.
seq=$(sed -n '2{p;q;}' < "$name" | tr -d '\r')
len=${#seq}
[ "$len" -gt 0 ] || die "no sequence found on line 2 of $name"

# Split the dinucleotide list on whitespace only; globbing is switched off
# so that a stray '*' or '?' in the list is not expanded against files.
set -f
# shellcheck disable=SC2086  # word splitting is intended here
set -- $diset
set +f
[ "$#" -gt 0 ] || die "no dinucleotides given"
n_di=$#

# For each dinucleotide write a forward and a complementary column file,
# each headed by its column name. The column file paths are appended to the
# positional parameters (the loop's word list was already expanded, so this
# does not disturb the iteration); this keeps the list safe for paths with
# spaces without needing arrays.
for di in "$@"; do
  case $di in
    */*) die "invalid dinucleotide: $di" ;;
  esac

  fwd=$tmpdir/$di.f
  rev=$tmpdir/$di.r

  printf '%s\n' "$di.f" > "$fwd" || die "cannot write $fwd"
  "$call" "$fasta" -di "$di" -sl "$len" >> "$fwd" \
    || die "$call failed for $di (forward)"

  printf '%s\n' "$di.r" > "$rev" || die "cannot write $rev"
  "$call" "$fasta" -di "$di" -sl "$len" -c >> "$rev" \
    || die "$call failed for $di (complementary)"

  set -- "$@" "$fwd" "$rev"
done

# Drop the dinucleotide names, leaving only the column files in order.
shift "$n_di"

paste "$@" > "$out" || die "cannot write output table: $out"

# Scratch files are removed by the EXIT trap.
exit 0
