Mercurial > repos > erinija > dnp_smooth
view shuffle_sequences.sh @ 0:7e69f62cb516 draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:10:21 +0000 |
parents | |
children |
line wrap: on
line source
#!/bin/sh
#
# shuffle_sequences.sh - wrapper around the external `ushuffle` program.
#
# Usage:
#   sh shuffle_sequences.sh input.fasta k output.fasta
#
# Arguments:
#   input.fasta   FASTA file to read.
#   k             passed to ushuffle as `-k` (k-let size to preserve).
#   output.fasta  file to write; it is created or truncated.
#
# Output:
#   Two lines per input record:
#     >ID ORIGINAL_SEQUENCE
#     SHUFFLED_SEQUENCE
#   where ID is the first word of the record's header line and the shuffled
#   sequence is whatever `ushuffle -s SEQ -k K -n 1` prints.
#
# Exit status:
#   0 on success; 1 on a wrong argument count, an unreadable input file,
#   a missing ushuffle executable, or a failed conversion / shuffle step.
#
# Limitation: records are split on every ">" character, so a ">" inside a
# header description starts a new record.

set -u

# Print the help text on stdout (the caller decides the exit status).
usage() {
  printf '%s\n' \
    "" \
    "" \
    " CALL " \
    " sh shuffle_sequences.sh input.fasta k output.fasta" \
    "" \
    " INPUT" \
    " input.fasta - input fasta file " \
    " k - 'preserve frequency of k letters (dinucleotide k=2)'" \
    "" \
    " OUTPUT" \
    " output.fasta - shuffle fasta file " \
    "" \
    " DESCRIPTION" \
    " Wrapper for ushuffle program Created by M Jiang. Reference PMID: 18405375 ." \
    " The ushuffle shuffles nucleotides in fasta sequence preserving frequency" \
    " of k-mers, a positional order is changed. By default k=2." \
    " A sequence in fasta file is shuffled only once. The output fasta" \
    " in the identifier >line contains the original sequence; a shuffled sequence " \
    " is output on the second line." \
    "" \
    " REQUIREMENT" \
    " ushuffle installed" \
    " conda install -c bioconda ushuffle" \
    ""
}

if test "$#" -ne 3; then
  usage
  exit 1
fi

name=$1
diset=$2
out=$3

# Name of the shuffling executable, kept in one place so it can be swapped.
# (The original note on this line reads: "comment for a galaxy tool".)
call=ushuffle

if test ! -r "$name"; then
  printf 'shuffle_sequences.sh: cannot read input file: %s\n' "$name" >&2
  exit 1
fi

if ! command -v "$call" >/dev/null 2>&1; then
  printf 'shuffle_sequences.sh: %s not found (conda install -c bioconda ushuffle)\n' "$call" >&2
  exit 1
fi

# Private scratch file instead of a fixed name in the current directory, so
# parallel runs cannot clobber each other. It is removed on every exit path.
tab_file=$(mktemp "${TMPDIR:-/tmp}/shuffle_sequences.XXXXXX") || {
  printf 'shuffle_sequences.sh: cannot create a temporary file\n' >&2
  exit 1
}
trap 'rm -f -- "$tab_file"' EXIT
# Plain sh does not run the EXIT trap on a signal unless the signal handler
# itself exits.
trap 'exit 1' HUP INT TERM

# Prepare fasta into tabular form: one line per record, ">ID<TAB>SEQUENCE".
# Fields are split on newlines so that the header line stays separate from
# the sequence and all sequence lines of a record are joined together.
# The input is fully converted before the output is opened, so the input
# and output paths may be the same file.
awk '
  BEGIN { RS = ">"; FS = "\n"; OFS = "\t" }
  NR > 1 {
    header = $1
    gsub(/\r/, "", header)
    # First whitespace-delimited word of the header line is the identifier.
    n = split(header, words, " ")
    id = (n > 0) ? words[1] : ""
    seq = ""
    for (i = 2; i <= NF; i++) {
      part = $i
      gsub(/[ \t\r]/, "", part)
      seq = seq part
    }
    print ">" id, seq
  }
' < "$name" > "$tab_file" || {
  printf 'shuffle_sequences.sh: failed to convert %s\n' "$name" >&2
  exit 1
}

# A literal tab for IFS (command substitution only strips trailing newlines).
TAB=$(printf '\t')

# Read the tab file and create the shuffled fasta.
while IFS="$TAB" read -r id seq; do
  if test -z "$seq"; then
    # Record without sequence data: nothing to shuffle, keep the record.
    printf '%s\n' "$id"
    printf '\n'
    continue
  fi
  useq=$("$call" -s "$seq" -k "$diset" -n 1) || {
    printf 'shuffle_sequences.sh: %s failed for record %s\n' "$call" "$id" >&2
    exit 1
  }
  printf '%s %s\n' "$id" "$seq"
  printf '%s\n' "$useq"
done < "$tab_file" > "$out" || exit 1