$name demultiplex

# HG changeset patch # User davidvanzessen # Date 1404217677 14400 # Node ID 79be0752711d9b094f972f65adca6286329acfe5 # Parent afddfd016ba6614328ca17aeaab509ec46e92616 Uploaded diff -r afddfd016ba6 -r 79be0752711d demultiplex.xml --- a/demultiplex.xml Thu May 08 07:50:42 2014 -0400 +++ b/demultiplex.xml Tue Jul 01 08:27:57 2014 -0400 @@ -4,7 +4,7 @@ fastx_toolkit - r_wrapper.sh $input $out_file $out_file.files_path $where $mismatches $partial $input.name + r_wrapper.sh $input $out_file $out_file.files_path $where $mismatches $partial $input.name $trim_start $trim_end #for $i, $b in enumerate($barcodes) "$b.id" "$b.mid" @@ -173,10 +173,17 @@ - + + + + + + + + diff -r afddfd016ba6 -r 79be0752711d r_wrapper.sh --- a/r_wrapper.sh Thu May 08 07:50:42 2014 -0400 +++ b/r_wrapper.sh Tue Jul 01 08:27:57 2014 -0400 @@ -10,9 +10,11 @@ ext="${name##*.}" name="${name%.*}" prefix=$name"_" +trim_start=$8 +trim_end=$9 dir="$(cd "$(dirname "$0")" && pwd)" -for ((i=8;i<=$#;i=i+2)) +for ((i=10;i<=$#;i=i+2)) do j=$((i+1)) echo -e "${!i}\t${!j}" >> $outDir/barcodes.txt @@ -22,7 +24,7 @@ echo "$3" result=`$dir/sff2fastq $input | $dir/fastx_barcode_splitter.pl --bcfile $outDir/barcodes.txt --prefix "$prefix" --suffix ".fastq" --$EOL --mismatches $mismatches --partial $partial` echo "$result" | tail -n +2 | sed 's/\t/,/g' > output.txt -echo "$name demultiplex" >> $output +echo "$name demultiplex

ID	Count	FASTQ	FASTA

" >> $output ls while IFS=, read barcode count location do @@ -33,8 +35,7 @@ fi file=$name"_"$barcode cat $file.fastq | awk 'NR%4==1{printf ">%s\n", substr($0,2)}NR%4==2{print}' > $file.fasta - #cat $file.fastq | perl -e '$i=0;while(< >){if(/^\@/&&$i==0){s/^\@/\>/;print;}elsif($i==1){print;$i=-3}$i++;}' > $file.fasta - #cat $file.fastq - echo "" >> $output + python $dir/trim.py --input $file.fasta --output ${file}_trimmed.fasta --start $trim_start --end $trim_end + echo "" >> $output done < output.txt -echo "" >> $output +echo "Original fasta after trim" >> $output diff -r afddfd016ba6 -r 79be0752711d trim.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trim.py Tue Jul 01 08:27:57 2014 -0400 @@ -0,0 +1,35 @@ +import argparse + +#docs.python.org/dev/library/argparse.html +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="Input folder with files") +parser.add_argument("--output", help="Output file") +parser.add_argument("--start", help="How many nucleotides to trim from the start", type=int) +parser.add_argument("--end", help="How many nucleotides to trim from the end", type=int) + +args = parser.parse_args() +start = int(args.start) +end = int(args.end) + +if end <= 0: + import shutil + shutil.copy(args.input, args.output) + import sys + sys.exit() + +currentSeq = "" +currentId = "" + +with open(args.input, 'r') as i: + with open(args.output, 'w') as o: + for line in i.readlines(): + if line[0] is ">": + if currentSeq is not "" or currentId is not "": + o.write(currentId) + o.write(currentSeq[start:-end] + "\n") + currentId = line + currentSeq = "" + else: + currentSeq += line.rstrip() + o.write(currentId) + o.write(currentSeq.rstrip()[start:-end] + "\n")

ID	Count	FASTQ	FASTA	Trimmed FASTA
$barcode	$count	$file.fastq	$file.fasta
$barcode	$count	$file.fastq	$file.fasta	${file}_trimmed.fasta