# HG changeset patch # User drosofff # Date 1401563535 14400 # Node ID ad813be0021530177658750f21de52d051e5507a # Parent 2f536ef15f496c256dba12fe2df6f47939fc316c Uploaded diff -r 2f536ef15f49 -r ad813be00215 YAC/test-data/yac.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YAC/test-data/yac.fastq Sat May 31 15:12:15 2014 -0400 @@ -0,0 +1,40 @@ +@SRR290479.1 HWI-EAS285:2:1:66:28/1 +TGTAAACATCCCCGACTGGCAGCATNTCGTATGCCG ++ +B@BBCBCCBCBCCC8A<@################## +@SRR290479.2 HWI-EAS285:2:1:67:348/1 +AAAGTGCTACTACTTTTGAGTCTATNTCGTACGCCG ++ +BAA@7?A@@A@@B<'25?6>59:;7#B7@44#;;324'117? +@SRR290479.4 HWI-EAS285:2:1:68:65/1 +ACTGGACTTGGAGTCCGAAGGCATCNCGTATTCCGT ++ +BBB@@ABAAB?9B42&9;################## +@SRR290479.5 HWI-EAS285:2:1:69:594/1 +AAGTGCCGCCAGGTTTTGAGTGGATNTCGTATGGCG ++ +AB?5;3>/=?>=;416481################# +@SRR290479.6 HWI-EAS285:2:1:70:700/1 +TATTGCACTTGTCCCGGCCTGAATCNCGTATCCCGT ++ +BCB=:ACCBB=>BB8<-################### +@SRR290479.7 HWI-EAS285:2:1:70:1679/1 +TGGTAGACTATGGAACGTAGGATCTNGCATGCCGCC ++ +BCBBCCBCCCBCCA?AB>:B@><>############ +@SRR290479.8 HWI-EAS285:2:1:71:1400/1 +AGTGGTAGAGCATTTGAATCTCGTANGCCGTCTTCT ++ +7@BC>>@55CCBCA3CBA14B.A16#*;9359B### +@SRR290479.9 HWI-EAS285:2:1:71:795/1 +TAGCTTATCAGACTGATGTTGACATNTCGTACGCCG ++ +BBBBBCBBCB;>AA',9=18?1:7:#<;57###### +@SRR290479.10 HWI-EAS285:2:1:71:596/1 +TTTGGCAATGGTAGAACTCCCACACNTCGTAGGCCG ++ +B@B>7>9A@<46B@79972################# diff -r 2f536ef15f49 -r ad813be00215 YAC/test-data/yac.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YAC/test-data/yac.out Sat May 31 15:12:15 2014 -0400 @@ -0,0 +1,12 @@ +>1 +TGTAAACATCCCCGACTGGCAGC +>2 +AAAGTGCTACTACTTTTGAGTCT +>3 +ACTGGACTTGGAGTCCGAAGGC +>4 +AAGTGCCGCCAGGTTTTGAGTGG +>5 +TATTGCACTTGTCCCGGCCTGAATCNCGT +>6 +TAGCTTATCAGACTGATGTTGAC diff -r 2f536ef15f49 -r ad813be00215 YAC/yac.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YAC/yac.py Sat May 31 15:12:15 2014 -0400 @@ -0,0 +1,83 @@ +#!/usr/bin/python +# yac = yet another clipper +# v 1.0.0 +# Usage yac.py $input $output $adapter_to_clip $min $max $Nmode +# Christophe Antoniewski + +import sys, string + +class Clip: + def __init__(self, inputfile, outputfile, adapter, minsize, maxsize): + self.inputfile = inputfile + self.outputfile = outputfile + self.adapter = adapter + self.minsize = int(minsize) + self.maxsize = int(maxsize) + def motives (sequence): + '''return a list of motives for perfect (6nt) or imperfect (7nt with one mismatch) search on import string module''' + sequencevariants = [sequence[0:6]] # initializes the list with the 6mer perfect match + dicsubst= {"A":"TGCN", "T":"AGCN", "G":"TACN", "C":"GATN"} + for pos in enumerate(sequence[:6]): + for subst in dicsubst[pos[1]]: + sequencevariants.append(sequence[:pos[0]]+ subst + sequence[pos[0]+1:7]) + return sequencevariants + self.adaptmotifs= motives(self.adapter) + + def scanadapt(self, adaptmotives=[], sequence=""): + '''scans sequence for adapter motives''' + if sequence.rfind(adaptmotives[0]) != -1: + return sequence[:sequence.rfind(adaptmotives[0])] + for motif in adaptmotives[1:]: + if sequence.rfind(motif) != -1: + return sequence[:sequence.rfind(motif)] + return sequence + + def clip_with_N (self): + '''clips adapter sequences from inputfile. + Reads containing N are retained.''' + iterator = 0 + id = 0 + F = open (self.inputfile, "r") + O = open (self.outputfile, "w") + for line in F: + iterator += 1 + if iterator % 4 == 2: + trim = self.scanadapt (self.adaptmotifs, line.rstrip() ) + if self.minsize <= len(trim) <= self.maxsize: + id += 1 + print >> O, ">%i\n%s" % (id, trim) + F.close() + O.close() + def clip_without_N (self): + '''clips adapter sequences from inputfile. + Reads containing N are rejected.''' + iterator = 0 + id = 0 + F = open (self.inputfile, "r") + O = open (self.outputfile, "w") + for line in F: + iterator += 1 + if iterator % 4 == 2: + trim = self.scanadapt (self.adaptmotifs, line.rstrip() ) + if "N" in trim: continue + if self.minsize <= len(trim) <= self.maxsize: + id += 1 + print >> O, ">%i\n%s" % (id, trim) + F.close() + O.close() + +def __main__ (inputfile, outputfile, adapter, minsize, maxsize, Nmode): + instanceClip = Clip (inputfile, outputfile, adapter, minsize, maxsize) + if Nmode == "accept": + instanceClip.clip_with_N() + else: + instanceClip.clip_without_N() + +if __name__ == "__main__" : + input = sys.argv[1] + output = sys.argv[2] + adapter = sys.argv[3] + minsize = sys.argv[4] + maxsize = sys.argv[5] + Nmode = sys.argv[6] + __main__(input, output, adapter, minsize, maxsize, Nmode) diff -r 2f536ef15f49 -r ad813be00215 YAC/yac.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/YAC/yac.xml Sat May 31 15:12:15 2014 -0400 @@ -0,0 +1,58 @@ + + + yac.py $input $output $clip_source.clip_sequence $min $max $Nmode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool clips adapter sequences from a fastq file and fasta file of clipped reads with renumbered fasta headers. + +Clipped sequences with Ns can be discarded. + +Min size and max size filter clipped reads on their size. + +Note that unclipped reads that satisfy the min and max size conditions are kept. + + + + + + + + + + + + + + + + diff -r 2f536ef15f49 -r ad813be00215 test-data/yac.fastq --- a/test-data/yac.fastq Sat May 17 18:41:24 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -@SRR290479.1 HWI-EAS285:2:1:66:28/1 -TGTAAACATCCCCGACTGGCAGCATNTCGTATGCCG -+ -B@BBCBCCBCBCCC8A<@################## -@SRR290479.2 HWI-EAS285:2:1:67:348/1 -AAAGTGCTACTACTTTTGAGTCTATNTCGTACGCCG -+ -BAA@7?A@@A@@B<'25?6>59:;7#B7@44#;;324'117? -@SRR290479.4 HWI-EAS285:2:1:68:65/1 -ACTGGACTTGGAGTCCGAAGGCATCNCGTATTCCGT -+ -BBB@@ABAAB?9B42&9;################## -@SRR290479.5 HWI-EAS285:2:1:69:594/1 -AAGTGCCGCCAGGTTTTGAGTGGATNTCGTATGGCG -+ -AB?5;3>/=?>=;416481################# -@SRR290479.6 HWI-EAS285:2:1:70:700/1 -TATTGCACTTGTCCCGGCCTGAATCNCGTATCCCGT -+ -BCB=:ACCBB=>BB8<-################### -@SRR290479.7 HWI-EAS285:2:1:70:1679/1 -TGGTAGACTATGGAACGTAGGATCTNGCATGCCGCC -+ -BCBBCCBCCCBCCA?AB>:B@><>############ -@SRR290479.8 HWI-EAS285:2:1:71:1400/1 -AGTGGTAGAGCATTTGAATCTCGTANGCCGTCTTCT -+ -7@BC>>@55CCBCA3CBA14B.A16#*;9359B### -@SRR290479.9 HWI-EAS285:2:1:71:795/1 -TAGCTTATCAGACTGATGTTGACATNTCGTACGCCG -+ -BBBBBCBBCB;>AA',9=18?1:7:#<;57###### -@SRR290479.10 HWI-EAS285:2:1:71:596/1 -TTTGGCAATGGTAGAACTCCCACACNTCGTAGGCCG -+ -B@B>7>9A@<46B@79972################# diff -r 2f536ef15f49 -r ad813be00215 test-data/yac.out --- a/test-data/yac.out Sat May 17 18:41:24 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ ->1 -TGTAAACATCCCCGACTGGCAGC ->2 -AAAGTGCTACTACTTTTGAGTCT ->3 -ACTGGACTTGGAGTCCGAAGGC ->4 -AAGTGCCGCCAGGTTTTGAGTGG ->5 -TATTGCACTTGTCCCGGCCTGAATCNCGT ->6 -TAGCTTATCAGACTGATGTTGAC