5
|
1 #!/usr/bin/python
|
|
2 # yac = yet another clipper
|
|
3 # v 1.0.0
|
|
4 # Usage: yac.py $input $output $adapter_to_clip $min $max $Nmode   ($Nmode = "accept" keeps reads containing N; any other value rejects them)
|
|
5 # Christophe Antoniewski <drosofff@gmail.com>
|
|
6
|
|
7 import sys, string
|
|
8
|
|
class Clip:
    '''Clip an adapter from the reads of a 4-line-per-record file.

    The second line of every 4-line record (the sequence line in a
    FASTQ-style layout) is scanned for the adapter. Everything from the
    right-most adapter hit onwards is removed, and reads whose clipped
    length lies in [minsize, maxsize] are written as ">index\\nsequence"
    records to the output file.

    Attributes:
        inputfile: path of the file to read.
        outputfile: path of the file to (over)write.
        adapter: adapter sequence given by the caller.
        minsize: smallest clipped length kept (int).
        maxsize: largest clipped length kept (int).
        adaptmotifs: search motifs derived from the adapter; the perfect
            6-mer first, then the one-mismatch 7-mers.
    '''

    # For each adapter base, the characters tried in its place when
    # building the one-mismatch motifs.
    _SUBSTITUTIONS = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}

    def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
        '''Store the parameters and precompute the adapter motifs.

        minsize and maxsize may be given as strings (e.g. straight from
        sys.argv); they are converted with int().
        Raises KeyError if one of the first six adapter characters is not
        an upper-case A, T, G or C.
        '''
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.adaptmotifs = self._motives(self.adapter)

    @staticmethod
    def _motives(sequence):
        '''Return the list of motifs used to locate the adapter.

        The first element is the perfect match on the first 6 nt of the
        adapter. It is followed by every variant of the first 7 nt that
        carries exactly one substitution within the first 6 positions.
        '''
        variants = [sequence[0:6]]
        for pos, base in enumerate(sequence[:6]):
            for subst in Clip._SUBSTITUTIONS[base]:
                variants.append(sequence[:pos] + subst + sequence[pos + 1:7])
        return variants

    def scanadapt(self, adaptmotives=(), sequence=""):
        '''Return sequence truncated at the first motif that matches.

        Motifs are tried in list order (perfect match first); for the first
        one found, the sequence is cut at its right-most occurrence. If no
        motif matches, or the motif list is empty, the sequence is returned
        unchanged.
        '''
        for motif in adaptmotives:
            cut = sequence.rfind(motif)
            if cut != -1:
                return sequence[:cut]
        return sequence

    def _clip(self, reject_n):
        '''Shared implementation of clip_with_N / clip_without_N.

        reject_n: when true, reads whose clipped sequence still contains
        "N" are dropped before the size filter is applied.
        '''
        kept = 0
        # "with" guarantees both files are closed even if a read fails
        # half-way; the input is opened first so that a missing input does
        # not leave an empty output file behind.
        with open(self.inputfile, "r") as reads:
            with open(self.outputfile, "w") as out:
                for lineno, line in enumerate(reads, 1):
                    # Only the 2nd line of each 4-line record is a sequence.
                    if lineno % 4 != 2:
                        continue
                    trim = self.scanadapt(self.adaptmotifs, line.rstrip())
                    if reject_n and "N" in trim:
                        continue
                    if self.minsize <= len(trim) <= self.maxsize:
                        kept += 1
                        # write() instead of the Python-2-only "print >>"
                        # statement: same bytes, runs on Python 2 and 3.
                        out.write(">%i\n%s\n" % (kept, trim))

    def clip_with_N(self):
        '''clips adapter sequences from inputfile.
        Reads containing N are retained.'''
        self._clip(False)

    def clip_without_N(self):
        '''clips adapter sequences from inputfile.
        Reads containing N are rejected.'''
        self._clip(True)
|
|
68
|
|
def __main__(inputfile, outputfile, adapter, minsize, maxsize, Nmode):
    '''Build a Clip job and run it in the mode selected by Nmode.

    Nmode == "accept" runs clip_with_N(); any other value runs
    clip_without_N().
    '''
    clipper = Clip(inputfile, outputfile, adapter, minsize, maxsize)
    # Pick the bound method first, then call it once.
    run = clipper.clip_with_N if Nmode == "accept" else clipper.clip_without_N
    run()
|
|
75
|
|
if __name__ == "__main__":
    # Command-line entry point. Positional arguments, in order:
    # input file, output file, adapter, min size, max size, N mode.
    input, output, adapter = sys.argv[1], sys.argv[2], sys.argv[3]
    minsize, maxsize = sys.argv[4], sys.argv[5]
    Nmode = sys.argv[6]
    __main__(input, output, adapter, minsize, maxsize, Nmode)
|