Mercurial > repos > rnateam > graphclust_preprocessing
comparison splitSHAPE.py @ 6:e31c659be8bc draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 6767a5ffb02052c844e9d862c79912f998f39d8e
| author | rnateam |
|---|---|
| date | Mon, 20 Nov 2017 05:01:21 -0500 |
| parents | f4ad5dceb619 |
| children | |
comparison
equal
deleted
inserted
replaced
| 5:f4ad5dceb619 | 6:e31c659be8bc |
|---|---|
| 1 import os | 1 import os |
| 2 import re | 2 import re |
| 3 import sys | 3 import sys |
| 4 | 4 |
| 5 shape_file = sys.argv[1] | 5 shape_file = sys.argv[1] |
| 6 win_size = int(sys.argv[2]) | |
| 7 | 6 |
| 8 pattern = re.compile("^>.*$") | 7 pattern = re.compile("^>.*$") |
| 9 toWrite = "" | 8 toWrite = "" |
| 10 | 9 |
| 11 count_for_id = 1 | 10 count_for_id = 1 |
| 16 seq_string = [] | 15 seq_string = [] |
| 17 orig_id = [] | 16 orig_id = [] |
| 18 name_file = "FASTA/data.names" | 17 name_file = "FASTA/data.names" |
| 19 array_all_chunks = [] | 18 array_all_chunks = [] |
| 20 with open(name_file, 'r') as f: | 19 with open(name_file, 'r') as f: |
| 21 content = f.read() | 20 for line in f: |
| 22 lines = content.split('\n')[:-1] | 21 if len(line.strip()) == 0: |
| 23 for line in lines: | 22 continue |
| 24 seq_id.append(int(line.split()[0])) | 23 seq_id.append(int(line.split()[0])) |
| 25 seq_string.append(line.split()[1]) | 24 seq_string.append(line.split()[1]) |
| 26 orig_id_srt = line.split()[3] | 25 orig_id_srt = line.split()[3] |
| 27 orig_id_srt = orig_id_srt.rsplit('_',1)[0] | 26 orig_id_srt = orig_id_srt.rsplit('_',1)[0] |
| 28 orig_id.append(orig_id_srt) | 27 orig_id.append(orig_id_srt) |
| 34 with open(shape_file, 'r') as shape: | 33 with open(shape_file, 'r') as shape: |
| 35 content = shape.read() | 34 content = shape.read() |
| 36 lines = content.split('\n') | 35 lines = content.split('\n') |
| 37 for line in lines: | 36 for line in lines: |
| 38 if pattern.match(line): | 37 if pattern.match(line): |
| 39 line = line.replace('>','').strip() | 38 line = line.replace('>','').split()[0] |
| 40 react_arr=[] | 39 react_arr=[] |
| 41 react_dict[line] = react_arr | 40 react_dict[line] = react_arr |
| 42 continue | 41 continue |
| 43 else: | 42 else: |
| 44 react_arr.append(line) | 43 react_arr.append(line) |
| 45 | 44 |
| 46 toWrite = "" | 45 toWrite = "" |
| 47 chunks = [] | 46 chunks = [] |
| 48 for i in range(len(orig_id)): | 47 for i in range(len(orig_id)): |
| 49 if not orig_id[i] in react_dict: | 48 if not orig_id[i] in react_dict: |
| 50 raise RuntimeError('Error key {} not found'.format(orig_id)) | 49 raise RuntimeError('Error key {} {} not found'.format(i, orig_id[i])) |
| 51 | 50 |
| 52 react_val = react_dict[orig_id[i]] | 51 react_val = react_dict[orig_id[i]] |
| 53 toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n" | 52 toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n" |
| 54 chunks = re.findall(r'\d+', seq_string[i]) | 53 chunks = re.findall(r'\d+', seq_string[i]) |
| 55 | 54 |
