Mercurial > repos > galaxyp > percolator
annotate nested_collection.py @ 6:653d724a6b99 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit de8cdf895c3c6113f301a119788701b2465a1b1b"
| author | galaxyp | 
|---|---|
| date | Thu, 13 Aug 2020 07:53:09 +0000 | 
| parents | 022f2c8d3274 | 
| children | 
| rev | line source | 
|---|---|
| 1 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 1 import argparse | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 2 import os | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 3 import re | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 4 from collections import OrderedDict | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 5 | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 6 | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
def get_filename_index_with_identifier(realnames, pool_id):
    """Return the positions of the file names that match a pool identifier.

    ``pool_id`` is interpreted as a regular expression and searched for
    anywhere in each file name, so a plain substring works as long as it
    contains no regex metacharacters.

    :param realnames: iterable of file names (strings).
    :param pool_id: regular expression identifying the pool.
    :returns: list of indices into ``realnames`` whose name matches,
        in input order; empty when nothing matches.
    """
    # Compile once up front instead of passing the raw pattern to
    # re.search for every single file name.
    pattern = re.compile(pool_id)
    return [index for index, fn in enumerate(realnames)
            if pattern.search(fn) is not None]
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 13 | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 14 | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
def get_batches_of_galaxyfiles(realnames, batchsize, pool_ids):
    """Group the input files per pool and split every pool into batches.

    This is a generator. Each yielded item is a tuple
    ``(pool_id, batch, in_pool_indices)`` where ``batch`` is a list of
    indices into ``realnames`` and ``in_pool_indices`` is the parallel list
    of each file's position inside its pool. A batch never spans two pools:
    the remainder of a pool is flushed before the next pool starts.

    :param realnames: list of file names.
    :param batchsize: maximum number of files per batch (int or numeric
        string). A falsy value or ``0`` puts a whole pool in one batch.
    :param pool_ids: iterable of pool identifiers (regular expressions
        matched against the file names), or a falsy value to put every
        file in a single pool named ``'pool0'``.
    """
    if pool_ids:
        filegroups = OrderedDict(
            (p_id, get_filename_index_with_identifier(realnames, p_id))
            for p_id in pool_ids)
    else:
        # Without pool identifiers all files belong to one default pool.
        filegroups = {'pool0': range(len(realnames))}
    # Convert once; 0 means "no limit" because a batch that just received
    # a file can never have length 0.
    size = int(batchsize) if batchsize else 0
    for pool_id, grouped_indices in filegroups.items():
        batch, in_pool_indices = [], []
        for in_pool_index, total_index in enumerate(grouped_indices):
            batch.append(total_index)
            in_pool_indices.append(in_pool_index)
            if len(batch) == size:
                yield pool_id, batch, in_pool_indices
                batch, in_pool_indices = [], []
        # Flush the incomplete last batch of this pool.
        if batch:
            yield pool_id, batch, in_pool_indices
| 1 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 36 | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 37 | 
| 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
def main(argv=None):
    """Symlink the input files under names that encode pool and batch.

    Parses the command line, batches the files with
    ``get_batches_of_galaxyfiles`` and creates, in the current working
    directory, one symlink per file named
    ``<pool>_batch<NN>___inputfn<MM>_<realname>.data``. The batch number and
    the in-pool file number are zero-padded so that the names sort
    correctly.

    :param argv: list of command-line arguments; ``None`` (the default)
        makes argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', dest='batchsize', default=False)
    # Both lists are indexed unconditionally below, so they are mandatory.
    parser.add_argument('--real-names', dest='realnames', nargs='+',
                        required=True)
    parser.add_argument('--galaxy-files', dest='galaxyfiles', nargs='+',
                        required=True)
    parser.add_argument('--pool-ids', dest='poolids', nargs='+', default=False)
    args = parser.parse_args(argv)
    # The two lists are parallel: realnames[i] names galaxyfiles[i].
    if len(args.realnames) != len(args.galaxyfiles):
        parser.error('--real-names and --galaxy-files must have the same '
                     'number of entries')
    batches = list(get_batches_of_galaxyfiles(
        args.realnames, args.batchsize, args.poolids))
    batchdigits = len(str(len(batches)))
    # Count the files per pool to get the zero-padding width of the
    # in-pool file number.
    pool_sizes = {}
    for pool_id, batch, _ in batches:
        pool_sizes[pool_id] = pool_sizes.get(pool_id, 0) + len(batch)
    pooldigits = {pid: len(str(size)) for pid, size in pool_sizes.items()}
    for batchcount, (pool_id, batch, in_pool_indices) in enumerate(batches):
        for fnindex, in_pool_index in zip(batch, in_pool_indices):
            dsetname = '{pid}_batch{bi:0{bd}d}___inputfn{fi:0{pd}d}_{real}.data'.format(
                pid=pool_id, bi=batchcount, bd=batchdigits, fi=in_pool_index,
                pd=pooldigits[pool_id], real=args.realnames[fnindex])
            print('producing', dsetname)
            os.symlink(args.galaxyfiles[fnindex], dsetname)
| 1 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
changeset | 59 | 
| 6 
653d724a6b99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit de8cdf895c3c6113f301a119788701b2465a1b1b"
 galaxyp parents: 
4diff
changeset | 60 | 
| 1 
23141085ca9e
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit 0a5f9eb82877545be1c924357e585b17e01cfd1c
 galaxyp parents: diff
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
