Mercurial > repos > dfornika > data_manager_mash_sketch_builder
changeset 17:9e119de8edf4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit 3a2663e8a2d8b056e041c3c9e550d8e522f44583-dirty"
author | dfornika |
---|---|
date | Mon, 24 Feb 2020 21:11:08 +0000 |
parents | c842c12e81d9 |
children | e081c3731725 |
files | data_manager/mash_sketch_builder.py data_manager/mash_sketch_builder.xml |
diffstat | 2 files changed, 16 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/mash_sketch_builder.py Mon Feb 24 19:17:36 2020 +0000 +++ b/data_manager/mash_sketch_builder.py Mon Feb 24 21:11:08 2020 +0000 @@ -27,6 +27,9 @@ '-p', str(mash_sketch_args["threads"]), str(mash_sketch_args["fasta"]), ] + + if mash_sketch_args["individual_sequences"]: + args = args + ["-i"] subprocess.check_call(['mash', 'sketch'] + args, cwd=target_directory) @@ -50,6 +53,7 @@ parser.add_argument('data_manager_json') parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length') parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='minimizer length') + parser.add_argument('--individual-sequences', dest='individual_sequences', type=boolean, action='store_true' help='Sketch individual sequences (for multi-fasta files)') parser.add_argument('--fasta', dest='fasta', help='Fasta file to sketch') parser.add_argument('--threads', dest='threads', default=1, help='threads') parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch') @@ -59,8 +63,6 @@ target_directory = data_manager_input['output_data'][0]['extra_files_path'] - pprint("***" + target_directory + "***" ) - try: os.mkdir( target_directory ) except OSError as exc: @@ -75,6 +77,7 @@ "kmer_size": args.kmer_size, "sketch_size": args.sketch_size, "fasta": args.fasta, + "individual_sequences": args.individual_sequences, "threads": args.threads, }
--- a/data_manager/mash_sketch_builder.xml Mon Feb 24 19:17:36 2020 +0000 +++ b/data_manager/mash_sketch_builder.xml Mon Feb 24 21:11:08 2020 +0000 @@ -18,6 +18,7 @@ --sketch-name '${sketch_name}' --sketch-size '${sketch_size}' --kmer-size '${kmer_size}' + ${individual_sequences} ]]> </command> @@ -40,6 +41,7 @@ <param type="text" name="sketch_name" label="Sketch name" /> <param type="integer" name="sketch_size" value="1000" min="10" max="1000000" label="Sketch size" /> <param type="integer" name="kmer_size" value="21" min="1" max="32" label="kmer size" /> + <param type="boolean" name="individual_sequences" truevalue="--individual-sequences" falsevalue="" label="Sketch individual sequences"/> </inputs> <outputs> <data name="out_file" format="data_manager_json" /> @@ -51,8 +53,15 @@ <output name="out_file" value="mash_sketch_data_manager.json" /> </test> </tests> - <help> - </help> + <help><![CDATA[ + +**What it does** + + Create a sketch file, which is a reduced representation of a sequence or set + of sequences (based on min-hashes) that can be used for fast distance + estimations. For output, one sketch file will be generated, but it can have + multiple sketches within it, divided by sequences or files. + ]]></help> <citations> <citation type="doi">10.1186/s13059-016-0997-x</citation> </citations>