Mercurial > repos > greg > assembly_post_processor
changeset 100:e0db1f719af1 draft
Uploaded
author | greg |
---|---|
date | Mon, 30 Oct 2017 08:51:22 -0400 |
parents | 01ef9d05d455 |
children | 7ca8ab9a46e9 |
files | assembly_post_processor.py test-data/output.pttgf test-data/output_targeted_gene_families_stats.tabular test-data/tool-data/plant_tribes/scaffolds/README.txt |
diffstat | 4 files changed, 10 insertions(+), 72 deletions(-) [+] |
line wrap: on
line diff
--- a/assembly_post_processor.py Fri Oct 27 13:23:10 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -#!/usr/bin/env python -import argparse -import os -import shutil - -import utils - -OUTPUT_DIR = 'assemblyPostProcessing_dir' - -parser = argparse.ArgumentParser() -parser.add_argument('--dereplicate', dest='dereplicate', default=None, help='Remove duplicate sequences') -parser.add_argument('--gap_trimming', dest='gap_trimming', type=float, default=0, help='Trim alignments') -parser.add_argument('--gene_family_search', dest='gene_family_search', default=None, help='Targeted gene families') -parser.add_argument('--method', dest='method', default=None, help='Protein clustering method') -parser.add_argument('--min_length', dest='min_length', type=int, default=0, help='Minimum sequence length') -parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of processors') -parser.add_argument('--output_cds', dest='output_cds', help='Output transcripts.cds') -parser.add_argument('--output_cleaned_cds', dest='output_cleaned_cds', help='Output transcripts.cleaned.cds') -parser.add_argument('--output_cleaned_nr_cds', dest='output_cleaned_nr_cds', default=None, help='Output transcripts.cleaned.nr.cds') -parser.add_argument('--output_cleaned_nr_pep', dest='output_cleaned_nr_pep', default=None, help='Output transcripts.cleaned.nr.pep') -parser.add_argument('--output_cleaned_pep', dest='output_cleaned_pep', help='Output transcripts.cleaned.pep') -parser.add_argument('--output_pep', dest='output_pep', help='Output transcripts.pep') -parser.add_argument('--output_targeted_gene_families_stats', dest='output_targeted_gene_families_stats', default=None, help='Targeted gene families statistics') -parser.add_argument('--prediction_method', dest='prediction_method', help='Coding regions prediction method') -parser.add_argument('--scaffold', dest='scaffold', default=None, help='Gene family scaffold') -parser.add_argument('--score_matrices', dest='score_matrices', default=None, help='Scores matrices') -parser.add_argument('--strand_specific', dest='strand_specific', default=None, help='Strand-specific assembly') -parser.add_argument('--transcripts', dest='transcripts', help='Transcriptome assembly fasta file') - -args = parser.parse_args() - -# Build the command line. -cmd = 'AssemblyPostProcessor' -if args.dereplicate is not None: - cmd += ' --dereplicate' -if args.gap_trimming > 0: - cmd += ' --gap_trimming %4f' % args.gap_trimming -if args.gene_family_search is not None: - cmd += ' --gene_family_search %s' % args.gene_family_search -if args.method is not None: - cmd += ' --method %s' % args.method -if args.min_length > 0: - cmd += ' --min_length %d' % args.min_length -cmd += ' --num_threads %d' % args.num_threads -cmd += ' --prediction_method %s' % args.prediction_method -if args.scaffold is not None: - cmd += ' --scaffold %s' % args.scaffold -if args.score_matrices is not None: - cmd += ' --score_matrices %s' % args.score_matrices -if args.strand_specific is not None: - cmd += ' --strand_specific' -cmd += ' --transcripts %s' % args.transcripts -# Run the command. -utils.run_command(cmd) - -# Handle outputs. -shutil.move(os.path.join(OUTPUT_DIR, 'transcripts.cds'), args.output_cds) -shutil.move(os.path.join(OUTPUT_DIR, 'transcripts.cleaned.cds'), args.output_cleaned_cds) -if args.output_cleaned_nr_cds is not None: - shutil.move(os.path.join(OUTPUT_DIR, 'transcripts.cleaned.nr.cds'), args.output_cleaned_nr_cds) -if args.output_cleaned_nr_pep is not None: - shutil.move(os.path.join(OUTPUT_DIR, 'transcripts.cleaned.nr.pep'), args.output_cleaned_nr_pep) -shutil.move(os.path.join(OUTPUT_DIR, 'transcripts.cleaned.pep'), args.output_cleaned_pep) -shutil.move(os.path.join(OUTPUT_DIR, 'transcripts.pep'), args.output_pep) -if args.output_targeted_gene_families_stats is not None: - shutil.move(os.path.join(OUTPUT_DIR, 'targeted_gene_family_assemblies.stats'), args.output_targeted_gene_families_stats)
--- a/test-data/output.pttgf Fri Oct 27 13:23:10 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<html><head><h3>Targeted gene families: 1 items</h3></head> -<body><p/><table cellpadding="2"> -<tr><b>Directories</th></b> -<tr bgcolor="#D8D8D8"><td><a href="752" type="text/plain">752</a> -</td></tr> -</table></body></html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_targeted_gene_families_stats.tabular Mon Oct 30 08:51:22 2017 -0400 @@ -0,0 +1,7 @@ +# seq_id = sequence identity +# cov = sequence coverage in the orthogroup trimmed protein alignment +# avg_cov = mean coverage of orthogroup backbone sequences in the trimmed protein alignment +# sd_cov = standard deviation (for coverage mean) of orthogroup backbone sequences in the trimmed protein alignment +# len = total-length of conserved regions of sequence in the orthogroup trimmed protein alignment +# avg_len = mean total-lengths of conserved regions of backbone sequences in the trimmed orthogroup protein alignment +# sd_len = standard deviation (for total-lengths mean) of conserved regions of backbone sequences in the trimmed orthogroup protein alignment
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tool-data/plant_tribes/scaffolds/README.txt Mon Oct 30 08:51:22 2017 -0400 @@ -0,0 +1,3 @@ +For functional tests to work, this directory must contain symlinks to the scaffolds data +installed into the Galaxy instance to which planemo points via the --galaxy_root parameter. +This would typically be something like ~/galaxy/tool-data/plant_tribes/scaffolds/22Gv1.1.