# HG changeset patch
# User artbio
# Date 1578262056 0
# Node ID c86ed39b72eb8e4bb7e7d311aece3b0cc2da32ce
# Parent 6aba6cca3fab207cee2c218d255cf4133fbf362c
"planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/rsem commit 10719ebe96c608be124360a4be62ec4d164412b3"
diff -r 6aba6cca3fab -r c86ed39b72eb extract_transcript_to_gene_map_from_trinity.xml
--- a/extract_transcript_to_gene_map_from_trinity.xml Tue Apr 03 18:27:15 2018 -0400
+++ b/extract_transcript_to_gene_map_from_trinity.xml Sun Jan 05 22:07:36 2020 +0000
@@ -3,9 +3,7 @@
macros.xml
-
- rsem
-
+
diff -r 6aba6cca3fab -r c86ed39b72eb macros.xml
--- a/macros.xml Tue Apr 03 18:27:15 2018 -0400
+++ b/macros.xml Sun Jan 05 22:07:36 2020 +0000
@@ -1,6 +1,13 @@
- 0.5.4
+ 1.0.0
+
+
+ rsem
+ bowtie
+ bowtie2
+
+
diff -r 6aba6cca3fab -r c86ed39b72eb purge_gtf_from_multichrom_genes.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/purge_gtf_from_multichrom_genes.py Sun Jan 05 22:07:36 2020 +0000
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+import argparse
+from collections import defaultdict
+
+
+def command_parse():
+    '''
+    Parse the command line and return the argparse namespace, which holds
+    input (required GTF path), output (default 'output.gtf') and log
+    (default 'purged_genes.log').
+    '''
+    # Adjacent literals are concatenated by Python; a backslash inside a
+    # literal would instead embed the next line's indentation in the text.
+    parser = argparse.ArgumentParser(
+        description='Purge GTF file from genes that are on several '
+                    'chromosomes and list them in a log file')
+    parser.add_argument(
+        '-i', '--input', dest='input', help='input GTF file', required=True)
+    parser.add_argument(
+        '-o', '--output', dest='output', help='output file name',
+        default='output.gtf')
+    parser.add_argument(
+        '-l', '--log', dest='log', help='log of purged genes',
+        default='purged_genes.log')
+    return parser.parse_args()
+
+
+def _get_gene_id(line):
+    '''
+    Return the gene_id value of a GTF line: the text between the first
+    'gene_id "' and the following double quote in the last tab-separated
+    field. Return None when the line carries no gene_id attribute (comment
+    or blank lines for instance).
+    '''
+    attributes = line.rstrip('\r\n').split('\t')[-1]
+    _, marker, rest = attributes.partition('gene_id "')
+    if not marker:
+        return None
+    return rest.split('"', 1)[0]
+
+
+def get_genes(gtf_file):
+    '''
+    Read gtf_file and return a defaultdict(list) that maps each gene_id to
+    the chromosome (first column) of every line carrying that gene_id.
+    Comment lines (starting with '#') and lines without gene_id are skipped.
+    '''
+    genes = defaultdict(list)
+    with open(gtf_file, 'r') as fh:
+        for line in fh:
+            if line.startswith('#'):
+                continue
+            name_gene = _get_gene_id(line)
+            if name_gene is not None:
+                genes[name_gene].append(line.split('\t', 1)[0])
+    return genes
+
+
+def generate_output(genes, log_file):
+    '''
+    Search for all genes that are present on several chromosomes. This
+    function returns the list of these genes (target_genes). It also writes
+    log_file, a tab-delimited file with one gene per line followed by its
+    comma-delimited list of chromosomes. The genes mapping is left untouched.
+    '''
+    target_genes = list()
+    with open(log_file, 'w') as output:
+        for name_gene, chroms in genes.items():
+            # Sorted so that the log is identical from one run to the next.
+            unique_chroms = sorted(set(chroms))
+            if len(unique_chroms) > 1:
+                target_genes.append(name_gene)
+                output.write(
+                    "%s\t%s\n" % (name_gene, ','.join(unique_chroms)))
+    return target_genes
+
+
+def purge_gtf(target_genes, gtf_file, output_file):
+    '''
+    Copy gtf_file to output_file, dropping every line whose gene_id is in
+    target_genes. Lines without gene_id (comments included) are kept.
+    '''
+    # A set gives constant-time membership tests for each GTF line.
+    targets = set(target_genes)
+    with open(gtf_file, 'r') as gtf_handler, \
+            open(output_file, 'w') as output_gtf:
+        for line in gtf_handler:
+            if _get_gene_id(line) not in targets:
+                output_gtf.write(line)
+
+
+def __main__():
+    '''Purge the input GTF and write the log of the removed genes.'''
+    args = command_parse()
+    genes = get_genes(args.input)
+    target_genes = generate_output(genes, args.log)
+    purge_gtf(target_genes, args.input, args.output)
+
+
+if __name__ == "__main__":
+    __main__()
diff -r 6aba6cca3fab -r c86ed39b72eb purge_gtf_from_multichrom_genes.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/purge_gtf_from_multichrom_genes.xml Sun Jan 05 22:07:36 2020 +0000
@@ -0,0 +1,47 @@
+
+ from multi-chromosomes genes
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+This script removes genes located on several chromosomes from a GTF file. This avoids RSEM errors
+due to ambiguous feature coordinates on multiple chromosomes.
+
+
+
+
+ 10.1186/1471-2105-12-323
+
+
+
diff -r 6aba6cca3fab -r c86ed39b72eb rsem-bwt2.xml
--- a/rsem-bwt2.xml Tue Apr 03 18:27:15 2018 -0400
+++ b/rsem-bwt2.xml Sun Jan 05 22:07:36 2020 +0000
@@ -3,10 +3,7 @@
macros.xml
-
- rsem
- bowtie2
-
+
@@ -24,8 +21,6 @@
#if $job.polya.polya_length:
--polyA-length $job.polya.polya_length
#end if
- #elif $job.polya.polya_use == 'none':
- --no-polyA
#end if
$job.ntog
#if $job.transcript_to_gene_map:
@@ -58,7 +53,7 @@
#if $run_rsem.input.fastq.fastq1.is_of_type('fastq.gz') or $run_rsem.input.fastq.fastq1.is_of_type('fastqsanger.gz'):
gunzip < '$run_rsem.input.fastq.fastq1' > uncomp_pair1.fastq &&
gunzip < '$run_rsem.input.fastq.fastq2' > uncomp_pair2.fastq &&
- #elif $run_rsem.input.fastq.singlefastq.is_of_type('fastq') or $run_rsem.input.fastq.singlefastq.is_of_type('fastqsanger'):
+ #elif $run_rsem.input.fastq.fastq1.is_of_type('fastq') or $run_rsem.input.fastq.fastq1.is_of_type('fastqsanger'):
ln -f -s '$run_rsem.input.fastq.fastq1' 'uncomp_pair1.fastq' &&
ln -f -s '$run_rsem.input.fastq.fastq2' 'uncomp_pair2.fastq' &&
#end if
@@ -163,7 +158,7 @@
-
+
@@ -228,7 +223,7 @@
-
+
@@ -326,23 +321,23 @@
-
-
-
+
+
+
-
+
-
+
-
-
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
-
+
+