# HG changeset patch # User fubar # Date 1370584253 14400 # Node ID b227f5edbe529d93119c5892d5a583c25a987b7a # Parent 76297e1ef552491c0e0d43224f4c5bd9dddf41b3 Uploaded diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/generatetest.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_bams_to_count_matrix/generatetest.sh Fri Jun 07 01:50:53 2013 -0400 @@ -0,0 +1,1 @@ +python ../htseqsams2mx.py -g rn4_chr20_100k.gtf -o test.xls --samf "'rn4chr20test1.bam','col1'" --samf "'rn4chr20test2.bam','col2'" diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/htseqsams2mx.py --- a/htseq_bams_to_count_matrix/htseqsams2mx.py Thu Jun 06 22:14:16 2013 -0400 +++ b/htseq_bams_to_count_matrix/htseqsams2mx.py Fri Jun 07 01:50:53 2013 -0400 @@ -60,52 +60,17 @@ self.msg = msg -def keynat(s=None): - ''' - borrowed from http://code.activestate.com/recipes/285264-natural-string-sorting/ - A natural sort helper function for sort() and sorted() - without using regular expressions or exceptions. - >>> items = ('Z', 'a', '10th', '1st', '9') sorted(items) - ['10th', '1st', '9', 'Z', 'a'] - >>> sorted(items, key=keynat) - ['1st', '9', '10th', 'a', 'Z'] - ''' - if type(s) == type([]) or type(s) == type(()) : - s = s[0] - it = type(1) - r = [] - for c in s: - if c.isdigit(): - d = int(c) - if r and type( r[-1] ) == it: - r[-1] = r[-1] * 10 + d - else: - r.append(d) - else: - r.append(c.lower()) - return r - - -def sort_table(table, cols): - """ sort a table by multiple columns - table: a list of lists (or tuple of tuples) where each inner list - represents a row - cols: a list (or tuple) specifying the column numbers to sort by - e.g. 
(1,0) would sort by column 1, then by column 0 +def htseqMX(gff_filename,sam_filenames,colnames,opts): """ - for col in reversed(cols): - table = sorted(table, key=operator.itemgetter(col)) - return table - - - -def htseqMX(gff_filename,sam_filenames,colnames,opts): - + Code taken from count.py in Simon Anders HTSeq distribution + Wrapped in a loop to accept multiple bam/sam files and their names from galaxy to + produce a matrix of contig counts by sample for downstream use in edgeR and DESeq tools + """ class UnknownChrom( Exception ): pass def my_showwarning( message, category, filename, lineno = None, line = None ): - sys.stderr.write( "Warning: %s\n" % message ) + sys.stdout.write( "Warning: %s\n" % message ) def invert_strand( iv ): iv2 = iv.copy() @@ -156,12 +121,19 @@ sys.stdout.write( "Warning: No features of type '%s' found.\n" % opts.feature_type ) for sami,sam_filename in enumerate(sam_filenames): colname = colnames[sami] + isbam = sam_filename.endswith('.bam') try: - read_seq = HTSeq.SAM_Reader( sam_filename ) + if isbam: + read_seq = HTSeq.BAM_Reader( sam_filename ) + else: + read_seq = HTSeq.SAM_Reader( sam_filename ) first_read = iter(read_seq).next() pe_mode = first_read.paired_end except: - sys.stderr.write( "Error occured when reading first line of sam file %s\n" % sam_filename ) + if isbam: + sys.stderr.write( "Error occured when reading first line of bam file %s colname=%s \n" % (sam_filename,colname) ) + else: + sys.stderr.write( "Error occured when reading first line of sam file %s colname=%s \n" % (sam_filename,colname )) raise try: @@ -262,7 +234,7 @@ raise if not opts.quiet: - sys.stdout.write( "%d sam %s processed.\n" % ( sami, "lines " if not pe_mode else "line pairs" ) ) + sys.stdout.write( "%d sam %s processed.\n" % ( seqi, "lines " if not pe_mode else "line pairs" ) ) return counts,empty,ambiguous,lowqual,notaligned,nonunique warnings.showwarning = my_showwarning @@ -280,7 +252,7 @@ def usage(): - print >> sys.stderr, """Usage: python 
htseqsams2mx.py -w -g -o [-i] [-c] --samf "," --samf "..." """ + print >> sys.stdout, """Usage: python htseqsams2mx.py -w -g -o [-i] [-c] --samf "," --samf "..." """ sys.exit(1) if __name__ == "__main__": @@ -345,15 +317,23 @@ contigs = counts.keys() contigs.sort() totalc = 0 + emptycontigs = 0 for contig in contigs: - totalc += sum(counts[contig]) - crow = [contig,] + ['%d' % x for x in counts[contig]] - res.append('\t'.join(crow)) + thisc = sum(counts[contig]) + if thisc > 0: # no output for empty contigs + totalc += thisc + crow = [contig,] + ['%d' % x for x in counts[contig]] + res.append('\t'.join(crow)) + else: + emptycontigs += 1 outf = open(opts.outfname,'w') outf.write('\n'.join(res)) outf.write('\n') outf.close() walltime = int(time.time() - starttime) - accumulatornames = ('walltimeseconds','contigs','emptyread','ambiguous','lowqual','notaligned','nonunique') - notes = ['%s=%d' % (accumulatornames[i],x) for i,x in enumerate((len(contigs),empty,ambiguous,lowqual,notaligned,nonunique))] + accumulatornames = ('walltimeseconds','totreadscounted','ncontigs','emptyreads','ambiguousreads','lowqualreads', + 'notalignedreads','nonuniquereads','emptycontigs') + accums = (walltime,totalc,len(contigs),empty,ambiguous,lowqual,notaligned,nonunique,emptycontigs) + notes = ['%s=%d' % (accumulatornames[i],x) for i,x in enumerate(accums)] print >> sys.stdout, ','.join(notes) + sys.exit(0) diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/htseqsams2mx.xml --- a/htseq_bams_to_count_matrix/htseqsams2mx.xml Thu Jun 06 22:14:16 2013 -0400 +++ b/htseq_bams_to_count_matrix/htseqsams2mx.xml Fri Jun 07 01:50:53 2013 -0400 @@ -1,26 +1,39 @@ - - for DGE - + + using HTSeq code + + + + htseq freetype numpy matplotlib - - + + htseqsams2mx.py -g "$gfffile" -o "$outfile" -m "$model" --id_attribute "$id_attr" --feature_type "$feature_type" --samf "'$firstsamf','${firstsamf.name}'" + #if secondsamfile: + --samf "'$secondsamf','${secondsamf.name}'" + #end if + #if 
thirdsamfile: + --samf "'$thirdsamf','${thirdsamf.name}'" + #end if + #if fourthsamfile: + --samf "'$fourthsamf','${fourthsamf.name}'" + #end if #for $s in $samfiles: --samf "'${s.samf}','${s.samf.name}'" #end for - + - + @@ -28,65 +41,98 @@ + help="If in doubt, use gene name or if you need the id in your GTF, gene id"> + help="GTF feature type to count over - exon is a good choice with gene name as the contig to count over"> - - - + + + + + + + + + + + + + + + + + + + + + -**Warning** - -This code will count reads overlapping contigs supplied in the gff file. - - -**Note** - -htseqsams2mx is an experimental tool currently under test - -There is much discussion about whether to count optical/pcr duplicates. If you set the ignore flag to True, any reads in the input BAM files marked as -duplicates by upstream processing such as the Picard MarkDuplicates tool will NOT be counted. The 'right' setting depends on your data and coverage. For extremely deep -coverage, true duplicate reads are inevitable and ignoring them may be throwing away useful real data. In most cases, counting them is probably a reasonable -choice - any induced bias is likely to be non-differential between samples, whereas it's not at all clear whether that's the case if they are ignored. - ----- - **What this tool does** -Counts reads in multiple sample aligned sam format files using HTSeq counting over a gene model supplied as a GFF file - -The output is a tabular file containing the count matrix and suitable for downstream processing. +Counts reads in multiple sam/bam format mapped files and generates a matrix ideal for edgeR and other count based tools +It uses HTSeq to count your sam reads over a gene model supplied as a GTF file +The output is a tabular text (columnar - spreadsheet) file containing the +count matrix for downstream processing. Each row contains the counts from each sample for each +of the non-empty GTF input file contigs matching the GTF attribute choice above. 
+You probably want to use a gene level GTF output attribute and count reads that overlap +GTF exons for RNA-seq. Or you can count over exons by using transcript level output names or ids. Etc. ---- +**Tool author's plea on the importance of replicates** + +If you want the downstream p values to inform you about your data in terms of rejecting or accepting the null hypothesis +under random sampling from the universe of possible biological/experimental replicates from which your data was drawn, +which is what published p values are often assumed to do, then you need biological +(or, for cell culture material, experimental) replicates. + +Using technical or no replicates means the downstream p values are not interpretable the way most people would assume +they are - i.e. as the probability of obtaining a result as extreme as, or more extreme than, your experimental data +in millions of experiments conducted under the null hypothesis. + +There is no way around this and it is scientific fraud to ignore this issue and publish bogus p values derived from +technical or no replicates without making the lack of biological or experimental error in the p value calculations +clear to your readers. + +See your stats 101 notes on the central limit theorem and test statistics for a refresher or talk to a +statistician if this makes no sense please. + **Attribution** +This Galaxy tool relies on HTSeq_ from http://www-huber.embl.de/users/anders/HTSeq/doc/index.html +for the tricky work of counting. That code includes the following attribution: -This Galaxy wrapper was written for a revised version by Ross Lazarus and is licensed under the LGPL_ like other rgenetics artefacts +## Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology +## Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General +## Public License v3. Part of the 'HTSeq' framework, version HTSeq-0.5.4p3 + +It will be automatically installed if you use the toolshed, as in general you probably should. 
+HTSeq_ must be installed with this tool if you install manually. + +Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is +licensed to you under the LGPL_ like other rgenetics artefacts .. _LGPL: http://www.gnu.org/copyleft/lesser.html - +.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html - - - - - diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/test-data/htseqsams2mx_test1_out.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_bams_to_count_matrix/test-data/htseqsams2mx_test1_out.xls Fri Jun 07 01:50:53 2013 -0400 @@ -0,0 +1,4 @@ +Contig col1 col2 +Clic2 494 944 +F1M7K0_RAT 3 2 +Tmlhe 164 172 diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/test-data/rn4_chr20_100k.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_bams_to_count_matrix/test-data/rn4_chr20_100k.gtf Fri Jun 07 01:50:53 2013 -0400 @@ -0,0 +1,62 @@ +chr20 protein_coding CDS 801 1238 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 801 1238 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 1742 1976 . + 0 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 1742 1976 . + . 
exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 2016 2177 . + 2 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 2016 2177 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 2263 2342 . + 2 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 2263 2342 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 2345 2533 . + 0 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 2345 2533 . + . exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 19528 19708 . 
+ 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 19528 19708 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 19528 19708 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 19528 19708 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding start_codon 19528 19530 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding start_codon 19528 19530 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 21979 22014 . + 2 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 21979 22014 . + . exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 25349 25525 . 
+ 2 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 25349 25525 . + 2 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 25349 25525 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 25349 25525 . + . exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 35197 35476 . + 2 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 35197 35476 . + 2 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 35197 35476 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 35197 35476 . + . 
exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 36764 36883 . + 1 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 36764 36883 . + 1 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 36764 36883 . + . exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 36764 36883 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 49040 49276 . + 1 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 49040 49276 . + 1 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 49040 49276 . + . 
exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 49040 49276 . + . exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 55193 55331 . + 1 exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 55193 55331 . + 1 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 55193 55331 . + . exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 55193 55331 . + . exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 55883 56011 . + 0 exon_number "8"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 55883 56011 . 
+ 0 exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 55883 56124 . + . exon_number "8"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 55883 56124 . + . exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding stop_codon 56012 56014 . + 0 exon_number "8"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding stop_codon 56012 56014 . + 0 exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 66518 66785 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 66729 66785 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding start_codon 66729 66731 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 75931 76040 . 
+ 0 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 75931 76040 . + . exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 76165 76290 . + 1 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 76165 76290 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 79941 80047 . + 1 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 79941 80047 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 80692 80873 . + 2 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 80692 80873 . + . 
exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 81142 81294 . + 0 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 81142 81536 . + . exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding stop_codon 81295 81297 . + 0 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 92810 93748 . - . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; +chr20 protein_coding stop_codon 92810 92812 . - 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; +chr20 protein_coding CDS 92813 93748 . - 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; protein_id "ENSRNOP00000042115"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; +chr20 protein_coding start_codon 93746 93748 . 
- 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/test-data/rn4chr20test1.bam Binary file htseq_bams_to_count_matrix/test-data/rn4chr20test1.bam has changed diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/test-data/rn4chr20test2.bam Binary file htseq_bams_to_count_matrix/test-data/rn4chr20test2.bam has changed diff -r 76297e1ef552 -r b227f5edbe52 htseq_bams_to_count_matrix/tool_dependencies.xml.notworkingcomplex --- a/htseq_bams_to_count_matrix/tool_dependencies.xml.notworkingcomplex Thu Jun 06 22:14:16 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - -