Mercurial > repos > fubar > htseq_bams_to_count_matrix
changeset 14:b227f5edbe52 draft
Uploaded
author | fubar |
---|---|
date | Fri, 07 Jun 2013 01:50:53 -0400 |
parents | 76297e1ef552 |
children | 9b2f58a260e9 |
files | htseq_bams_to_count_matrix/generatetest.sh htseq_bams_to_count_matrix/htseqsams2mx.py htseq_bams_to_count_matrix/htseqsams2mx.xml htseq_bams_to_count_matrix/test-data/htseqsams2mx_test1_out.xls htseq_bams_to_count_matrix/test-data/rn4_chr20_100k.gtf htseq_bams_to_count_matrix/test-data/rn4chr20test1.bam htseq_bams_to_count_matrix/test-data/rn4chr20test2.bam htseq_bams_to_count_matrix/tool_dependencies.xml.notworkingcomplex |
diffstat | 8 files changed, 182 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_bams_to_count_matrix/generatetest.sh Fri Jun 07 01:50:53 2013 -0400 @@ -0,0 +1,1 @@ +python ../htseqsams2mx.py -g rn4_chr20_100k.gtf -o test.xls --samf "'rn4chr20test1.bam','col1'" --samf "'rn4chr20test2.bam','col2'"
--- a/htseq_bams_to_count_matrix/htseqsams2mx.py Thu Jun 06 22:14:16 2013 -0400 +++ b/htseq_bams_to_count_matrix/htseqsams2mx.py Fri Jun 07 01:50:53 2013 -0400 @@ -60,52 +60,17 @@ self.msg = msg -def keynat(s=None): - ''' - borrowed from http://code.activestate.com/recipes/285264-natural-string-sorting/ - A natural sort helper function for sort() and sorted() - without using regular expressions or exceptions. - >>> items = ('Z', 'a', '10th', '1st', '9') sorted(items) - ['10th', '1st', '9', 'Z', 'a'] - >>> sorted(items, key=keynat) - ['1st', '9', '10th', 'a', 'Z'] - ''' - if type(s) == type([]) or type(s) == type(()) : - s = s[0] - it = type(1) - r = [] - for c in s: - if c.isdigit(): - d = int(c) - if r and type( r[-1] ) == it: - r[-1] = r[-1] * 10 + d - else: - r.append(d) - else: - r.append(c.lower()) - return r - - -def sort_table(table, cols): - """ sort a table by multiple columns - table: a list of lists (or tuple of tuples) where each inner list - represents a row - cols: a list (or tuple) specifying the column numbers to sort by - e.g. 
(1,0) would sort by column 1, then by column 0 +def htseqMX(gff_filename,sam_filenames,colnames,opts): """ - for col in reversed(cols): - table = sorted(table, key=operator.itemgetter(col)) - return table - - - -def htseqMX(gff_filename,sam_filenames,colnames,opts): - + Code taken from count.py in Simon Anders HTSeq distribution + Wrapped in a loop to accept multiple bam/sam files and their names from galaxy to + produce a matrix of contig counts by sample for downstream use in edgeR and DESeq tools + """ class UnknownChrom( Exception ): pass def my_showwarning( message, category, filename, lineno = None, line = None ): - sys.stderr.write( "Warning: %s\n" % message ) + sys.stdout.write( "Warning: %s\n" % message ) def invert_strand( iv ): iv2 = iv.copy() @@ -156,12 +121,19 @@ sys.stdout.write( "Warning: No features of type '%s' found.\n" % opts.feature_type ) for sami,sam_filename in enumerate(sam_filenames): colname = colnames[sami] + isbam = sam_filename.endswith('.bam') try: - read_seq = HTSeq.SAM_Reader( sam_filename ) + if isbam: + read_seq = HTSeq.BAM_Reader( sam_filename ) + else: + read_seq = HTSeq.SAM_Reader( sam_filename ) first_read = iter(read_seq).next() pe_mode = first_read.paired_end except: - sys.stderr.write( "Error occured when reading first line of sam file %s\n" % sam_filename ) + if isbam: + sys.stderr.write( "Error occured when reading first line of bam file %s colname=%s \n" % (sam_filename,colname) ) + else: + sys.stderr.write( "Error occured when reading first line of sam file %s colname=%s \n" % (sam_filename,colname )) raise try: @@ -262,7 +234,7 @@ raise if not opts.quiet: - sys.stdout.write( "%d sam %s processed.\n" % ( sami, "lines " if not pe_mode else "line pairs" ) ) + sys.stdout.write( "%d sam %s processed.\n" % ( seqi, "lines " if not pe_mode else "line pairs" ) ) return counts,empty,ambiguous,lowqual,notaligned,nonunique warnings.showwarning = my_showwarning @@ -280,7 +252,7 @@ def usage(): - print >> sys.stderr, """Usage: python 
htseqsams2mx.py -w <halfwindowsize> -g <gfffile.gff> -o <outfilename> [-i] [-c] --samf "<sam1.sam>,<sam1.column_header>" --samf "...<samN.column_header>" """ + print >> sys.stdout, """Usage: python htseqsams2mx.py -w <halfwindowsize> -g <gfffile.gff> -o <outfilename> [-i] [-c] --samf "<sam1.sam>,<sam1.column_header>" --samf "...<samN.column_header>" """ sys.exit(1) if __name__ == "__main__": @@ -345,15 +317,23 @@ contigs = counts.keys() contigs.sort() totalc = 0 + emptycontigs = 0 for contig in contigs: - totalc += sum(counts[contig]) - crow = [contig,] + ['%d' % x for x in counts[contig]] - res.append('\t'.join(crow)) + thisc = sum(counts[contig]) + if thisc > 0: # no output for empty contigs + totalc += thisc + crow = [contig,] + ['%d' % x for x in counts[contig]] + res.append('\t'.join(crow)) + else: + emptycontigs += 1 outf = open(opts.outfname,'w') outf.write('\n'.join(res)) outf.write('\n') outf.close() walltime = int(time.time() - starttime) - accumulatornames = ('walltimeseconds','contigs','emptyread','ambiguous','lowqual','notaligned','nonunique') - notes = ['%s=%d' % (accumulatornames[i],x) for i,x in enumerate((len(contigs),empty,ambiguous,lowqual,notaligned,nonunique))] + accumulatornames = ('walltimeseconds','totreadscounted','ncontigs','emptyreads','ambiguousreads','lowqualreads', + 'notalignedreads','nonuniquereads','emptycontigs') + accums = (walltime,totalc,len(contigs),empty,ambiguous,lowqual,notaligned,nonunique,emptycontigs) + notes = ['%s=%d' % (accumulatornames[i],x) for i,x in enumerate(accums)] print >> sys.stdout, ','.join(notes) + sys.exit(0)
--- a/htseq_bams_to_count_matrix/htseqsams2mx.xml Thu Jun 06 22:14:16 2013 -0400 +++ b/htseq_bams_to_count_matrix/htseqsams2mx.xml Fri Jun 07 01:50:53 2013 -0400 @@ -1,26 +1,39 @@ -<tool id="htseqsams2mx" name="Multiple SAMs to count matrix" version="0.2"> - <description>for DGE</description> - <requirements> +<tool id="htseqsams2mx" name="SAM/BAM to count matrix" version="0.2"> + <description>using HTSeq code</description> + <stdio> + <exit_code range="666" level="warning" + description="Exit code 666 encountered" /> + </stdio> + <requirements> <requirement type="package" version="0.5.4p3">htseq</requirement> <requirement type="package" version="2.4.11">freetype</requirement> <requirement type="package" version="1.7.1">numpy</requirement> <requirement type="package" version="1.2.1">matplotlib</requirement> - </requirements> - <command interpreter="python"> + </requirements> + <command interpreter="python"> htseqsams2mx.py -g "$gfffile" -o "$outfile" -m "$model" --id_attribute "$id_attr" --feature_type "$feature_type" --samf "'$firstsamf','${firstsamf.name}'" + #if secondsamfile: + --samf "'$secondsamf','${secondsamf.name}'" + #end if + #if thirdsamfile: + --samf "'$thirdsamf','${thirdsamf.name}'" + #end if + #if fourthsamfile: + --samf "'$fourthsamf','${fourthsamf.name}'" + #end if #for $s in $samfiles: --samf "'${s.samf}','${s.samf.name}'" #end for - </command> + </command> <inputs> <param format="gtf" name="gfffile" type="data" label="Gene model (GFF) file to count reads over from your current history" size="100" /> <param name="mapqMin" label="Filter reads with mapq below than this value" help="0 to count any mapping quality read. 
Otherwise only reads at or above specified mapq will be counted" type="integer" value="5"/> <param name="title" label="Name for this job's output file" type="text" size="80" value="bams to DGE matrix"/> - <param name="forceName" value="false" type="boolean" label="Force replacement to chr:start-offset as the name for each contig in the output" - truevalue="true" falsevalue="false" checked="no" help="Leave as false to use the contig names as supplied in your bed file" /> + <param name="stranded" value="false" type="boolean" label="Reads are stranded - use strand in counting" display="checkbox" + truevalue="yes" falsevalue="no" checked="no" help="Check this ONLY if you know your sequences are strand specific" /> <param name="model" type="select" label="Model for counting reads over the supplied gene model- see HTSeq docs" help="If in doubt, union is a reasonable default but intersection-strict avoids double counting over overlapping exons"> <option value="union" selected="true">union</option> @@ -28,65 +41,98 @@ <option value="intersection-nonempty">intersection-nonempty</option> </param> <param name="id_attr" type="select" label="GTF attribute to output as the name for each contig - see HTSeq docs" - help="If in doubt, gene name is the only option right now"> + help="If in doubt, use gene name or if you need the id in your GTF, gene id"> <option value="gene_name" selected="true">gene name</option> <option value="gene_id">gene id</option> <option value="transcript_id">transcript id</option> <option value="transcript_name">transcript name</option> </param> <param name="feature_type" type="select" label="GTF feature type for counting reads over the supplied gene model- see HTSeq docs" - help="exon is all"> + help="GTF feature type to count over - exon is a good choice with gene name as the contig to count over"> <option value="exon" selected="true">exon</option> <option value="CDS">CDS</option> <option value="UTR">UTR</option> <option 
value="transcript">transcript</option> </param> - <param name="firstsamf" type="data" label="SAM file from your history to count reads overlapping gene model regions" format="sam" /> - <repeat name="samfiles" title="Additional SAM files from your history to count reads overlapping gene model regions"> - <param name="samf" type="data" label="Additional SAM file from your history" format="sam" size="100"/> + <param name="firstsamf" type="data" label="First bam/sam file from your history to count reads overlapping gene model regions" + format="sam,bam" optional="true"/> + <param name="secondsamf" type="data" label="Another bam/sam file from your history to count reads overlapping gene model regions" + format="sam,bam" optional="true"/> + <param name="thirdsamf" type="data" label="Another bam/sam file from your history to count reads overlapping gene model regions" + format="sam,bam" optional="true"/> + <param name="fourthsamf" type="data" label="Another bam/sam file from your history to count reads overlapping gene model regions" + format="sam,bam" optional="true"/> + <repeat name="samfiles" title="Use this to add all needed additional bam/sam files from your history to count reads overlapping gene model regions"> + <param name="samf" type="data" label="Additional bam/sam file from your history" format="sam,bam" size="100" optional="true"/> </repeat> </inputs> <outputs> <data format="tabular" name="outfile" label="${title}_htseqsams2mx.xls" /> </outputs> + <tests> + <test> + <param name="feature_type" value="exon" /> + <param name="firstsamf" dbkey='rn4' ftype="bam" value="rn4chr20test1.bam" /> + <param name="firstsamf.name" value="rn4chr20test1.bam" /> + <param name="id_attr" value="gene_id" /> + <param name="model" value="union" /> + <param name="stranded" value="no" /> + <param name="title" value="htseq test" /> + <param name="mapqmin" value="0" /> + <param name="secondsamfile" value="rn4chr20test2.bam" ftype="bam" dbkey="rn4" /> + <param name="secondsamfile.name" 
value="rn4chr20test2.bam" /> + <output name="outfile" file="htseqsams2mx_test1_out.xls" ftype="tabular" lines_diff="30"/> + </test> + </tests> <help> -**Warning** - -This code will count reads overlapping contigs supplied in the gff file. - - -**Note** - -htseqsams2mx is an experimental tool currently under test - -There is much discussion about whether to count optical/pcr duplicates. If you set the ignore flag to True, any reads in the input BAM files marked as -duplicates by upstream processing such as the Picard MarkDuplicates tool will NOT be counted. The 'right' setting depends on your data and coverage. For extremely deep -coverage, true duplicate reads are inevitable and ignoring them may be throwing away useful real data. In most cases, counting them is probably a reasonable -choice - any induced bias is likely to be non-differential between samples, whereas it's not at all clear whether that's the case if they are ignored. - ----- - **What this tool does** -Counts reads in multiple sample aligned sam format files using HTSeq counting over a gene model supplied as a GFF file - -The output is a tabular file containing the count matrix and suitable for downstream processing. +Counts reads in multiple sam/bam format mapped files and generates a matrix ideal for edgeR and other count based tools +It uses HTSeq to count your sam reads over a gene model supplied as a GTF file +The output is a tabular text (columnar - spreadsheet) file containing the +count matrix for downstream processing. Each row contains the counts from each sample for each +of the non-empty GTF input file contigs matching the GTF attribute choice above. +You probably want to use gene level GTF output attribute and count reads that overlap +GTF exons for RNA-seq. Or you can count over exons by using transcript level output names or ids. Etc. 
---- +**Tool author's plea on the importance of replicates** + +If you want the downstream p values to inform you about your data in terms of rejecting or accepting the null hypothesis +under random sampling from the universe of possible biological/experimental replicates from which your data was drawn, +which is what published p values are often assumed to do, then you need biological +(or for cell culture material experimental) replicates. + +Using technical or no replicates means the downstream p values are not interpretable the way most people would assume +they are - ie as the probability of obtaining a result as extreme as, or more extreme than, your experimental data +in millions of experiments conducted under the null hypothesis. + +There is no way around this and it is scientific fraud to ignore this issue and publish bogus p values derived from +technical or no replicates without making the lack of biological or experimental error in the p value calculations +clear to your readers. + +See your stats 101 notes on the central limit theorem and test statistics for a refresher or talk to a +statistician if this makes no sense please. + **Attribution** +This Galaxy tool relies on HTSeq_ from http://www-huber.embl.de/users/anders/HTSeq/doc/index.html +for the tricky work of counting. That code includes the following attribution: -This Galaxy wrapper was written for a revised version by Ross Lazarus and is licensed under the LGPL_ like other rgenetics artefacts +## Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology +## Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General +## Public License v3. Part of the 'HTSeq' framework, version HTSeq-0.5.4p3 + +It will be automatically installed if you use the toolshed, as in general you probably should. +HTSeq_ must be installed with this tool if you install manually. 
+ +Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is +licensed to you under the LGPL_ like other rgenetics artefacts .. _LGPL: http://www.gnu.org/copyleft/lesser.html - +.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html </help> </tool> - - - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_bams_to_count_matrix/test-data/htseqsams2mx_test1_out.xls Fri Jun 07 01:50:53 2013 -0400 @@ -0,0 +1,4 @@ +Contig col1 col2 +Clic2 494 944 +F1M7K0_RAT 3 2 +Tmlhe 164 172
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htseq_bams_to_count_matrix/test-data/rn4_chr20_100k.gtf Fri Jun 07 01:50:53 2013 -0400 @@ -0,0 +1,62 @@ +chr20 protein_coding CDS 801 1238 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 801 1238 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 1742 1976 . + 0 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 1742 1976 . + . exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 2016 2177 . + 2 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 2016 2177 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 2263 2342 . 
+ 2 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 2263 2342 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 2345 2533 . + 0 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; protein_id "ENSRNOP00000000957"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding exon 2345 2533 . + . exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000730"; gene_name "F1M7K0_RAT"; p_id "P14715"; transcript_id "ENSRNOT00000000957"; transcript_name "F1M7K0_RAT"; tss_id "TSS11562"; +chr20 protein_coding CDS 19528 19708 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 19528 19708 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 19528 19708 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 19528 19708 . + . 
exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding start_codon 19528 19530 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding start_codon 19528 19530 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 21979 22014 . + 2 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 21979 22014 . + . exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 25349 25525 . + 2 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 25349 25525 . + 2 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 25349 25525 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 25349 25525 . + . 
exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 35197 35476 . + 2 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 35197 35476 . + 2 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 35197 35476 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 35197 35476 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 36764 36883 . + 1 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 36764 36883 . + 1 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 36764 36883 . + . 
exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 36764 36883 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 49040 49276 . + 1 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 49040 49276 . + 1 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 49040 49276 . + . exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 49040 49276 . + . exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 55193 55331 . + 1 exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 55193 55331 . 
+ 1 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 55193 55331 . + . exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 55193 55331 . + . exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding CDS 55883 56011 . + 0 exon_number "8"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; protein_id "ENSRNOP00000044070"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding CDS 55883 56011 . + 0 exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; protein_id "ENSRNOP00000000956"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 55883 56124 . + . exon_number "8"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding exon 55883 56124 . + . exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding stop_codon 56012 56014 . + 0 exon_number "8"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P13601"; transcript_id "ENSRNOT00000049573"; transcript_name "Tmlhe"; tss_id "TSS451"; +chr20 protein_coding stop_codon 56012 56014 . 
+ 0 exon_number "7"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000729"; gene_name "Tmlhe"; p_id "P3227"; transcript_id "ENSRNOT00000000956"; transcript_name "TMLH_RAT"; tss_id "TSS451"; +chr20 protein_coding exon 66518 66785 . + . exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 66729 66785 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding start_codon 66729 66731 . + 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 75931 76040 . + 0 exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 75931 76040 . + . exon_number "2"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 76165 76290 . + 1 exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 76165 76290 . + . exon_number "3"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 79941 80047 . 
+ 1 exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 79941 80047 . + . exon_number "4"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 80692 80873 . + 2 exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 80692 80873 . + . exon_number "5"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding CDS 81142 81294 . + 0 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; protein_id "ENSRNOP00000000955"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 81142 81536 . + . exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding stop_codon 81295 81297 . + 0 exon_number "6"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000000728"; gene_name "Clic2"; p_id "P19357"; transcript_id "ENSRNOT00000000955"; transcript_name "Clic2"; tss_id "TSS24592"; +chr20 protein_coding exon 92810 93748 . - . 
exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; +chr20 protein_coding stop_codon 92810 92812 . - 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; +chr20 protein_coding CDS 92813 93748 . - 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; protein_id "ENSRNOP00000042115"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091"; +chr20 protein_coding start_codon 93746 93748 . - 0 exon_number "1"; gene_biotype "protein_coding"; gene_id "ENSRNOG00000029622"; gene_name "Olr1668"; p_id "P5423"; transcript_id "ENSRNOT00000047483"; transcript_name "Olr1668"; tss_id "TSS17091";
--- a/htseq_bams_to_count_matrix/tool_dependencies.xml.notworkingcomplex Thu Jun 06 22:14:16 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="package_htseq_0_5_4" version="0.5.4"> - <repository name="package_htseq_0_5_4" owner="fubar" toolshed="http://testtoolshed.g2.bx.psu.edu" changeset_revision="7574983a942a"/> - </package> -</tool_dependency>