changeset 0:a63b082a26eb

Imported from capsule None
author devteam
date Mon, 19 May 2014 10:59:37 -0400
parents
children d9f0a11824e9
files fasta_concatenate_by_species.py fasta_concatenate_by_species.xml test-data/cf_maf2fasta.dat test-data/fasta_concatenate_out.fasta utils/__init__.py utils/maf_utilities.py utils/odict.py
diffstat 6 files changed, 947 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_concatenate_by_species.py	Mon May 19 10:59:37 2014 -0400
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+#Dan Blankenberg
+"""
+Takes a Multiple Alignment FASTA file and concatenates 
+sequences for each species, resulting in one sequence 
+alignment per species.
+"""
+
+import sys, tempfile
+from utils.maf_utilities import iter_fasta_alignment
+from utils.odict import odict
+
+def __main__():
+    input_filename = sys.argv[1]
+    output_filename = sys.argv[2]
+    species = odict()
+    cur_size = 0
+    for components in iter_fasta_alignment( input_filename ):
+        species_not_written = species.keys()
+        for component in components:
+            if component.species not in species:
+                species[component.species] = tempfile.TemporaryFile()
+                species[component.species].write( "-" * cur_size )
+            species[component.species].write( component.text )
+            try:
+                species_not_written.remove( component.species )
+            except ValueError:
+                #this is a new species
+                pass
+        for spec in species_not_written:
+            species[spec].write( "-" * len( components[0].text ) )
+        cur_size += len( components[0].text )
+    out = open( output_filename, 'wb' )
+    for spec, f in species.iteritems():
+        f.seek( 0 )
+        out.write( ">%s\n%s\n" % ( spec, f.read() ) )
+    out.close()
+
+if __name__ == "__main__" : __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_concatenate_by_species.xml	Mon May 19 10:59:37 2014 -0400
@@ -0,0 +1,72 @@
+<tool id="fasta_concatenate0" name="Concatenate" version="0.0.0">
+  <description>FASTA alignment by species</description>
+  <command interpreter="python">fasta_concatenate_by_species.py $input1 $out_file1</command>
+  <inputs>
+    <param name="input1" type="data" format="fasta" label="FASTA alignment"/>
+  </inputs>
+  <outputs>
+    <data name="out_file1" format="fasta"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="cf_maf2fasta.dat" />
+      <output name="out_file1" file="fasta_concatenate_out.fasta" />
+    </test>
+  </tests>
+  <help>
+  
+**What it does**
+  
+This tools attempts to parse FASTA headers to determine the species for each sequence in a multiple FASTA alignment.
+It then linearly concatenates the sequences for each species in the file, creating one sequence per determined species.
+
+-------
+
+**Example**
+
+Starting FASTA::
+  
+  >hg18.chr1(+):10016339-10016341|hg18_0
+  GT
+  >panTro2.chr1(+):10195380-10195382|panTro2_0
+  GT
+  >rheMac2.chr1(+):13119747-13119749|rheMac2_0
+  GT
+  >mm8.chr4(-):148269679-148269681|mm8_0
+  GT
+  >canFam2.chr5(+):66213635-66213637|canFam2_0
+  GT
+  
+  >hg18.chr1(-):100323677-100323679|hg18_1
+  GT
+  >panTro2.chr1(-):101678671-101678673|panTro2_1
+  GT
+  >rheMac2.chr1(-):103154011-103154013|rheMac2_1
+  GT
+  >mm8.chr3(+):116620616-116620618|mm8_1
+  GT
+  >canFam2.chr6(+):52954092-52954094|canFam2_1
+  GT
+  
+
+
+becomes::
+  
+  >hg18
+  GTGT
+  >panTro2
+  GTGT
+  >rheMac2
+  GTGT
+  >mm8
+  GTGT
+  >canFam2
+  GTGT
+
+
+.. class:: warningmark 
+
+ This tool will only work properly on files with Galaxy style FASTA headers.
+
+</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cf_maf2fasta.dat	Mon May 19 10:59:37 2014 -0400
@@ -0,0 +1,134 @@
+>hg17.chr7(+):127471195-127471526|hg17_0
+gtttgccatcttttgctgctctagggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTAAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAA
+>panTro1.chr6(+):129885076-129885407|panTro1_0
+gtttgccatcttttgctgctcttgggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTGAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAA
+>rheMac2.chr3(+):165787989-165788319|rheMac2_0
+gcttgccatcttttgatgctcttgggaatccagcagctgtcaccat-taaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCATAGGCAtgagtcaggccatagtgctggacccacagaattatgagctaaataagtagtgttgggttaagtcactaagttttaggcatagtgtgttatgtagcTCACAAACATATAAGACTGTGTGTTTTTTGACTGGAGGAAGAGATGCCATAAAGACCACCTTTTGAAACTTCTCAAATACTGCCATTGATGTGCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAA
+>rn3.chr4(+):56178191-56178473|rn3_0
+CTTCACTCTCATTTGCTGTT----------------CTGTCACTATGGAGACAAACACAGGCTAGCCCAGTTACTATCTTGATCACAGCAGCTGT----CAGCTAGCTGCCACTCACAGGAATAAGGCCATACCATT-GATCCACTGAACCTTGATCTAGGAATTTGGC----------------------TGGGGCCAGTTTGCGGTGTCACTCATGA--CTCTAAGATTGTGTGTTTG----CTCCAGGAAGAGACGGCAAGAGGATTACCTTTAAAAGGTTCGG-AGTCTAGCTGTAGACAGCCCAATGGG---------------------------------------------------------TATAAC---------AATACTCACTAA
+>mm7.chr6(+):28984529-28984886|mm7_0
+CTCCACTCTCGTTTGCTGTT----------------CTGTCACCATGGAAACAAACG-AGGGTGGTCCAGTTACTATCTTG---ACTGCAGCTGG----CAGTCAGTTGCCACT--CAGGAATAAGGCTATGCCATT-GATCCACTGAACCGTGATCTGGAAACCTGGCTGTTGTTT-------CAAGCCTTGGGGCCAGTTTGCGGTGTTACTCATGA--CTCTAAGATCGTGTGCTTG----CTGCAGGAAGAGACAGCAAGGGGGTTACATTTAAAAAGCCCCC-AGTTTAGCTATAGGCAGGCCAACAGGTGTAAAAATACTCACTAGTAATGGGCTGAACTCATGGAGGTAGCATTAGTGAGACACTGTAACTGTTTTTTTAAAAATCACTAA
+
+>hg17.chr7(+):127471526-127471584|hg17_1
+AATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
+>mm7.chr6(+):28984886-28984940|mm7_1
+----AACGTTTCATTGATTGCTCATCATTTAAAAAAAGAAATTCCTCAGTGGAAGAGG
+>rheMac2.chr3(+):165788319-165788377|rheMac2_1
+AATTTGTGGTTTATTTATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
+>panTro1.chr6(+):129885407-129885465|panTro1_1
+AATTTGTGGTTTATTCGTTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
+
+>hg17.chr7(+):127471584-127471688|hg17_2
+GAGATATTT-GGggaaatttt-gtatagactagctt--tcacgatgttagggaattattattgtgtgataatggtcttgcagttac-acagaaattcttcctta-ttttt
+>panTro1.chr6(+):129885465-129885569|panTro1_2
+GAGACATTT-GGggaaatttt-gtatagactagctt--tcacgatgttagggagttattattgtgtgataatggtcttgcagttac-acagaaattcttcctta-ttttt
+>rheMac2.chr3(+):165788377-165788482|rheMac2_2
+GAGATATTT-GGggaaatttg-gtatagactagctt--tcatgatgtaagggagttatttttgtgtgataatggccctacagttac-acagaaattcttccttatttttt
+>canFam2.chr14(-):11090703-11090811|canFam2_2
+gagatattt-gggggaatttgaatgtagtgttgctcttttgtgatgctaagaaattataattgtctgatgatagtctcgtggttatgggggaaatgcttcctta-ttttt
+>bosTau2.chr4(-):50243931-50244034|bosTau2_2
+-agacattg-ggtaaaattcaaatgcagactagctc----atgatgttaaagaattactcttgtgtggtaatggtcttgtgatagagatagaaatgcttcctta-ttttt
+>rn3.chr4(+):56182200-56182295|rn3_2
+----TATTTGGGGGAAATATG-ATGTGCA----CTT--CCATGATCTTAAAGAATTGCTACTGTTTGATAGTGATCTTATGGTTAA-ATAAAAAAAAT--CTTA-GTTGT
+>dasNov1.scaffold_256527(+):298-392|dasNov1_2
+GAGACATTT-GGAGAAATTTG-----------Aatt--tcatgatgttaaggaattacttttgtatgatgatggtcttgtggctat-gtagaatttcttccgtg-tttta
+
+>hg17.chr7(+):127471688-127471871|hg17_3
+tgggaagcaccaaagta-------gggataaaatgtcatgatgtgtgcaatacactttaaaatgtttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataattattaaatctaggt-----gatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaatttttcattta--------------------------aaaa-
+>panTro1.chr6(+):129885569-129885752|panTro1_3
+tgggaaacaccaaagta-------gggataaaatgtcatgatgtgtgcaatacgctttaaaatatttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataattattaaatctaggt-----gatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaaattttcattta--------------------------aaaa-
+>rheMac2.chr3(+):165788482-165788684|rheMac2_3
+tgggaagcacaaaagta-------gggataaaatgtcatgatgtgtacaatatgctttaaaatatttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataactgttaaatctaggt-----gttgggtatattgcagttcattatgttattgcacacttttctgtgtgtttaaaattttcatttaaaaatatgttttaaaaatg-------aaaa-
+>rn3.chr4(+):56182295-56182489|rn3_3
+TAGAAAATACTCAAATATTTAGGGGCGTGACAATGTCACAGTGTCTGCAATTTGCTTTAAAGATTTTT-----AAA----------TATTTAAAAAAGTTTTAATAATTTTGAAAAACTGAAGCTACACTATG---GGAAGTGGTAATTGTTACATATGGGT-----AATAAGTAT-----AATTCGTTATATTAT-------TTTTC------TTAGAATTTTTCATTTG--------------------------AAAA-
+>bosTau2.chr4(-):50243792-50243930|bosTau2_3
+agataaacacttaagtattta---aggatgaaacgccctgatgtttgtaatttgctttagaatattttagccaaaa----------gaattaa-------------------------tgatgc--aaatatg--caaaaagagta--cgttaaacctaa-----------------------------------------------------atttgCGATTttcattta--------------------------aaaa-
+>canFam2.chr14(-):11090345-11090505|canFam2_3
+agacacaaactgaagtattta---aggatgaaatgtcatgatgtttgcaattggctttaaaatattttagccaaaa-----------agtaaa-------------------------tgaagc--AAATATG--GGAAGACAATAATCATTAAATCTAGGT-----GATGCATAC---------------------------TTTTCCATATGTTTGAAATTTTCATTTA--------------------------AAAA-
+>dasNov1.scaffold_256527(+):393-625|dasNov1_3
+agacgcatgctgaagcatgta---aggataaaatgtcgtggtgtttgtaatttattctaaaacattttagccaaaaacaaataaataaataaa-------------------------tgaagc--aaatatgggggaaatgtttaattgttaaatctagatttaacacggtatataccgtgcttcattatactagtctctacttttccatgtgtttgaaattttCATTAAAATGTTTGTTTGTTGTCTGTTTTAATGAAAT
+
+>hg17.chr7(+):127471871-127471910|hg17_4
+actttgagctagacaccaggctatgagcta-ggagcatag
+>rheMac2.chr3(+):165788684-165788723|rheMac2_4
+actttgagctagataccaggttatgagcta-ggagcatag
+>panTro1.chr6(+):129885752-129885791|panTro1_4
+actttgagctagacaccaggctatgagcta-ggagcatag
+>bosTau2.chr4(-):50243734-50243773|bosTau2_4
+tcttcgtgcaacgcacggggctatcaatgt-gggatacag
+>canFam2.chr14(-):11090081-11090120|canFam2_4
+ACATCAtgctagatcctggactatgagctg-ggtatatag
+>dasNov1.scaffold_256527(+):625-665|dasNov1_4
+CCTTTGTGCTAGCCACTGGGATGAAAGCTAGGGAACACAG
+
+>hg17.chr7(+):127471910-127472074|hg17_5
+caatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa-ctc-aaagaatgcacattctCTG-GGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACCTG-TGTGATCCTGGCTTTTCCTGTTCCCTCCG---CATCATCACTGCAGGTGTGTTTTCCCAAG
+>panTro1.chr6(+):129885791-129885955|panTro1_5
+caatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa-ctc-aaagaatgcacattctCTG-GGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACATG-TGTGATCCTGGCTTTTCCTGTTCCCTCTG---CATCATCACTGCAGGTGTATTTTCCCAAG
+>rheMac2.chr3(+):165788723-165788885|rheMac2_5
+caatgaccaa----------------------------------------------------------------------------------------------atagacccctaccga-ctc-aaagaatgtacattctTTG-GGAAACATGTTTCCATCAGAAAATCTCAAATGCAATGTGACTGGGGTCTCCAGGACCTG-TGTGAGCCTGGCTTTTCCTGTTCCCTCCA---CATCATCACTGCAGGTGTATTTTCCC--G
+>mm7.chr6(+):28990714-28990875|mm7_5
+caaaaaccaa------------------------------------------------------------------------------------------------aaaaACCTATAGC-CTC-ACAGGGTGGGTTGTCTTTG-AGGAACATGCATCCGCTAGAAAGTCCCAAGTACACTATGACAGTTG--CCCAGGCCCCGCCTTAAACCTGGTTTTCCTGGTTTCTTTCA---CATCATTACCACGAATATATTTCCTCAAG
+>rn3.chr4(+):56183448-56183705|rn3_5
+--ATGACCAATATACACTGTTTACATGTATAGCATTGTGAATGGAGACATAAAAAGATAATCTAGCTTTGTGCTAGGTAGGTGCTGAGCTCTTAACAGTGCTGGGCAGAAACCTATAAC-CTC-ACAGGGTGGGTTGTCTTTG-AGGAGCGTGCTAACCCTAGGAAGTCTCAAATACAATGTGATGGTTGCCCCCAGGCACCACCTTGAACCTGGTCTTCCTGGTTTCTTTCA---CACCATTACCACAAATACATTTTCTCAGG
+>bosTau2.chr4(-):50243566-50243734|bosTau2_5
+atgtgaacaa---------------------------------------------------------------------------------------------aacggacccgtgtgggactcggcggagcacacagattttgcgggagCACGTTCCCGTTAGGAAGTCTCTGATGCAATACGACCGGTGCCTTCAGGACCTG-TG--AGGCTGACTTTCCTTA-CCCCTCCACACCATCATCAAGGCAGGTGTGATTTTCCAGG
+>canFam2.chr14(-):11089913-11090081|canFam2_5
+cagtgaacaa---------------------------------------------------------------------------------------------aacagagccctgcagt-cttgatggagcacacaacctttg-gggaaCATGTTTCCATAAGAAAGTCTCCAATGTGATCTGA-TGGTGCCGCCAGGACCTA-TGTCAGCCTACCGTTCCATGTCCCCTCCACACCATCATCACTGCAGGTGTGTTTTCCCACA
+>dasNov1.scaffold_256527(+):665-786|dasNov1_5
+CAGTGAGCAA-----------------------------------------------------------------------------------------------CAGCCTGGCTCCGT-CC--GGGGGCCGCTCAGCAGCTC-GGGAGCGTGGAGACG---GGAAGTCTGTCACGCGATGCG-----------CTGGGCCCG------------CTGTTCCCGCCCCCCTCC---CCCC----------------TTTCCCAAG
+
+>hg17.chr7(+):127472074-127472258|hg17_6
+TTTTAAA------CATTTACCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-ccc-tgggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagtt
+>panTro1.chr6(+):129885955-129886139|panTro1_6
+TTTTAAA------CATTTACCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-tcc-tgggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagtt
+>rheMac2.chr3(+):165788885-165789069|rheMac2_6
+TTTTAAA------CATTTACTCTCCCAGTAGCCTTGCATCTCGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTGTACGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-tcc-tgggcacccctgtgttcttggcactggaaaagtaccaggacttaaatagta
+>mm7.chr6(+):28990875-28991025|mm7_6
+TTTAAAGAAAGTACCCCCTCCTTTCCAGT-GCCTCAAATCTAGAAGAATATTCATAGTGAAGT-GC------------------------ACAGCCGGGTGGTGCATGGTA-ATCTGGAAGTCACCTCTGCAAATCTT-TCC----------------TGTTGGTGCTGTGAAGGCACCAGGACTTCAAGAGTA
+>rn3.chr4(+):56183705-56183879|rn3_6
+TTTAAAAGAAGT-CCCACTCCTTTCCAGT-GCCCTAGATCTAGAAGCACATTCATAATGATGT-ACAC-----TAACCC----------GACAGCTGTGTGGTATATGGTA-TCCCGGAAGTCACCTCAGCAAACCTT-TCCCGGGGAACCTACATGGTGTTGGTGCTGTGAAGGTACCAGGTTGTCAAGGGTA
+>canFam2.chr14(-):11089743-11089913|canFam2_6
+TTTTAAA------TATCTGC-TTCCCGGTGGCCTTGAGTCTAGAGGAGTCCCCCCACTATGGTGGCACTAATACTGAAGGTCAGAAATAATCAGTTCTGTGGTGCATGTTGCCCCTGAGGTTCTGTTCGGGAAACTTC-TTC-TGAGCAC----ATGCACCTGGCACTGCAAACGTACCAGGA-----------
+>dasNov1.scaffold_256527(+):786-964|dasNov1_6
+TTTTAAA------AATTTACCTTCCCAGTGGCGGTGAATCCGGAGGAATACGGAAACTGGGGC-GCACTACCATGACACGTGTCAAA-AATCAGTTCCGTGGTCCGTGGAGGGCCTGGGGTTC------GAAAATCTTGTCC-CGAGCACCCCCGTGCGCCTGGCACCGCGACAGTGACAGGACTGAAGCGTG-
+
+>hg17.chr7(+):127472258-127472280|hg17_7
+gatggccca-atccctgtcctct-
+>panTro1.chr6(+):129886139-129886161|panTro1_7
+gatggccca-atccctgtcctct-
+>rheMac2.chr3(+):165789069-165789091|rheMac2_7
+gatggccca-atccctgtcctct-
+>mm7.chr6(+):28991025-28991048|mm7_7
+AATGGCAGAGGGCTCTGTTCTCT-
+>rn3.chr4(+):56183879-56183902|rn3_7
+AATGGCAGAGGCCCCTGTTCTCT-
+>canFam2.chr14(-):11089526-11089548|canFam2_7
+GGAGACTTG-ATGCCTGCCTTCC-
+>dasNov1.scaffold_256527(+):964-987|dasNov1_7
+GACGGCCAG-ACCTCTGCCCTCGG
+
+>hg17.chr7(+):127472280-127472681|hg17_8
+taaaacctaagggaggagaTGGAAAG-GGGCACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATG--GAGAGGCCCTA-GTGGAATGGGGAGATTCTTCCGGGAGAAGCGATGGATGCACAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCA------ATGTGC-AATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta--aagggccaggtagaaaacattttaggctttctaagccaagg---caaaattgaggat-attacatgggtacttatacaacaagaataaacaatt---tacacaa-ttttttgttgacagaattcaaaa---ctttat----agacac---agaaatgcaaatttcctgt
+>panTro1.chr6(+):129886161-129886562|panTro1_8
+taaaacctaagggaggagaTGGAAAG-GGGCACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATG--GAGAGACCCTA-GTGGAATGGGGAGATTCTTCCGGGAGAAGCGATGGATGCGCAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCA------ATGTGC-AATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta--aagggccaggtagaaaacattttaggctttctaagccaagg---caaaattgaggat-attacatgggtacttatacaacaagaataaacaatt---tacacaa-ttttttgttgacagaattcaaaa---ctttat----agacac---agaaatgtaaatttcctgt
+>rheMac2.chr3(+):165789091-165789492|rheMac2_8
+taaaacctaatggaggagatggaATG-GGTCACCCAACCCGGACTGAGAGACAGGAATTAGCTGCAAGGGTAACCAGGACAAGCTTCTCTA---ATGATG--GAGAGACCCTA-GTGGAATGGGGAGATTCTTCTGGGAGAAGCGATGGATTCGTAGTTGGGCATCCCCACAGAGGGACTGGAAAGAAAAAAGACCTGGAGGAACCA------ATGTGC-AATGTATGTGTGTTTCCTGGTTcaagggctggcaaactttctcta--aagggccagatagaaaacattttaggctttgtaagccaagg---caaaatcgaggag-attacatgggtacttatacaacaagaataaacaatt---tccacaa--tttttattcacagaattcaaaa---ctttat----agacac---agaaatgtaaatttcctgt
+>rn3.chr4(+):56183902-56184219|rn3_8
+------------------------------------GTCCATAGTCAAAG------------------------------AAGCCTCTCAG---ATGGAG--AGCAGGGCCTATGCAAAAGAGGGGGCTTCTGTAGGCAGAAGGGATGGACTAGCCTCCGGACATAGCCATAGAGAGGCTGGCAGGACTGAGACCCAGGAGAAGCCAGCGCAGGTGTGCGGGCGTGTGTATATTTCATAGTTTGCAGGTTGG----------------------------CAAACAATTCCTGCTTTGCAGGCCAAGA---GGAAACTGAAGGTGACCCCGTGAGTGCTTAC---ACAAGAGAAAACAAG-------ACAA-TTTTTGGTTGACCAAATTCAGAA---CTTTATTTGAGGATGC---TAAAGTTTAAATTTCTTTT
+>canFam2.chr14(-):11089143-11089523|canFam2_8
+TACAGCCTGTGGGCAGAGGTGGGAAGAGGTCACGCAAGCCAGTTGGAATGAGGGGAGTTGGCTGGAAAGGTGACCAGGACAAGCTACTTCAACCAGGAAG--AAGAGACCCCG-GTG----------------CTTGGAGAAGGCCTGATTGAGCAGTCCTGCATGCCCGCCCAC-GACTGGCAGGAATAAAGACCCAGAAGAGCTA------ACGTGC-AATGTA------TTTTCTAGTTCCAgggttggcaaactttctctct-aagggtgggatgataaacattttaggcttttcagaccaaga---ggcgacatcagag-ggtatgtaggt---------acaagagggaaaagttgcccccggaa-ttttttg--gataaaattcaaaa---ctttacttagggatgc---caaaatgtaaacttcatat
+>dasNov1.scaffold_256527(+):987-1401|dasNov1_8
+CTAAATCTCGCGGAGAAGGTGGAACA-GGTTACCCAAACCCGACCGAG-GAGGCGAGTTG---GAAACGGCGACTGGGACAAGCTCCCTCA---GAGACGGAGAGAGACCCCA-GTGGAAGGGGGGAGAGGCTCTTAGGGAAACGATGGGGGGACCCGCCCGCACCCGCACAGAGGCGCTGGCAGGCACAGCGGCCCCGAGGAGCCC------AGGAGC-AGGGC-TGTGT-TCCCCTGCATcaggggttggcaaactttttctgcaaagggccagatagtaaatattttaggctttgcaaaccaagaagtagaaagggaggcc-attatgtacgtatttatatagcaagagagaacattt---cccacaatttttttattgacagaatttaaaacttctttattgatgaacaccaaagaaacttgaatttcatat
+
+>hg17.chr7(+):127472681-127472715|hg17_9
+aattttcccat---gagaactattcttcttttgtttt
+>rheMac2.chr3(+):165789492-165789526|rheMac2_9
+aattttcacat---aagaactattcttcttttgtttt
+>panTro1.chr6(+):129886562-129886596|panTro1_9
+aattttcccgt---gagaactattcttcttttgtttt
+>canFam2.chr14(-):11089108-11089143|canFam2_9
+aatggtcatgt--ccataactattcttcttttatttt
+>dasNov1.scaffold_256527(+):1401-1433|dasNov1_9
+aattttcacatatcacgaagtatttttttttt-----
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_concatenate_out.fasta	Mon May 19 10:59:37 2014 -0400
@@ -0,0 +1,16 @@
+>hg17
+gtttgccatcttttgctgctctagggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTAAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAAAATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGGGAGATATTT-GGggaaatttt-gtatagactagctt--tcacgatgttagggaattattattgtgtgataatggtcttgcagttac-acagaaattcttcctta-ttttttgggaagcaccaaagta-------gggataaaatgtcatgatgtgtgcaatacactttaaaatgtttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataattattaaatctaggt-----gatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaatttttcattta--------------------------aaaa-actttgagctagacaccaggctatgagcta-ggagcatagcaatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa-ctc-aaagaatgcacattctCTG-GGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACCTG-TGTGATCCTGGCTTTTCCTGTTCCCTCCG---CATCATCACTGCAGGTGTGTTTTCCCAAGTTTTAAA------CATTTACCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-ccc-tgggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagttgatggccca-atccctgtcctct-taaaacctaagggaggagaTGGAAAG-GGGCACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATG--GAGAGGCCCTA-GTGGAATGGGGAGATTCTTCCGGGAGAAGCGATGGATGCACAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCA------ATGTGC-AATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta--aagggccaggtagaaaacattttaggctttctaagccaagg---caaaattgaggat-attacatgggtacttatacaacaagaataaacaatt---tacacaa-ttttttgttgacagaattcaaaa---ctttat----agacac---agaaatgcaaatttcctgtaattttcccat---gagaactattcttcttttgtttt
+>panTro1
+gtttgccatcttttgctgctcttgggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTGAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAAAATTTGTGGTTTATTCGTTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGGGAGACATTT-GGggaaatttt-gtatagactagctt--tcacgatgttagggagttattattgtgtgataatggtcttgcagttac-acagaaattcttcctta-ttttttgggaaacaccaaagta-------gggataaaatgtcatgatgtgtgcaatacgctttaaaatatttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataattattaaatctaggt-----gatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaaattttcattta--------------------------aaaa-actttgagctagacaccaggctatgagcta-ggagcatagcaatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa-ctc-aaagaatgcacattctCTG-GGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACATG-TGTGATCCTGGCTTTTCCTGTTCCCTCTG---CATCATCACTGCAGGTGTATTTTCCCAAGTTTTAAA------CATTTACCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-tcc-tgggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagttgatggccca-atccctgtcctct-taaaacctaagggaggagaTGGAAAG-GGGCACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATG--GAGAGACCCTA-GTGGAATGGGGAGATTCTTCCGGGAGAAGCGATGGATGCGCAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCA------ATGTGC-AATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta--aagggccaggtagaaaacattttaggctttctaagccaagg---caaaattgaggat-attacatgggtacttatacaacaagaataaacaatt---tacacaa-ttttttgttgacagaattcaaaa---ctttat----agacac---agaaatgtaaatttcctgtaattttcccgt---gagaactattcttcttttgtttt
+>rheMac2
+gcttgccatcttttgatgctcttgggaatccagcagctgtcaccat-taaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCATAGGCAtgagtcaggccatagtgctggacccacagaattatgagctaaataagtagtgttgggttaagtcactaagttttaggcatagtgtgttatgtagcTCACAAACATATAAGACTGTGTGTTTTTTGACTGGAGGAAGAGATGCCATAAAGACCACCTTTTGAAACTTCTCAAATACTGCCATTGATGTGCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAAAATTTGTGGTTTATTTATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGGGAGATATTT-GGggaaatttg-gtatagactagctt--tcatgatgtaagggagttatttttgtgtgataatggccctacagttac-acagaaattcttccttatttttttgggaagcacaaaagta-------gggataaaatgtcatgatgtgtacaatatgctttaaaatatttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataactgttaaatctaggt-----gttgggtatattgcagttcattatgttattgcacacttttctgtgtgtttaaaattttcatttaaaaatatgttttaaaaatg-------aaaa-actttgagctagataccaggttatgagcta-ggagcatagcaatgaccaa----------------------------------------------------------------------------------------------atagacccctaccga-ctc-aaagaatgtacattctTTG-GGAAACATGTTTCCATCAGAAAATCTCAAATGCAATGTGACTGGGGTCTCCAGGACCTG-TGTGAGCCTGGCTTTTCCTGTTCCCTCCA---CATCATCACTGCAGGTGTATTTTCCC--GTTTTAAA------CATTTACTCTCCCAGTAGCCTTGCATCTCGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTGTACGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-tcc-tgggcacccctgtgttcttggcactggaaaagtaccaggacttaaatagtagatggccca-atccctgtcctct-taaaacctaatggaggagatggaATG-GGTCACCCAACCCGGACTGAGAGACAGGAATTAGCTGCAAGGGTAACCAGGACAAGCTTCTCTA---ATGATG--GAGAGACCCTA-GTGGAATGGGGAGATTCTTCTGGGAGAAGCGATGGATTCGTAGTTGGGCATCCCCACAGAGGGACTGGAAAGAAAAAAGACCTGGAGGAACCA------ATGTGC-AATGTATGTGTGTTTCCTGGTTcaagggctggcaaactttctcta--aagggccagatagaaaacattttaggctttgtaagccaagg---caaaatcgaggag-attacatgggtacttatacaacaagaataaacaatt---tccacaa--tttttattcacagaattcaaaa---ctttat----agacac---agaaatgtaaatttcctgtaattttcacat---aagaactattcttcttttgtttt
+>rn3
+CTTCACTCTCATTTGCTGTT----------------CTGTCACTATGGAGACAAACACAGGCTAGCCCAGTTACTATCTTGATCACAGCAGCTGT----CAGCTAGCTGCCACTCACAGGAATAAGGCCATACCATT-GATCCACTGAACCTTGATCTAGGAATTTGGC----------------------TGGGGCCAGTTTGCGGTGTCACTCATGA--CTCTAAGATTGTGTGTTTG----CTCCAGGAAGAGACGGCAAGAGGATTACCTTTAAAAGGTTCGG-AGTCTAGCTGTAGACAGCCCAATGGG---------------------------------------------------------TATAAC---------AATACTCACTAA--------------------------------------------------------------TATTTGGGGGAAATATG-ATGTGCA----CTT--CCATGATCTTAAAGAATTGCTACTGTTTGATAGTGATCTTATGGTTAA-ATAAAAAAAAT--CTTA-GTTGTTAGAAAATACTCAAATATTTAGGGGCGTGACAATGTCACAGTGTCTGCAATTTGCTTTAAAGATTTTT-----AAA----------TATTTAAAAAAGTTTTAATAATTTTGAAAAACTGAAGCTACACTATG---GGAAGTGGTAATTGTTACATATGGGT-----AATAAGTAT-----AATTCGTTATATTAT-------TTTTC------TTAGAATTTTTCATTTG--------------------------AAAA-------------------------------------------ATGACCAATATACACTGTTTACATGTATAGCATTGTGAATGGAGACATAAAAAGATAATCTAGCTTTGTGCTAGGTAGGTGCTGAGCTCTTAACAGTGCTGGGCAGAAACCTATAAC-CTC-ACAGGGTGGGTTGTCTTTG-AGGAGCGTGCTAACCCTAGGAAGTCTCAAATACAATGTGATGGTTGCCCCCAGGCACCACCTTGAACCTGGTCTTCCTGGTTTCTTTCA---CACCATTACCACAAATACATTTTCTCAGGTTTAAAAGAAGT-CCCACTCCTTTCCAGT-GCCCTAGATCTAGAAGCACATTCATAATGATGT-ACAC-----TAACCC----------GACAGCTGTGTGGTATATGGTA-TCCCGGAAGTCACCTCAGCAAACCTT-TCCCGGGGAACCTACATGGTGTTGGTGCTGTGAAGGTACCAGGTTGTCAAGGGTAAATGGCAGAGGCCCCTGTTCTCT-------------------------------------GTCCATAGTCAAAG------------------------------AAGCCTCTCAG---ATGGAG--AGCAGGGCCTATGCAAAAGAGGGGGCTTCTGTAGGCAGAAGGGATGGACTAGCCTCCGGACATAGCCATAGAGAGGCTGGCAGGACTGAGACCCAGGAGAAGCCAGCGCAGGTGTGCGGGCGTGTGTATATTTCATAGTTTGCAGGTTGG----------------------------CAAACAATTCCTGCTTTGCAGGCCAAGA---GGAAACTGAAGGTGACCCCGTGAGTGCTTAC---ACAAGAGAAAACAAG-------ACAA-TTTTTGGTTGACCAAATTCAGAA---CTTTATTTGAGGATGC---TAAAGTTTAAATTTCTTTT-------------------------------------
+>mm7
+CTCCACTCTCGTTTGCTGTT----------------CTGTCACCATGGAAACAAACG-AGGGTGGTCCAGTTACTATCTTG---ACTGCAGCTGG----CAGTCAGTTGCCACT--CAGGAATAAGGCTATGCCATT-GATCCACTGAACCGTGATCTGGAAACCTGGCTGTTGTTT-------CAAGCCTTGGGGCCAGTTTGCGGTGTTACTCATGA--CTCTAAGATCGTGTGCTTG----CTGCAGGAAGAGACAGCAAGGGGGTTACATTTAAAAAGCCCCC-AGTTTAGCTATAGGCAGGCCAACAGGTGTAAAAATACTCACTAGTAATGGGCTGAACTCATGGAGGTAGCATTAGTGAGACACTGTAACTGTTTTTTTAAAAATCACTAA----AACGTTTCATTGATTGCTCATCATTTAAAAAAAGAAATTCCTCAGTGGAAGAGG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------caaaaaccaa------------------------------------------------------------------------------------------------aaaaACCTATAGC-CTC-ACAGGGTGGGTTGTCTTTG-AGGAACATGCATCCGCTAGAAAGTCCCAAGTACACTATGACAGTTG--CCCAGGCCCCGCCTTAAACCTGGTTTTCCTGGTTTCTTTCA---CATCATTACCACGAATATATTTCCTCAAGTTTAAAGAAAGTACCCCCTCCTTTCCAGT-GCCTCAAATCTAGAAGAATATTCATAGTGAAGT-GC------------------------ACAGCCGGGTGGTGCATGGTA-ATCTGGAAGTCACCTCTGCAAATCTT-TCC----------------TGTTGGTGCTGTGAAGGCACCAGGACTTCAAGAGTAAATGGCAGAGGGCTCTGTTCTCT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+>canFam2
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gagatattt-gggggaatttgaatgtagtgttgctcttttgtgatgctaagaaattataattgtctgatgatagtctcgtggttatgggggaaatgcttcctta-tttttagacacaaactgaagtattta---aggatgaaatgtcatgatgtttgcaattggctttaaaatattttagccaaaa-----------agtaaa-------------------------tgaagc--AAATATG--GGAAGACAATAATCATTAAATCTAGGT-----GATGCATAC---------------------------TTTTCCATATGTTTGAAATTTTCATTTA--------------------------AAAA-ACATCAtgctagatcctggactatgagctg-ggtatatagcagtgaacaa---------------------------------------------------------------------------------------------aacagagccctgcagt-cttgatggagcacacaacctttg-gggaaCATGTTTCCATAAGAAAGTCTCCAATGTGATCTGA-TGGTGCCGCCAGGACCTA-TGTCAGCCTACCGTTCCATGTCCCCTCCACACCATCATCACTGCAGGTGTGTTTTCCCACATTTTAAA------TATCTGC-TTCCCGGTGGCCTTGAGTCTAGAGGAGTCCCCCCACTATGGTGGCACTAATACTGAAGGTCAGAAATAATCAGTTCTGTGGTGCATGTTGCCCCTGAGGTTCTGTTCGGGAAACTTC-TTC-TGAGCAC----ATGCACCTGGCACTGCAAACGTACCAGGA-----------GGAGACTTG-ATGCCTGCCTTCC-TACAGCCTGTGGGCAGAGGTGGGAAGAGGTCACGCAAGCCAGTTGGAATGAGGGGAGTTGGCTGGAAAGGTGACCAGGACAAGCTACTTCAACCAGGAAG--AAGAGACCCCG-GTG----------------CTTGGAGAAGGCCTGATTGAGCAGTCCTGCATGCCCGCCCAC-GACTGGCAGGAATAAAGACCCAGAAGAGCTA------ACGTGC-AATGTA------TTTTCTAGTTCCAgggttggcaaactttctctct-aagggtgggatgataaacattttaggcttttcagaccaaga---ggcgacatcagag-ggtatgtaggt---------acaagagggaaaagttgcccccggaa-ttttttg--gataaaattcaaaa---ctttacttagggatgc---caaaatgtaaacttcatataatggtcatgt--ccataactattcttcttttatttt
+>bosTau2
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------agacattg-ggtaaaattcaaatgcagactagctc----atgatgttaaagaattactcttgtgtggtaatggtcttgtgatagagatagaaatgcttcctta-tttttagataaacacttaagtattta---aggatgaaacgccctgatgtttgtaatttgctttagaatattttagccaaaa----------gaattaa-------------------------tgatgc--aaatatg--caaaaagagta--cgttaaacctaa-----------------------------------------------------atttgCGATTttcattta--------------------------aaaa-tcttcgtgcaacgcacggggctatcaatgt-gggatacagatgtgaacaa---------------------------------------------------------------------------------------------aacggacccgtgtgggactcggcggagcacacagattttgcgggagCACGTTCCCGTTAGGAAGTCTCTGATGCAATACGACCGGTGCCTTCAGGACCTG-TG--AGGCTGACTTTCCTTA-CCCCTCCACACCATCATCAAGGCAGGTGTGATTTTCCAGG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+>dasNov1
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGACATTT-GGAGAAATTTG-----------Aatt--tcatgatgttaaggaattacttttgtatgatgatggtcttgtggctat-gtagaatttcttccgtg-ttttaagacgcatgctgaagcatgta---aggataaaatgtcgtggtgtttgtaatttattctaaaacattttagccaaaaacaaataaataaataaa-------------------------tgaagc--aaatatgggggaaatgtttaattgttaaatctagatttaacacggtatataccgtgcttcattatactagtctctacttttccatgtgtttgaaattttCATTAAAATGTTTGTTTGTTGTCTGTTTTAATGAAATCCTTTGTGCTAGCCACTGGGATGAAAGCTAGGGAACACAGCAGTGAGCAA-----------------------------------------------------------------------------------------------CAGCCTGGCTCCGT-CC--GGGGGCCGCTCAGCAGCTC-GGGAGCGTGGAGACG---GGAAGTCTGTCACGCGATGCG-----------CTGGGCCCG------------CTGTTCCCGCCCCCCTCC---CCCC----------------TTTCCCAAGTTTTAAA------AATTTACCTTCCCAGTGGCGGTGAATCCGGAGGAATACGGAAACTGGGGC-GCACTACCATGACACGTGTCAAA-AATCAGTTCCGTGGTCCGTGGAGGGCCTGGGGTTC------GAAAATCTTGTCC-CGAGCACCCCCGTGCGCCTGGCACCGCGACAGTGACAGGACTGAAGCGTG-GACGGCCAG-ACCTCTGCCCTCGGCTAAATCTCGCGGAGAAGGTGGAACA-GGTTACCCAAACCCGACCGAG-GAGGCGAGTTG---GAAACGGCGACTGGGACAAGCTCCCTCA---GAGACGGAGAGAGACCCCA-GTGGAAGGGGGGAGAGGCTCTTAGGGAAACGATGGGGGGACCCGCCCGCACCCGCACAGAGGCGCTGGCAGGCACAGCGGCCCCGAGGAGCCC------AGGAGC-AGGGC-TGTGT-TCCCCTGCATcaggggttggcaaactttttctgcaaagggccagatagtaaatattttaggctttgcaaaccaagaagtagaaagggaggcc-attatgtacgtatttatatagcaagagagaacattt---cccacaatttttttattgacagaatttaaaacttctttattgatgaacaccaaagaaacttgaatttcatataattttcacatatcacgaagtatttttttttt-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/maf_utilities.py	Mon May 19 10:59:37 2014 -0400
@@ -0,0 +1,601 @@
+#!/usr/bin/env python
+"""
+Provides wrappers and utilities for working with MAF files and alignments.
+"""
+#Dan Blankenberg
+import pkg_resources; pkg_resources.require( "bx-python" )
+import bx.align.maf
+import bx.intervals
+import bx.interval_index_file
+import sys, os, string, tempfile
+import logging
+from copy import deepcopy
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+log = logging.getLogger(__name__)
+
+
+GAP_CHARS = [ '-' ]
+SRC_SPLIT_CHAR = '.'
+
+def src_split( src ):
+    fields = src.split( SRC_SPLIT_CHAR, 1 )
+    spec = fields.pop( 0 )
+    if fields:
+        chrom = fields.pop( 0 )
+    else:
+        chrom = spec
+    return spec, chrom
+
+def src_merge( spec, chrom, contig = None ):
+    if None in [ spec, chrom ]:
+        spec = chrom = spec or chrom
+    return bx.align.maf.src_merge( spec, chrom, contig )
+
+def get_species_in_block( block ):
+    species = []
+    for c in block.components:
+        spec, chrom = src_split( c.src )
+        if spec not in species:
+            species.append( spec )
+    return species
+
+def tool_fail( msg = "Unknown Error" ):
+    print >> sys.stderr, "Fatal Error: %s" % msg
+    sys.exit()
+
+#an object corresponding to a reference layered alignment
+class RegionAlignment( object ):
+
+    DNA_COMPLEMENT = string.maketrans( "ACGTacgt", "TGCAtgca" )
+    MAX_SEQUENCE_SIZE = sys.maxint #Maximum length of sequence allowed
+
+    def __init__( self, size, species = [] ):
+        assert size <= self.MAX_SEQUENCE_SIZE, "Maximum length allowed for an individual sequence has been exceeded (%i > %i)." % ( size, self.MAX_SEQUENCE_SIZE )
+        self.size = size
+        self.sequences = {}
+        if not isinstance( species, list ):
+            species = [species]
+        for spec in species:
+            self.add_species( spec )
+
+    #add a species to the alignment
+    def add_species( self, species ):
+        #make temporary sequence files
+        self.sequences[species] = tempfile.TemporaryFile()
+        self.sequences[species].write( "-" * self.size )
+
+    #returns the names for species found in alignment, skipping names as requested
+    def get_species_names( self, skip = [] ):
+        if not isinstance( skip, list ): skip = [skip]
+        names = self.sequences.keys()
+        for name in skip:
+            try: names.remove( name )
+            except: pass
+        return names
+
+    #returns the sequence for a species
+    def get_sequence( self, species ):
+        self.sequences[species].seek( 0 )
+        return self.sequences[species].read()
+
+    #returns the reverse complement of the sequence for a species
+    def get_sequence_reverse_complement( self, species ):
+        complement = [base for base in self.get_sequence( species ).translate( self.DNA_COMPLEMENT )]
+        complement.reverse()
+        return "".join( complement )
+
+    #sets a position for a species
+    def set_position( self, index, species, base ):
+        if len( base ) != 1: raise Exception( "A genomic position can only have a length of 1." )
+        return self.set_range( index, species, base )
+    #sets a range for a species
+    def set_range( self, index, species, bases ):
+        if index >= self.size or index < 0: raise Exception( "Your index (%i) is out of range (0 - %i)." % ( index, self.size - 1 ) )
+        if len( bases ) == 0: raise Exception( "A set of genomic positions can only have a positive length." )
+        if species not in self.sequences.keys(): self.add_species( species )
+        self.sequences[species].seek( index )
+        self.sequences[species].write( bases )
+
+    #Flush temp file of specified species, or all species
+    def flush( self, species = None ):
+        if species is None:
+            species = self.sequences.keys()
+        elif not isinstance( species, list ):
+            species = [species]
+        for spec in species:
+            self.sequences[spec].flush()
+
+class GenomicRegionAlignment( RegionAlignment ):
+
+    def __init__( self, start, end, species = [] ):
+        RegionAlignment.__init__( self, end - start, species )
+        self.start = start
+        self.end = end
+
+class SplicedAlignment( object ):
+
+    DNA_COMPLEMENT = string.maketrans( "ACGTacgt", "TGCAtgca" )
+
+    def __init__( self, exon_starts, exon_ends, species = [] ):
+        if not isinstance( exon_starts, list ):
+            exon_starts = [exon_starts]
+        if not isinstance( exon_ends, list ):
+            exon_ends = [exon_ends]
+        assert len( exon_starts ) == len( exon_ends ), "The number of starts does not match the number of sizes."
+        self.exons = []
+        for i in range( len( exon_starts ) ):
+            self.exons.append( GenomicRegionAlignment( exon_starts[i], exon_ends[i], species ) )
+
+    #returns the names for species found in alignment, skipping names as requested
+    def get_species_names( self, skip = [] ):
+        if not isinstance( skip, list ): skip = [skip]
+        names = []
+        for exon in self.exons:
+            for name in exon.get_species_names( skip = skip ):
+                if name not in names:
+                    names.append( name )
+        return names
+
+    #returns the sequence for a species
+    def get_sequence( self, species ):
+        sequence = tempfile.TemporaryFile()
+        for exon in self.exons:
+            if species in exon.get_species_names():
+                sequence.write( exon.get_sequence( species ) )
+            else:
+                sequence.write( "-" * exon.size )
+        sequence.seek( 0 )
+        return sequence.read()
+
+    #returns the reverse complement of the sequence for a species
+    def get_sequence_reverse_complement( self, species ):
+        complement = [base for base in self.get_sequence( species ).translate( self.DNA_COMPLEMENT )]
+        complement.reverse()
+        return "".join( complement )
+
+    #Start and end of coding region
+    @property
+    def start( self ):
+        return self.exons[0].start
+    @property
+    def end( self ):
+        return self.exons[-1].end
+
+#Open a MAF index using a UID
+def maf_index_by_uid( maf_uid, index_location_file ):
+    for line in open( index_location_file ):
+        try:
+            #read each line, if not enough fields, go to next line
+            if line[0:1] == "#" : continue
+            fields = line.split('\t')
+            if maf_uid == fields[1]:
+                try:
+                    maf_files = fields[4].replace( "\n", "" ).replace( "\r", "" ).split( "," )
+                    return bx.align.maf.MultiIndexed( maf_files, keep_open = True, parse_e_rows = False )
+                except Exception, e:
+                    raise Exception( 'MAF UID (%s) found, but configuration appears to be malformed: %s' % ( maf_uid, e ) )
+        except:
+            pass
+    return None
+
+#return ( index, temp_index_filename ) for user maf, if available, or build one and return it, return None when no tempfile is created
+def open_or_build_maf_index( maf_file, index_filename, species = None ):
+    try:
+        return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), None )
+    except:
+        return build_maf_index( maf_file, species = species )
+
+def build_maf_index_species_chromosomes( filename, index_species = None ):
+    species = []
+    species_chromosomes = {}
+    indexes = bx.interval_index_file.Indexes()
+    blocks = 0
+    try:
+        maf_reader = bx.align.maf.Reader( open( filename ) )
+        while True:
+            pos = maf_reader.file.tell()
+            block = maf_reader.next()
+            if block is None:
+                break
+            blocks += 1
+            for c in block.components:
+                spec = c.src
+                chrom = None
+                if "." in spec:
+                    spec, chrom = spec.split( ".", 1 )
+                if spec not in species:
+                    species.append( spec )
+                    species_chromosomes[spec] = []
+                if chrom and chrom not in species_chromosomes[spec]:
+                    species_chromosomes[spec].append( chrom )
+                if index_species is None or spec in index_species:
+                    forward_strand_start = c.forward_strand_start
+                    forward_strand_end = c.forward_strand_end
+                    try:
+                        forward_strand_start = int( forward_strand_start )
+                        forward_strand_end = int( forward_strand_end )
+                    except ValueError:
+                        continue #start and end are not integers, can't add component to index, goto next component
+                        #this likely only occurs when parse_e_rows is True?
+                        #could a species exist as only e rows? should the
+                    if forward_strand_end > forward_strand_start:
+                        #require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
+                        indexes.add( c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size )
+    except Exception, e:
+        #most likely a bad MAF
+        log.debug( 'Building MAF index on %s failed: %s' % ( filename, e ) )
+        return ( None, [], {}, 0 )
+    return ( indexes, species, species_chromosomes, blocks )
+
+#builds and returns ( index, index_filename ) for specified maf_file
+def build_maf_index( maf_file, species = None ):
+    indexes, found_species, species_chromosomes, blocks = build_maf_index_species_chromosomes( maf_file, species )
+    if indexes is not None:
+        fd, index_filename = tempfile.mkstemp()
+        out = os.fdopen( fd, 'w' )
+        indexes.write( out )
+        out.close()
+        return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), index_filename )
+    return ( None, None )
+
+def component_overlaps_region( c, region ):
+    if c is None: return False
+    start, end = c.get_forward_strand_start(), c.get_forward_strand_end()
+    if region.start >= end or region.end <= start:
+        return False
+    return True
+
+def chop_block_by_region( block, src, region, species = None, mincols = 0 ):
+    # This chopping method was designed to maintain consistency with how start/end padding gaps have been working in Galaxy thus far:
+    #   behavior as seen when forcing blocks to be '+' relative to src sequence (ref) and using block.slice_by_component( ref, slice_start, slice_end )
+    #   whether-or-not this is the 'correct' behavior is questionable, but this will at least maintain consistency
+    # comments welcome
+    slice_start = block.text_size #max for the min()
+    slice_end = 0 #min for the max()
+    old_score = block.score #save old score for later use
+    # We no longer assume only one occurance of src per block, so we need to check them all
+    for c in iter_components_by_src( block, src ):
+        if component_overlaps_region( c, region ):
+            if c.text is not None:
+                rev_strand = False
+                if c.strand == "-":
+                    #We want our coord_to_col coordinates to be returned from positive stranded component
+                    rev_strand = True
+                    c = c.reverse_complement()
+                start = max( region.start, c.start )
+                end = min( region.end, c.end )
+                start = c.coord_to_col( start )
+                end = c.coord_to_col( end )
+                if rev_strand:
+                    #need to orient slice coordinates to the original block direction
+                    slice_len = end - start
+                    end = len( c.text ) - start
+                    start = end - slice_len
+                slice_start = min( start, slice_start )
+                slice_end = max( end, slice_end )
+
+    if slice_start < slice_end:
+        block = block.slice( slice_start, slice_end )
+        if block.text_size > mincols:
+            # restore old score, may not be accurate, but it is better than 0 for everything?
+            block.score = old_score
+            if species is not None:
+                block = block.limit_to_species( species )
+                block.remove_all_gap_columns()
+            return block
+    return None
+
+def orient_block_by_region( block, src, region, force_strand = None ):
+    #loop through components matching src,
+    #make sure each of these components overlap region
+    #cache strand for each of overlaping regions
+    #if force_strand / region.strand not in strand cache, reverse complement
+    ### we could have 2 sequences with same src, overlapping region, on different strands, this would cause no reverse_complementing
+    strands = [ c.strand for c in iter_components_by_src( block, src ) if component_overlaps_region( c, region ) ]
+    if strands and ( force_strand is None and region.strand not in strands ) or ( force_strand is not None and force_strand not in strands ):
+        block = block.reverse_complement()
+    return block
+
+def get_oriented_chopped_blocks_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
+    for block, idx, offset in get_oriented_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols, force_strand ):
+        yield block
+def get_oriented_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
+    for block, idx, offset in get_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols ):
+        yield orient_block_by_region( block, src, region, force_strand ), idx, offset
+
+#split a block with multiple occurances of src into one block per src
+def iter_blocks_split_by_src( block, src ):
+    for src_c in iter_components_by_src( block, src ):
+        new_block = bx.align.Alignment( score=block.score, attributes=deepcopy( block.attributes ) )
+        new_block.text_size = block.text_size
+        for c in block.components:
+            if c == src_c or c.src != src:
+                new_block.add_component( deepcopy( c ) ) #components have reference to alignment, dont want to loose reference to original alignment block in original components
+        yield new_block
+
+#split a block into multiple blocks with all combinations of a species appearing only once per block
+def iter_blocks_split_by_species( block, species = None ):
+    def __split_components_by_species( components_by_species, new_block ):
+        if components_by_species:
+            #more species with components to add to this block
+            components_by_species = deepcopy( components_by_species )
+            spec_comps = components_by_species.pop( 0 )
+            for c in spec_comps:
+                newer_block = deepcopy( new_block )
+                newer_block.add_component( deepcopy( c ) )
+                for value in __split_components_by_species( components_by_species, newer_block ):
+                    yield value
+        else:
+            #no more components to add, yield this block
+            yield new_block
+
+    #divide components by species
+    spec_dict = {}
+    if not species:
+        species = []
+        for c in block.components:
+            spec, chrom = src_split( c.src )
+            if spec not in spec_dict:
+                spec_dict[ spec ] = []
+                species.append( spec )
+            spec_dict[ spec ].append( c )
+    else:
+        for spec in species:
+            spec_dict[ spec ] = []
+            for c in iter_components_by_src_start( block, spec ):
+                spec_dict[ spec ].append( c )
+
+    empty_block = bx.align.Alignment( score=block.score, attributes=deepcopy( block.attributes ) ) #should we copy attributes?
+    empty_block.text_size = block.text_size
+    #call recursive function to split into each combo of spec/blocks
+    for value in __split_components_by_species( spec_dict.values(), empty_block ):
+        sort_block_components_by_block( value, block ) #restore original component order
+        yield value
+
+
+#generator yielding only chopped and valid blocks for a specified region
+def get_chopped_blocks_for_region( index, src, region, species = None, mincols = 0 ):
+    for block, idx, offset in get_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols ):
+        yield block
+def get_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0 ):
+    for block, idx, offset in index.get_as_iterator_with_index_and_offset( src, region.start, region.end ):
+        block = chop_block_by_region( block, src, region, species, mincols )
+        if block is not None:
+            yield block, idx, offset
+
+#returns a filled region alignment for specified regions
+def get_region_alignment( index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0, overwrite_with_gaps = True ):
+    if species is not None: alignment = RegionAlignment( end - start, species )
+    else: alignment = RegionAlignment( end - start, primary_species )
+    return fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand, species, mincols, overwrite_with_gaps )
+
+#reduces a block to only positions exisiting in the src provided
+def reduce_block_by_primary_genome( block, species, chromosome, region_start ):
+    #returns ( startIndex, {species:texts}
+    #where texts' contents are reduced to only positions existing in the primary genome
+    src = "%s.%s" % ( species, chromosome )
+    ref = block.get_component_by_src( src )
+    start_offset = ref.start - region_start
+    species_texts = {}
+    for c in block.components:
+        species_texts[ c.src.split( '.' )[0] ] = list( c.text )
+    #remove locations which are gaps in the primary species, starting from the downstream end
+    for i in range( len( species_texts[ species ] ) - 1, -1, -1 ):
+        if species_texts[ species ][i] == '-':
+            for text in species_texts.values():
+                text.pop( i )
+    for spec, text in species_texts.items():
+        species_texts[spec] = ''.join( text )
+    return ( start_offset, species_texts )
+
+#fills a region alignment
+def fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0, overwrite_with_gaps = True ):
+    region = bx.intervals.Interval( start, end )
+    region.chrom = chrom
+    region.strand = strand
+    primary_src = "%s.%s" % ( primary_species, chrom )
+
+    #Order blocks overlaping this position by score, lowest first
+    blocks = []
+    for block, idx, offset in index.get_as_iterator_with_index_and_offset( primary_src, start, end ):
+        score = float( block.score )
+        for i in range( 0, len( blocks ) ):
+            if score < blocks[i][0]:
+                blocks.insert( i, ( score, idx, offset ) )
+                break
+        else:
+            blocks.append( ( score, idx, offset ) )
+
+    #gap_chars_tuple = tuple( GAP_CHARS )
+    gap_chars_str = ''.join( GAP_CHARS )
+    #Loop through ordered blocks and layer by increasing score
+    for block_dict in blocks:
+        for block in iter_blocks_split_by_species( block_dict[1].get_at_offset( block_dict[2] ) ): #need to handle each occurance of sequence in block seperately
+            if component_overlaps_region( block.get_component_by_src( primary_src ), region ):
+                block = chop_block_by_region( block, primary_src, region, species, mincols ) #chop block
+                block = orient_block_by_region( block, primary_src, region ) #orient block
+                start_offset, species_texts = reduce_block_by_primary_genome( block, primary_species, chrom, start )
+                for spec, text in species_texts.items():
+                    #we should trim gaps from both sides, since these are not positions in this species genome (sequence)
+                    text = text.rstrip( gap_chars_str )
+                    gap_offset = 0
+                    while True in [ text.startswith( gap_char ) for gap_char in GAP_CHARS ]: #python2.4 doesn't accept a tuple for .startswith()
+                    #while text.startswith( gap_chars_tuple ):
+                        gap_offset += 1
+                        text = text[1:]
+                        if not text:
+                            break
+                    if text:
+                        if overwrite_with_gaps:
+                            alignment.set_range( start_offset + gap_offset, spec, text )
+                        else:
+                            for i, char in enumerate( text ):
+                                if char not in GAP_CHARS:
+                                    alignment.set_position( start_offset + gap_offset + i, spec, char )
+    return alignment
+
+#returns a filled spliced region alignment for specified region with start and end lists
+def get_spliced_region_alignment( index, primary_species, chrom, starts, ends, strand = '+', species = None, mincols = 0, overwrite_with_gaps = True ):
+    #create spliced alignment object
+    if species is not None: alignment = SplicedAlignment( starts, ends, species )
+    else: alignment = SplicedAlignment( starts, ends, [primary_species] )
+    for exon in alignment.exons:
+        fill_region_alignment( exon, index, primary_species, chrom, exon.start, exon.end, strand, species, mincols, overwrite_with_gaps )
+    return alignment
+
+#loop through string array, only return non-commented lines
+def line_enumerator( lines, comment_start = '#' ):
+    i = 0
+    for line in lines:
+        if not line.startswith( comment_start ):
+            i += 1
+            yield ( i, line )
+
+#read a GeneBed file, return list of starts, ends, raw fields
+def get_starts_ends_fields_from_gene_bed( line ):
+    #Starts and ends for exons
+    starts = []
+    ends = []
+
+    fields = line.split()
+    #Requires atleast 12 BED columns
+    if len(fields) < 12:
+        raise Exception( "Not a proper 12 column BED line (%s)." % line )
+    chrom     = fields[0]
+    tx_start  = int( fields[1] )
+    tx_end    = int( fields[2] )
+    name      = fields[3]
+    strand    = fields[5]
+    if strand != '-': strand='+' #Default strand is +
+    cds_start = int( fields[6] )
+    cds_end   = int( fields[7] )
+
+    #Calculate and store starts and ends of coding exons
+    region_start, region_end = cds_start, cds_end
+    exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
+    exon_starts = map( ( lambda x: x + tx_start ), exon_starts )
+    exon_ends = map( int, fields[10].rstrip( ',' ).split( ',' ) )
+    exon_ends = map( ( lambda x, y: x + y ), exon_starts, exon_ends );
+    for start, end in zip( exon_starts, exon_ends ):
+        start = max( start, region_start )
+        end = min( end, region_end )
+        if start < end:
+            starts.append( start )
+            ends.append( end )
+    return ( starts, ends, fields )
+
+def iter_components_by_src( block, src ):
+    for c in block.components:
+        if c.src == src:
+            yield c
+
+def get_components_by_src( block, src ):
+    return [ value for value in iter_components_by_src( block, src ) ]
+
+def iter_components_by_src_start( block, src ):
+    for c in block.components:
+        if c.src.startswith( src ):
+            yield c
+
+def get_components_by_src_start( block, src ):
+    return [ value for value in iter_components_by_src_start( block, src ) ]
+
+def sort_block_components_by_block( block1, block2 ):
+    #orders the components in block1 by the index of the component in block2
+    #block1 must be a subset of block2
+    #occurs in-place
+    return block1.components.sort( cmp = lambda x, y: block2.components.index( x ) - block2.components.index( y ) )
+
+def get_species_in_maf( maf_filename ):
+    species = []
+    for block in bx.align.maf.Reader( open( maf_filename ) ):
+        for spec in get_species_in_block( block ):
+            if spec not in species:
+                species.append( spec )
+    return species
+
+def parse_species_option( species ):
+    if species:
+        species = species.split( ',' )
+        if 'None' not in species:
+            return species
+    return None #provided species was '', None, or had 'None' in it
+
+def remove_temp_index_file( index_filename ):
+    try: os.unlink( index_filename )
+    except: pass
+
+#Below are methods to deal with FASTA files
+
+def get_fasta_header( component, attributes = {}, suffix = None ):
+    header = ">%s(%s):%i-%i|" % ( component.src, component.strand, component.get_forward_strand_start(), component.get_forward_strand_end() )
+    for key, value in attributes.iteritems():
+        header = "%s%s=%s|" % ( header, key, value )
+    if suffix:
+        header = "%s%s" % ( header, suffix )
+    else:
+        header = "%s%s" % ( header, src_split( component.src )[ 0 ] )
+    return header
+
+def get_attributes_from_fasta_header( header ):
+    if not header: return {}
+    attributes = {}
+    header = header.lstrip( '>' )
+    header = header.strip()
+    fields = header.split( '|' )
+    try:
+        region = fields[0]
+        region = region.split( '(', 1 )
+        temp = region[0].split( '.', 1 )
+        attributes['species'] = temp[0]
+        if len( temp ) == 2:
+            attributes['chrom'] = temp[1]
+        else:
+            attributes['chrom'] = temp[0]
+        region = region[1].split( ')', 1 )
+        attributes['strand'] = region[0]
+        region = region[1].lstrip( ':' ).split( '-' )
+        attributes['start'] = int( region[0] )
+        attributes['end'] = int( region[1] )
+    except:
+        #fields 0 is not a region coordinate
+        pass
+    if len( fields ) > 2:
+        for i in xrange( 1, len( fields ) - 1 ):
+            prop = fields[i].split( '=', 1 )
+            if len( prop ) == 2:
+                attributes[ prop[0] ] = prop[1]
+    if len( fields ) > 1:
+        attributes['__suffix__'] = fields[-1]
+    return attributes
+
+def iter_fasta_alignment( filename ):
+    class fastaComponent:
+        def __init__( self, species, text = "" ):
+            self.species = species
+            self.text = text
+        def extend( self, text ):
+            self.text = self.text + text.replace( '\n', '' ).replace( '\r', '' ).strip()
+    #yields a list of fastaComponents for a FASTA file
+    f = open( filename, 'rb' )
+    components = []
+    #cur_component = None
+    while True:
+        line = f.readline()
+        if not line:
+            if components:
+                yield components
+            return
+        line = line.strip()
+        if not line:
+            if components:
+                yield components
+            components = []
+        elif line.startswith( '>' ):
+            attributes = get_attributes_from_fasta_header( line )
+            components.append( fastaComponent( attributes['species'] ) )
+        elif components:
+            components[-1].extend( line )
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/odict.py	Mon May 19 10:59:37 2014 -0400
@@ -0,0 +1,85 @@
+"""
+Ordered dictionary implementation.
+"""
+
+from UserDict import UserDict
+
+class odict(UserDict):
+    """
+    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
+
+    This dictionary class extends UserDict to record the order in which items are
+    added. Calling keys(), values(), items(), etc. will return results in this
+    order.
+    """
+    def __init__( self, dict = None ):
+        self._keys = []
+        UserDict.__init__( self, dict )
+
+    def __delitem__( self, key ):
+        UserDict.__delitem__( self, key )
+        self._keys.remove( key )
+
+    def __setitem__( self, key, item ):
+        UserDict.__setitem__( self, key, item )
+        if key not in self._keys:
+            self._keys.append( key )
+
+    def clear( self ):
+        UserDict.clear( self )
+        self._keys = []
+
+    def copy(self):
+        new = odict()
+        new.update( self )
+        return new
+
+    def items( self ):
+        return zip( self._keys, self.values() )
+
+    def keys( self ):
+        return self._keys[:]
+
+    def popitem( self ):
+        try:
+            key = self._keys[-1]
+        except IndexError:
+            raise KeyError( 'dictionary is empty' )
+        val = self[ key ]
+        del self[ key ]
+        return ( key, val )
+
+    def setdefault( self, key, failobj=None ):
+        if key not in self._keys:
+            self._keys.append( key )
+        return UserDict.setdefault( self, key, failobj )
+
+    def update( self, dict ):
+        for ( key, val ) in dict.items():
+            self.__setitem__( key, val )
+
+    def values( self ):
+        return map( self.get, self._keys )
+
+    def iterkeys( self ):
+        return iter( self._keys )
+
+    def itervalues( self ):
+        for key in self._keys:
+            yield self.get( key )
+
+    def iteritems( self ):
+        for key in self._keys:
+            yield key, self.get( key )
+
+    def __iter__( self ):
+        for key in self._keys:
+            yield key
+
+    def reverse( self ):
+        self._keys.reverse()
+
+    def insert( self, index, key, item ):
+        if key not in self._keys:
+            self._keys.insert( index, key )
+            UserDict.__setitem__( self, key, item )