# HG changeset patch # User yhoogstrate # Date 1394016175 18000 # Node ID 9a2a88d1dd4a670b6e4e17f075c49e9f3d938dfa # Parent 48c78adade0340f55b482bf50a598cc025715a92 Uploaded diff -r 48c78adade03 -r 9a2a88d1dd4a test-data/generate_reads.py --- a/test-data/generate_reads.py Wed Mar 05 05:42:06 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -#!/usr/bin/env python - - -import random -import math - - -class Region: - def __init__(self,start,stop,sequence): - self.start = start - self.stop = stop - self.sequence = sequence.strip().replace("\n","").replace(" ","") - if(len(self.sequence) != self.getSpanningLength()): - print "ERROR: sequence length: "+str(len(self.sequence))+", while spanning region is: "+str(self.getSpanningLength()) - import sys - sys.exit() - - def getSpanningLength(self): - return abs(self.stop-self.start+1) - -class ReadSynthesizer: - def __init__(self,chromosome): - self.regions = [] - self.chromosome = chromosome - - def addRegion(self,region): - self.regions.append(region) - - def produceReads(self,readDensity = 1,read_length = 50): - """ - Produces uniform reads by walking iteratively over self.regions - """ - - mRNA = self.getTotalmRNA() - spanning_length = self.getRegionSpanningLength() - n = spanning_length['total'] - read_length + 1 - - j = 0 - k = 0 - - for i in range(n): - # "alpha is playing the role of k and beta is playing the role of theta" - dd = max(0,int(round(random.lognormvariate(math.log(readDensity),0.5))))# Notice this is NOT a binomial distribution!! - - for d in range(dd): - sequence = mRNA[i:i+read_length] - - if(random.randint(0,1) == 0): - strand = 0 - else: - strand = 16 - flag = strand + 0 - - print "read_"+str(j)+"."+str(i)+"."+str(d)+"\t"+str(flag)+"\t"+self.chromosome+"\t"+str(self.regions[j].start + k)+"\t60\t"+self.getMappingString(read_length,j,k)+"\t*\t0\t0\t"+str(sequence.upper())+"\t*" - - spanning_length['iter'][j] -= 1 - if(k >= self.regions[j].getSpanningLength()-1): - j += 1 - k = 0 - else: - k += 1 - - def getMappingString(self,length,j,offset): - m = 0 - - out = "" - - for i in range(length): - k = i + offset - - if(k >= self.regions[j].getSpanningLength()): - j += 1 - - out += str(m)+"M" - out += (str(self.regions[j].start - self.regions[j-1].stop-1))+"N" - m = 1 - - offset = -k - else: - m += 1 - - out += str(m) + "M" - - - return out - - def getRegionSpanningLength(self): - length = {'total':0,'iter':[]} - for r in self.regions: - l = r.getSpanningLength() - length['iter'].append(l) - length['total'] += l - return length - - def getTotalmRNA(self): - mRNA = "" - for r in self.regions: - mRNA += r.sequence - return mRNA - -#rs = ReadSynthesizer('chr6') -#rs.addRegion(Region(100,149,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaag')) -#rs.addRegion(Region(151,152,'at')) -#rs.produceReads(3,50) - - -rs = ReadSynthesizer('chr6') -rs.addRegion(Region(154360546,154360969,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaaggaagcggctgaggcgcttggaacccgaaaagtctcggtgctcctggctacctcgcacagcggtgcccgcccggccgtcagtaccatggacagcagcgctgcccccacgaacgccagcaattgcactgatgccttggcgtactcaagttgctccccagcacccagccccggttcctgggtcaacttgtcccacttagatggcGacctgtccgacccatgcggtccgaaccgcaccgacctgggcgggagagacagcctgtgccctccgaccggcagtccctccatgatcacggccatcacgatcatggccctctactccatcgtgtgcgtggtggggctcttcggaaacttcctggtcatgtatgtgattgtcag')) -rs.addRegion(Region(154410961,154411313,'atacaccaagatgaagactgccaccaacatctacattttcaaccttgctctggcagatgccttagccaccagtaccctgcccttccagagtgtgaattacctaatgggaacatggccatttggaaccatcctttgcaagatagtgatctccatagattactataacatgttcaccagcatattcaccctctgcaccatgagtgttgatcgatacattgcagtctgccaccctgtcaaggccttagatttccgtactccccgaaatgccaaaattatcaatgtctgcaactggatcctctcttcagccattggtcttcctgtaatgttcatggctacaacaaaatacaggcaag')) -rs.addRegion(Region(154412087,154412607,'gttccatagattgtacactaacattctctcatccaacctggtactgggaaaacctgctgaagatctgtgttttcatcttcgccttcattatgccagtgctcatcattaccgtgtgctatggactgatgatcttgcgcctcaagagtgtccgcatgctctctggctccaaagaaaaggacaggaatcttcgaaggatcaccaggatggtgctggtggtggtggctgtgttcatcgtctgctggactcccattcacatttacgtcatcattaaagccttggttacaatcccagaaactacgttccagactgtttcttggcacttctgcattgctctaggttacacaaacagctgcctcaacccagtcctttatgcatttctggatgaaaacttcaaacgatgcttcagagagttctgtatcccaacctcttccaacattgagcaacaaaactccactcgaattcgtcagaacactagagaccacccctccacggccaatacagtggatagaactaatcatcag')) -rs.addRegion(Region(154428600,154428787,'gtggaattgaacctggactgtcactgtgaaaatgcaaagccttggccactgagctacaatgcagggcagtctccatttcccttcccaggaagagtctagagcattaattttgagtttgcaaaggcttgtaactatttcatatgatttttagagctgactatgacatgaaccctaaaattcctgttccc')) -rs.produceReads(3,50) - - - - - - diff -r 48c78adade03 -r 9a2a88d1dd4a tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Wed Mar 05 05:42:06 2014 -0500 +++ b/tool_data_table_conf.xml.sample Wed Mar 05 05:42:55 2014 -0500 @@ -2,7 +2,7 @@ - name, dbkey, display_name, value + value, dbkey, name, path
\ No newline at end of file diff -r 48c78adade03 -r 9a2a88d1dd4a varscan_mpileup2snp_from_bam.xml --- a/varscan_mpileup2snp_from_bam.xml Wed Mar 05 05:42:06 2014 -0500 +++ b/varscan_mpileup2snp_from_bam.xml Wed Mar 05 05:42:55 2014 -0500 @@ -119,10 +119,10 @@ - - + + - + @@ -130,10 +130,10 @@ - - + + - + diff -r 48c78adade03 -r 9a2a88d1dd4a x.tar.bz2