# HG changeset patch
# User yhoogstrate
# Date 1394016175 18000
# Node ID 9a2a88d1dd4a670b6e4e17f075c49e9f3d938dfa
# Parent 48c78adade0340f55b482bf50a598cc025715a92
Uploaded
diff -r 48c78adade03 -r 9a2a88d1dd4a test-data/generate_reads.py
--- a/test-data/generate_reads.py Wed Mar 05 05:42:06 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#!/usr/bin/env python
-
-
-import random
-import math
-
-
-class Region:
- def __init__(self,start,stop,sequence):
- self.start = start
- self.stop = stop
- self.sequence = sequence.strip().replace("\n","").replace(" ","")
- if(len(self.sequence) != self.getSpanningLength()):
- print "ERROR: sequence length: "+str(len(self.sequence))+", while spanning region is: "+str(self.getSpanningLength())
- import sys
- sys.exit()
-
- def getSpanningLength(self):
- return abs(self.stop-self.start+1)
-
-class ReadSynthesizer:
- def __init__(self,chromosome):
- self.regions = []
- self.chromosome = chromosome
-
- def addRegion(self,region):
- self.regions.append(region)
-
- def produceReads(self,readDensity = 1,read_length = 50):
- """
- Produces uniform reads by walking iteratively over self.regions
- """
-
- mRNA = self.getTotalmRNA()
- spanning_length = self.getRegionSpanningLength()
- n = spanning_length['total'] - read_length + 1
-
- j = 0
- k = 0
-
- for i in range(n):
- # "alpha is playing the role of k and beta is playing the role of theta"
- dd = max(0,int(round(random.lognormvariate(math.log(readDensity),0.5))))# Notice this is NOT a binomial distribution!!
-
- for d in range(dd):
- sequence = mRNA[i:i+read_length]
-
- if(random.randint(0,1) == 0):
- strand = 0
- else:
- strand = 16
- flag = strand + 0
-
- print "read_"+str(j)+"."+str(i)+"."+str(d)+"\t"+str(flag)+"\t"+self.chromosome+"\t"+str(self.regions[j].start + k)+"\t60\t"+self.getMappingString(read_length,j,k)+"\t*\t0\t0\t"+str(sequence.upper())+"\t*"
-
- spanning_length['iter'][j] -= 1
- if(k >= self.regions[j].getSpanningLength()-1):
- j += 1
- k = 0
- else:
- k += 1
-
- def getMappingString(self,length,j,offset):
- m = 0
-
- out = ""
-
- for i in range(length):
- k = i + offset
-
- if(k >= self.regions[j].getSpanningLength()):
- j += 1
-
- out += str(m)+"M"
- out += (str(self.regions[j].start - self.regions[j-1].stop-1))+"N"
- m = 1
-
- offset = -k
- else:
- m += 1
-
- out += str(m) + "M"
-
-
- return out
-
- def getRegionSpanningLength(self):
- length = {'total':0,'iter':[]}
- for r in self.regions:
- l = r.getSpanningLength()
- length['iter'].append(l)
- length['total'] += l
- return length
-
- def getTotalmRNA(self):
- mRNA = ""
- for r in self.regions:
- mRNA += r.sequence
- return mRNA
-
-#rs = ReadSynthesizer('chr6')
-#rs.addRegion(Region(100,149,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaag'))
-#rs.addRegion(Region(151,152,'at'))
-#rs.produceReads(3,50)
-
-
-rs = ReadSynthesizer('chr6')
-rs.addRegion(Region(154360546,154360969,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaaggaagcggctgaggcgcttggaacccgaaaagtctcggtgctcctggctacctcgcacagcggtgcccgcccggccgtcagtaccatggacagcagcgctgcccccacgaacgccagcaattgcactgatgccttggcgtactcaagttgctccccagcacccagccccggttcctgggtcaacttgtcccacttagatggcGacctgtccgacccatgcggtccgaaccgcaccgacctgggcgggagagacagcctgtgccctccgaccggcagtccctccatgatcacggccatcacgatcatggccctctactccatcgtgtgcgtggtggggctcttcggaaacttcctggtcatgtatgtgattgtcag'))
-rs.addRegion(Region(154410961,154411313,'atacaccaagatgaagactgccaccaacatctacattttcaaccttgctctggcagatgccttagccaccagtaccctgcccttccagagtgtgaattacctaatgggaacatggccatttggaaccatcctttgcaagatagtgatctccatagattactataacatgttcaccagcatattcaccctctgcaccatgagtgttgatcgatacattgcagtctgccaccctgtcaaggccttagatttccgtactccccgaaatgccaaaattatcaatgtctgcaactggatcctctcttcagccattggtcttcctgtaatgttcatggctacaacaaaatacaggcaag'))
-rs.addRegion(Region(154412087,154412607,'gttccatagattgtacactaacattctctcatccaacctggtactgggaaaacctgctgaagatctgtgttttcatcttcgccttcattatgccagtgctcatcattaccgtgtgctatggactgatgatcttgcgcctcaagagtgtccgcatgctctctggctccaaagaaaaggacaggaatcttcgaaggatcaccaggatggtgctggtggtggtggctgtgttcatcgtctgctggactcccattcacatttacgtcatcattaaagccttggttacaatcccagaaactacgttccagactgtttcttggcacttctgcattgctctaggttacacaaacagctgcctcaacccagtcctttatgcatttctggatgaaaacttcaaacgatgcttcagagagttctgtatcccaacctcttccaacattgagcaacaaaactccactcgaattcgtcagaacactagagaccacccctccacggccaatacagtggatagaactaatcatcag'))
-rs.addRegion(Region(154428600,154428787,'gtggaattgaacctggactgtcactgtgaaaatgcaaagccttggccactgagctacaatgcagggcagtctccatttcccttcccaggaagagtctagagcattaattttgagtttgcaaaggcttgtaactatttcatatgatttttagagctgactatgacatgaaccctaaaattcctgttccc'))
-rs.produceReads(3,50)
-
-
-
-
-
-
diff -r 48c78adade03 -r 9a2a88d1dd4a tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Wed Mar 05 05:42:06 2014 -0500
+++ b/tool_data_table_conf.xml.sample Wed Mar 05 05:42:55 2014 -0500
@@ -2,7 +2,7 @@
- name, dbkey, display_name, value
+ value, dbkey, name, path
\ No newline at end of file
diff -r 48c78adade03 -r 9a2a88d1dd4a varscan_mpileup2snp_from_bam.xml
--- a/varscan_mpileup2snp_from_bam.xml Wed Mar 05 05:42:06 2014 -0500
+++ b/varscan_mpileup2snp_from_bam.xml Wed Mar 05 05:42:55 2014 -0500
@@ -119,10 +119,10 @@
-
-
+
+
-
+
@@ -130,10 +130,10 @@
-
-
+
+
-
+
diff -r 48c78adade03 -r 9a2a88d1dd4a x.tar.bz2