annotate kmersvm/nullseq.xml @ 5:f99b5099ea55 draft

Uploaded
author test-svm
date Sun, 05 Aug 2012 16:50:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
1 <tool id="kmersvm_nullseq" name="Generate Null Sequence">
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
2 <description>using random sampling from genomic DNA</description>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
3 <command interpreter="python">scripts/nullseq_generate.py -q
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
4 #if str($excluded) !="None":
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
5 -e $excluded
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
6 #end if
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
7 -x $fold -r $rseed -g $gc_err -t $rpt_err $input $dbkey ${indices_path.fields.path}
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
8 </command>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
9 <inputs>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
10 <param name="fold" type="integer" value="1" label="# of Fold-Increase" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
11 <param name="gc_err" type="float" value="0.02" label="Allowable GC Error" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
12 <param name="rpt_err" type="float" value="0.02" label="Allowable Repeat Error" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
13 <param name="rseed" type="integer" value="1" label="Random Number Seed" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
14 <param format="interval" name="input" type="data" label="BED File of Positive Regions"> <!-- validators must be nested inside this param to be applied to the selected dataset -->
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
15 <validator type="unspecified_build" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
16 <validator type="dataset_metadata_in_file" filename="nullseq_indices.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are currently unavailable for the specified build." /></param>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
17 <param name="excluded" optional="true" format="interval" type="data" value="None" label="Excluded Regions (optional)" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
18 <param name="indices_path" type="select" label="Available Datasets">
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
19 <options from_file="nullseq_indices.loc">
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
20 <column name="dbkey" index="0"/>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
21 <column name="value" index="0"/>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
22 <column name="name" index="1"/>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
23 <column name="path" index="2"/>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
24 <!--filter type="data_meta" ref="input" key="dbkey" column="0" /-->
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
25 </options>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
26 </param>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
27 </inputs>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
28 <outputs>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
29 <data format="interval" name="nullseq_output" from_work_dir="nullseq_output.bed" />
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
30 </outputs>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
31 <help>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
32
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
33 **What it does**
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
34
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
35 Takes an input BED file and generates a set of null sequences (negative data) for use in Train SVM, matched to the input regions in length, GC content and repeat fraction. Uses random sampling for efficiency.
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
36
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
37 **Parameters**
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
38
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
39 Fold-Increase: Size of the desired null sequence data set, expressed as a multiple of the size of the input data set.
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
40
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
41 GC Error, Repeat Error: Acceptable difference between a positive sequence and its corresponding null sequence in GC content and repeat content, respectively.
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
42
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
43 Random Number Seed: Seed for random number generator.
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
44
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
45 Excluded Regions: Submitted regions will be excluded from null sequence generation.
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
46
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
47 ----
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
48
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
49 **Example**
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
50
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
51 Given a BED file containing::
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
52
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
53 chr1 10212203 10212303
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
54 chr1 103584748 103584848
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
55 chr1 105299130 105299230
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
56 chr1 106367772 106367872
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
57
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
58 The tool will output a BED file matched in length, GC content and repeat content::
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
59
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
60 chr1 3089935 3090035
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
61 chr1 5031335 5031435
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
62 chr1 5103742 5103842
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
63 chr1 5650372 5650472
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
64
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
65 </help>
f99b5099ea55 Uploaded
test-svm
parents:
diff changeset
66 </tool>