annotate chipmunkv6_wrapper.xml @ 2:379a6c377ed1 draft default tip

Uploaded
author jbrayet
date Wed, 10 Feb 2016 11:14:19 -0500
parents 0293edf40308
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
1 <tool id="chipmunk_v6" name="(di)ChIPmunk" version="6.0">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
2 <description>De novo motif finding</description>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
3 <requirements>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
4 <container type="docker">institutcuriengsintegration/chipmunk:6.0</container>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
5 </requirements>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
6 <!-- Dataset paths and the free-text sample name are single-quoted so that values containing spaces reach the wrapper script as one argument each. --> <command interpreter="bash">chipmunkv6_wrapper.sh -f '${input_file}' -n ${motif_number_selector} -s $chipmunk_version['version'] -m $minw -v $maxw -z $mode -o '${log_outfile}' -i '${image_output}' -x '${name}' -r '${summary_file}' -t ${seq_type}</command>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
7
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
8 <inputs>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
9 <param name="name" type="text" value="ChIPseq" label="Name" />
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
10 <param name="input_file" type="data" format="fasta" label="Sequences"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
11
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
12 <!-- choose between mono, and dichipmunk-->
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
13 <conditional name="chipmunk_version">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
14 <param name="chipmunk_version_selector" type="select" label="ChIPMunk version" help="Read about ChIPmunk versions below.">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
15 <option value="mono_chipmunk" selected="true">MonoChIPMunk</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
16 <option value="di_chipmunk">DiChIPMunk</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
17 </param>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
18 <when value="mono_chipmunk">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
19 <param name="version" type="hidden" value="Mono"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
20 </when>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
21 <when value="di_chipmunk">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
22 <param name="version" type="hidden" value="Di"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
23 </when>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
24 </conditional>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
25 <!--chipmunk usage-->
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
26
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
27 <param name="seq_type" type="select" label="Type of the sequence set" help="Read about it below.">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
28 <option value="s" selected="true">Simple</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
29 <option value="p" >Peak data</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
30 </param>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
31
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
32 <param name="motif_number_selector" type="select" label="Number of different motifs to search">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
33 <option value="1">1</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
34 <option value="2">2</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
35 <option value="3" selected="true">3</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
36 <option value="4">4</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
37 </param>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
38
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
39 <param name="minw" type="integer" value="10" label="Min width of motif to search"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
40 <param name="maxw" type="integer" value="15" label="Max width of motif to search"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
41 <param name="mode" type="select" label="Mode for additional motif finding" help="use 'mask' to mask already identified motifs in your sequences and 'filter' to filter out the whole sequences with already identified motifs">
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
42 <option value="filter">filter</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
43 <option value="mask" selected="true">mask</option>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
44 </param>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
45
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
46
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
47 <!-- description of the outputs: log_file, processed_outputs, image_outputs; use montage to put out the motifs for the named sample -->
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
48 </inputs>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
49 <outputs>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
50 <data format="txt" name="log_outfile" label="Detailed ChIPMunk log for ${name} (log)"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
51 <data format="txt" name="summary_file" label="Summary information for ${name}(txt)"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
52 <data format="png" name="image_output" label="Motifs for ${name}(png)"/>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
53 </outputs>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
54
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
55 <help>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
56 **What it does**
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
57
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
58 (di)ChIPmunk detects over-represented non-overlapping motifs in fasta sequences.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
59
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
60 **Which ChIPMunk should you choose** : Mononucleotides vs. Dinucleotides
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
61
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
62 - Mononucleotide version is to be used when:
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
63 (a) you do not know anything about motifs in your data (“draft” run)
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
64 (b) you plan to use other tools for downstream analysis (most of the existing tools will be able to utilize only mononucleotide matrices).
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
65
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
66 - Dinucleotide version is better suited to produce a more precise representation of the optimal TFBS binding model. This allows the number of sequences containing motif hits to be estimated properly, e.g. to measure the percentage of “the most reliable” ChIP-Seq peaks in a given dataset.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
67
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
68 In terms of the consensus sequence, in general you should get very similar results from the mono- and dinucleotide versions.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
69
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
70
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
71 **Type of the sequence set**
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
72
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
73 **Simple** : for simple multi-fasta to be searched in a double-strand DNA mode (the most common choice)::
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
74
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
75 > header1
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
76 ACTGTGTGAAA
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
77 > header2
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
78 AGTGTGTGTGTG
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
79
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
80 You can omit fasta headers since ChIPMunk would simply skip them.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
81
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
82
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
83 **Peak** : for peak data with the positional preferences profile (often provided in wiggle-files, .wig). The profile of each sequence should be placed in the fasta-header like::
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
84
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
85 > 1.0 2.0 3.0 2.0 1.5 2.0
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
86 AGTAAC
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
87 > 1.0 2.0 3.0 2.0 1.5
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
88 CAGTA
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
89
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
90
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
91 See **"Peak multi-fasta generator"** in the tool panel, if you wish to generate peak data.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
92
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
93 NOTE: when base coverage information is available, it is highly recommended to use peak data. This is extremely important for ChIPMunk performance.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
94
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
95 **Cite ChIPMunk**
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
96
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
97 If you want to cite ChIPMunk in your research please refer to [1] for the basic mononucleotide version and to [2] for the dinucleotide version :
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
98
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
99 [1] Deep and wide digging for binding motifs in ChIP-Seq data. Kulakovskiy IV, Boeva VA, Favorov AV, Makeev VJ. Bioinformatics. 2010 Oct 15;26(20):2622-3. doi: 10.1093/bioinformatics/btq488. Epub 2010 Aug 24.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
100
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
101
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
102 [2] From binding motifs in ChIP-Seq data to improved models of transcription factor binding sites. Kulakovskiy I, Levitsky V, Oshchepkov D, Bryzgalov L, Vorontsov I, Makeev V. J Bioinform Comput Biol. 2013 Feb;11(1):1340004. doi: 10.1142/S0219720013400040. Epub 2013 Jan 16.
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
103
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
104
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
105
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
106
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
107
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
108
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
109
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
110 </help>
0293edf40308 Uploaded
jbrayet
parents:
diff changeset
111 </tool>