# HG changeset patch # User rico # Date 1333653283 14400 # Node ID 5f6ee4f6fd0b088d7e19db3e6ef9c7b42e9f33a8 # Parent 18dad59e1529fc47359b6b0a8bf64c76a95a26ff Uploaded diff -r 18dad59e1529 -r 5f6ee4f6fd0b find_intervals.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_intervals.xml Thu Apr 05 15:14:43 2012 -0400 @@ -0,0 +1,100 @@ + + genomic intervals + + + find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.extra_files_path" + + #if $override_metadata.choice == "0" + "$input.metadata.ref" "$input.metadata.rPos" + #else + "$override_metadata.ref_col" "$override_metadata.rpos_col" + #end if + + "$score_col" "$shuffles" + + #if $cutoff.type == 'percentage' + "$cutoff.cutoff_pct" + #else + "=$cutoff.cutoff_val" + #end if + + "$out_format" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +The user selects a SNP table and specifies the columns containing (1) +chromosome, (2) position, (3) scores (such as an Fst-value for the SNP), (4) +a percentage or raw score for the "cutoff" and (5) the number of times the +data should be randomized (only intervals with score exceeding the maximum for +the randomized data are reported). If a percentage (e.g. 95%) is specified +for #4, then that percentile of the scores is used as the cutoff; this may +not work well if many SNPs have the same score. The program subtracts the +cutoff from every score, then finds genomic intervals (i.e., consecutive runs +of SNPs) whose total score cannot be increased by adding or subtracting one +or more SNPs at the ends of the interval. + +