#!/bin/bash
#
# Export species-rank classification metadata from the SQLite database named
# by CLASS_DB into usearch_meta.csv, then run `romp usearch_clusters` on the
# input sequences against the filtered RDP 10_31 reference files.
#
# Usage: <this script> CONFIG
#   CONFIG  shell file that is sourced for settings (after util.sh, which is
#           sourced from the directory containing this script).
#
# Variables that must be non-empty once util.sh and CONFIG have been sourced
# (they may also be inherited from the environment):
#   CLASS_DB        SQLite database queried for the metadata
#   INPUT_SEQS      first positional argument to `romp usearch_clusters`
#   USEARCH_HITS    fifth positional argument (presumably an output path --
#                   TODO confirm against romp's documentation)
#   USEARCH_GROUPS  sixth positional argument (presumably an output path --
#                   TODO confirm against romp's documentation)
#
# Side effect: (over)writes usearch_meta.csv in the current directory.

# Print a message to stderr and abort. The name is deliberately specific so it
# cannot clash with helpers defined by util.sh or the config file.
usearch_die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

if (( $# < 1 )); then
  printf 'Usage: %s CONFIG\n' "${0##*/}" >&2
  exit 2
fi

# Bash (outside POSIX mode) does not exit when a sourced file is missing, so
# verify readability first instead of continuing with unset settings.
util_file="$(dirname -- "$0")/util.sh"
[[ -r "${util_file}" ]] || usearch_die "cannot read ${util_file}"
# shellcheck source=/dev/null
source "${util_file}"

[[ -r "$1" ]] || usearch_die "cannot read config file: $1"
# shellcheck source=/dev/null
source "$1"

# Assigned after the config is sourced, so these values always take
# precedence over anything the config may have set for the same names.
RDP_SEQS="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.fasta"
RDP_SEQINFO="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.seq_info.csv"

# Fail early with a clear message: an empty value would otherwise be passed
# to sqlite3/romp as a bogus argument.
: "${CLASS_DB:?must be set (by util.sh, the config file or the environment)}"
: "${INPUT_SEQS:?must be set (by util.sh, the config file or the environment)}"
: "${USEARCH_HITS:?must be set (by util.sh, the config file or the environment)}"
: "${USEARCH_GROUPS:?must be set (by util.sh, the config file or the environment)}"

# One CSV row (with header) per name for classifications made at
# want_rank = 'species'; the mass column is cast to an integer and exported
# as "count". The heredoc delimiter is quoted so the SQL is passed to sqlite3
# verbatim, with no shell expansion.
if ! sqlite3 -csv -header "${CLASS_DB}" > usearch_meta.csv <<'EOF'
SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank
FROM multiclass_concat
JOIN taxa USING (tax_id)
JOIN placement_names pn USING (placement_id, name)
WHERE want_rank = 'species';
EOF
then
  # Do not hand romp an empty or partial metadata file.
  usearch_die "sqlite3 export from ${CLASS_DB} failed"
fi

# Query side: INPUT_SEQS plus usearch_meta.csv, grouped by its tax_id column
# and using its count column for --query-duplication. Database side: the RDP
# sequences plus their seq_info CSV, with names taken from the seqname column
# and grouped by tax_id. This is the last command, so its exit status is the
# script's exit status.
romp -v usearch_clusters \
  --usearch-quietly \
  --query-group tax_id \
  --query-duplication count \
  --database-name seqname \
  --database-group tax_id \
  "${INPUT_SEQS}" \
  usearch_meta.csv \
  "${RDP_SEQS}" \
  "${RDP_SEQINFO}" \
  "${USEARCH_HITS}" \
  "${USEARCH_GROUPS}"