本文介绍通用的 sql 语句优化的经验
# sql 优化经验
# 介绍
在 SQL 优化方面,有多种策略可以提高查询性能和减少资源消耗。
# SQL 执行计划常用操作符
PRJT2
关系的 “投影”(project) 运算,用于选择表达式项的计算;广泛用于查询,排序,函数索引创建等。
SQL>explain select c1 + c2 from t1;
explain select c1 + c2 from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)NSET2
结果集 (result set) 收集,一般是查询计划的顶层节点。
SQL>explain select c1 + c2 from t1;
explain select c1 + c2 from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)SLCT2
关系的 “选择”(select)运算,用于查询条件的过滤。
explain select * from t1 where c1 >1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE)
#SLCT2: [6, 1, 0]; T1.C1 > 1
#CSCN2: [6, 1, 0]; INDEX33555457(T1)NEST LOOP INNER JOIN2
简写为 NLI2,嵌套循环内连接;没有索引可用,且无法用 HASH,(如不等值比较),则可能使用 NLI2
explain select * from t1, t2 where t1.c1 >t2.c2;
#NSET2: [19, 1, 0]
#PRJT2: [19, 1, 0]; exp_num(4), is_atom(FALSE)
#SLCT2: [19, 1, 0]; T1.C1 > T2.C2
#NEST LOOP INNER JOIN2: [19, 1, 0];
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)MERGE INNER JOIN3
简写为 MI3,归并内连接;有索引可用时,有可能使用 MI3。
explain select a.c1, b.c1 from tx a, tx b where a.c1 = b.c1;
#NSET2: [29, 100000, 0]
#PRJT2: [29, 100000, 0]; exp_num(2), is_atom(FALSE)
#MERGE INNER JOIN3: [29, 100000, 0];
#CSCN2: [23, 100000, 0]; INDEX33555463(TX)
#CSCN2: [23, 100000, 0]; INDEX33555463(TX)NEST LOOP FULL JOIN2
简写为 NLFO2,嵌套循环全外连接。一般不等值连接时使用。
explain select *from t1 full join t2 on t1.c1 <> t2.c1;
#NSET2: [19, 1, 0]
#PRJT2: [19, 1, 0]; exp_num(4), is_atom(FALSE)
#NEST LOOP FULL JOIN2: [19, 1, 0]; join condition(T1.C1 <> T2.C1)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)NEST LOOP LEFT JOIN2
简写为 NLLO2,嵌套循环左外连接。一般不等值连接时使用。
explain select *from t1 left join t2 on t1.c1 <> t2.c1;
#NSET2: [19, 1, 0]
#PRJT2: [19, 1, 0]; exp_num(4), is_atom(FALSE)
#NEST LOOP LEFT JOIN2: [19, 1, 0]; join condition(T1.C1 <> T2.C1)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)HASH LEFT JOIN2
简写为 HLO2,HASH 左外连接。一般等值连接时使用。
explain select *from t1 left join t2 on t1.c1 = t2.c1;
#NSET2: [13, 1, 0]
#PRJT2: [13, 1, 0]; exp_num(4), is_atom(FALSE)
#HASH LEFT JOIN2: [13, 1, 0]; key_num(1),
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)HASH RIGHT JOIN2
简写为 HRO2,HASH 右外连接。下面的例子,做一个 tx 与 t2 的左连接。因为 t2 的行数很小,tx 比较大,所以自动转为右连接,在 t2 上建 hash 的代价比较小。
explain select * from tx left join t2 on t2.c1 = tx.c1 + 2;
#NSET2: [39, 100000, 0]
#PRJT2: [39, 100000, 0]; exp_num(4), is_atom(FALSE)
#HASH RIGHT JOIN2: [39, 100000, 0]; key_num(1),
#CSCN2: [6, 1, 0]; INDEX33555458(T2)
#CSCN2: [23, 100000, 0]; INDEX33555463(TX)HASH FULL JOIN2
简写为 HFO2,HASH 全外连接。等值连接时可用;
explain select *from t1 full join t2 on t1.c1 = t2.c1;
#NSET2: [13, 1, 0]
#PRJT2: [13, 1, 0]; exp_num(4), is_atom(FALSE)
#HASH FULL JOIN2: [13, 1, 0]; key_num(1),
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)TOPN2
取前 N 个记录;explain select top 10 * from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE)
#TOPN2: [6, 1, 0]; top_num(10)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)UNION ALL
Union all 运算,
explain select *from t1 union all select *from t2;
#NSET2: [13, 2, 0]
#PRJT2: [13, 2, 0]; exp_num(2), is_atom(FALSE)
#UNION ALL: [13, 2, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)UNION
集合的并运算,
explain select *from t1 union select *from t2;
#NSET2: [13, 2, 0]
#PRJT2: [13, 2, 0]; exp_num(2), is_atom(FALSE)
#UNION: [13, 2, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555457(T1)
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555458(T2)EXCEPT / EXCEPT ALL
集合的差运算
explain select * from t1 except select * from t2;
#NSET2: [13, 1, 0]
#PRJT2: [13, 1, 0]; exp_num(2), is_atom(FALSE)
#EXCEPT: [13, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555476(T2)INTER SECT/ INTERSECT ALL
集合的交运算
explain select * from t1 intersect select * from t2;
#NSET2: [13, 1, 0]
#PRJT2: [13, 1, 0]; exp_num(2), is_atom(FALSE)
#INTERSECT: [13, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555476(T2)INSERET/VINS2
插入记录,其中 VINS2 用于列存储表
explain insert into t1 values (1, 1);
#INSERT : [0, 0, 0]; table(T1), type(values)HAGR2
Hash aggregate; HASH 分组,并计算聚集函数
explain select count (*) from tx group by c2;
#NSET2: [23, 1000, 0]
#PRJT2: [23, 1000, 0]; exp_num(1), is_atom(FALSE)
#HAGR2: [23, 1000, 0]; grp_num(1), sfun_num(1)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)SAGR2
Stream Aggregate; 如果输入流是有序的,则使用流分组,并计算聚集函数
explain select count (*) from tx group by c1;
#NSET2: [23, 1000, 0]
#PRJT2: [23, 1000, 0]; exp_num(1), is_atom(FALSE)
#SAGR2: [23, 1000, 0]; grp_num(1), sfun_num(1)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)FAGR2
Fast aggregate; 如果没有 where 条件,且取 count (), 或者基于索引的 MAX/MIN 值,则可以快速取得集函数的值
explain select count () from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(1), is_atom(FALSE)
#FAGR2: [6, 1, 0]; sfun_num(1)AAGR2
简单聚集;如果没有分组(group by), 则总的就一个组,直接计算聚集函数
explain select count (*) from tx where c2 = 10;
#NSET2: [22, 1, 0]
#PRJT2: [22, 1, 0]; exp_num(1), is_atom(FALSE)
#AAGR2: [22, 1, 0]; grp_num(0), sfun_num(1)
#SLCT2: [22, 2500, 0]; exp_cast(TX.C2) = var1
#CSCN2: [22, 100000, 0]; INDEX33555479(TX)HASH LEFT SEMI JOIN2
HASH 左半连接; 扫描左表建立 hash 表,扫描右表探测 HASH 表,最后输出被探测到的左表的行
explain select * from t1 where c1 in (select c2 from tx);
#NSET2: [35, 1, 0]
#PRJT2: [35, 1, 0]; exp_num(3), is_atom(FALSE)
#HASH LEFT SEMI JOIN2: [35, 1, 0];
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#PRJT2: [22, 100000, 0]; exp_num(1), is_atom(FALSE)
#CSCN2: [22, 100000, 0]; INDEX33555479(TX)CSCN2/VSCN2
聚集索引扫描(cluster index scan); VSCN2 用于列存储表
explain select *from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555480(T1)DSCN
Dynamic table scan; 动态视图扫描
explain select * from vthreads; #NSET2: [6, 1, 0] #PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE) #DSCN: [6, 1, 0]; SYSINDEXVTHREADS(V$THREADS)DELETE/VDEL2
删除数据,其中 VDEL2 用于列存储表
explain delete from t1 where c1 = 1;
#DELETE : [0, 0, 0]; table(T1), type(select)
#TEMP TABLE SPOOL: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(1), is_atom(FALSE)
#SLCT2: [6, 1, 0]; T1.C1 = 1
#CSCN2: [6, 1, 0]; INDEX33555480(T1)SORT3
排序
explain select * from t1 order by c1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE)
#SORT3: [6, 1, 0]; key_num(1), is_distinct(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555480(T1)TEMP TABLE SPOOL
临时表,临时存放数据;如 delete/update 时,临时存放 ROWID, PK 等定位信息
explain delete from t1 where c1 = 1;
#DELETE : [0, 0, 0]; table(T1), type(select)
#TEMP TABLE SPOOL: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(1), is_atom(FALSE)
#SLCT2: [6, 1, 0]; T1.C1 = 1
#CSCN2: [6, 1, 0]; INDEX33555480(T1)PIPE2
管道;先做一遍右儿子,然后执行左儿子,并把左儿子的数据向上送,直到左儿子不再有数据。
explain select (select 2) from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#PIPE2: [6, 1, 0]
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#SPOOL2: [0, 1, 0]; key_num(1), spool_num(0)
#PRJT2: [0, 1, 0]; exp_num(1), is_atom(TRUE)
#CSCN2: [0, 1, 0]; SYSINDEXSYSDUAL(SYSDUAL)SPOOL2
也是临时表;和 TEMP TABLE SPOOL (NTTS) 不同的是,它的数据集不向父亲节点传送,而是被编号,用编号和 KEY 来定位访问;而 TEMP TABLE SPOOL 的数据,主动传递给父亲节点;
explain select (select 2) from t1;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(2), is_atom(FALSE)
#PIPE2: [6, 1, 0]
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#SPOOL2: [0, 1, 0]; key_num(1), spool_num(0)
#PRJT2: [0, 1, 0]; exp_num(1), is_atom(TRUE)
#CSCN2: [0, 1, 0]; SYSINDEXSYSDUAL(SYSDUAL)CSEK2
聚集索引数据定位;(cluster index seek)
explain select *from tx where c1 = 20;
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE)
#CSEK2: [6, 1, 0]; scan_type(UNIQUE),INDEX33555479(TX), scan_range[20,20]SSEK2
2 级索引数据定位;(second index seek)
create index ind2 on tx(c2);
explain select c2 from tx where c2 = 'aaa';
#NSET2: [6, 2500, 0]
#PRJT2: [6, 2500, 0]; exp_num(1), is_atom(FALSE)
#SSEK2: [6, 2500, 0]; scan_type(ASC), IND2(TX), scan_range[aaa,aaa]BLKUP2
(Batch lookup), 使用 2 级别索引的定位信息, 在聚集索引中查找数据
explain select * from tx where c2 = 'aaa';
#NSET2: [20, 2500, 0]
#PRJT2: [20, 2500, 0]; exp_num(4), is_atom(FALSE)
#BLKUP2: [20, 2500, 0]; IND2(TX)
#SSEK2: [20, 2500, 0]; scan_type(ASC), IND2(TX), scan_range[aaa,aaa]NEST LOOP SEMI JOIN2
嵌套循环半连接。通常用于无法使用 HASH, 索引的不等值的 In/Exists;效率比较差。
explain select * from t1 where exists (select * from t2 where t1.c1 <> t2.c1);
#NSET2: [19, 1, 0]
#PRJT2: [19, 1, 0]; exp_num(3), is_atom(FALSE)
#NEST LOOP SEMI JOIN2: [19, 1, 0]; join condition(T1.C1 <> T2.C1)
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSCN2: [6, 1, 0]; INDEX33555476(T2)NEST LOOP INDEX JOIN2
嵌套循环索引连接 (IJI2)。 右表上有索引, 对于左表的每一行,利用索引在右表中定位。这是 DM6 中最常用的优化方式;DM7 由于 HASH 连接比较成熟,因此其重要性略有下降。
explain select *from t1, tx where t1.c1 = tx.c1;
#NSET2: [26, 1, 0]
#PRJT2: [26, 1, 0]; exp_num(5), is_atom(FALSE)
#NEST LOOP INDEX JOIN2: [26, 1, 0]
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSEK2: [19, 1, 0]; scan_type(UNIQUE),INDEX33555479(TX), scan_range[T1.C1,T1.C1]HASH2 INNER JOIN
HASH 内连接(HI3);无法利用索引时,系统一般采用 HASH 连接。
explain select *from t1, t2 where t1.c1 = t2.c1;
#NSET2: [13, 1, 0]
#PRJT2: [13, 1, 0]; exp_num(4), is_atom(FALSE)
#HASH2 INNER JOIN: [13, 1, 0]; KEY_NUM(1);
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSCN2: [6, 1, 0]; INDEX33555476(T2)INDEX JOIN SEMI JOIN2
索引半连接。在半连接的右表中,有索引可利用时使用,如下例 tx (c1) 上有索引。
explain select *from t1 where exists (select * from tx where t1.c1 = tx.c1);
#NSET2: [8, 1, 0]
#PRJT2: [8, 1, 0]; exp_num(3), is_atom(FALSE)
#INDEX JOIN SEMI JOIN2: [8, 1, 0];
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSEK2: [19, 1, 0]; scan_type(UNIQUE), INDEX33555479(TX), scan_range[T1.C1,T1.C1]HASH RIGHT SEMI JOIN2
HASH 右半连接 (HRS2)。半连接一般情况下,总是在左表建立 HASH, 如果右表数据更少,则转化为右半连接。
explain select * from tx where exists (select *from t1 where tx.c3 = t1.c2);
#NSET2: [40, 1, 0]
#PRJT2: [40, 1, 0]; exp_num(4), is_atom(FALSE)
#HASH RIGHT SEMI JOIN2: [40, 1, 0];
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)HASH RIGHT SEMI JOIN32
HASH 右半连接。这个专门用来处理 all/any/some 等谓词。但 all/any/some 不是必须用这个操作符。
explain select *from tx where c3 > all(select c2 from t1);
#NSET2: [30, 100000, 0]
#PRJT2: [30, 100000, 0]; exp_num(4), is_atom(FALSE)
#HASH RIGHT SEMI JOIN32: [30, 100000, 0]; op all;, join condition(TX.C3 > DMTEMPVIEW_00001021.col_name)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)
#PRJT2: [6, 1, 0]; exp_num(1), is_atom(FALSE)
#CSCN2: [6, 1, 0]; INDEX33555480(T1)MERGE SEMI JOIN3
归并半连接。如果连接的关键字正好是索引列,则可以使用。
explain select * from tx a where exists (select *from tx b where a.c1 = b.c1 and b.c3 = 10);
#NSET2: [27, 2500, 0]
#PRJT2: [27, 2500, 0]; exp_num(4), is_atom(FALSE)
#MERGE SEMI JOIN3: [27, 2500, 0];
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)
#SLCT2: [23, 2500, 0]; TX.C3 = 10
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)CONST VALUE LIST
系统自动创建的一个常量列表,用于与实体表做连接。
explain select *from t1 where c1 in (1, 2, 3);
#NSET2: [6, 1, 0]
#PRJT2: [6, 1, 0]; exp_num(3), is_atom(FALSE)
#HASH2 INNER JOIN: [6, 1, 0]; KEY_NUM(1);
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CONST VALUE LIST: [0, 3, 0]; row_num(3), col_num(1)HEAP TABLE/HEAP TABLE SCAN
有时,系统会把重复利用的中间保存在一个临时表中,这个表称为 HEAP TABLE; 与之配套的是,一个专门的扫描操作符号。(HTAB/HSCN)
explain with v1 as
(select count(*) from tx)
select *
from v1 a, v1 b;
#NSET2: [89, 1, 0]
#PIPE2: [89, 1, 0]
#PRJT2: [66, 1, 0]; exp_num(2), is_atom(FALSE)
#NEST LOOP INNER JOIN2: [66, 1, 0];
#HEAP TABLE SCAN: [22, 1, 0]; table_no(0),
#HEAP TABLE SCAN: [22, 1, 0]; table_no(0),
#HEAP TABLE: [22, 1, 0]; table_no(0),
#PRJT2: [22, 1, 0]; exp_num(1), is_atom(FALSE)
#FAGR2: [22, 1, 0]; sfun_num(1)FBTR
Fill Btr, 填充 B 树。建索引的时候使用。一般不能用 explain 看,到, 但是建索引的时候,可以在 V$SQL_NODE_HISTORY 中查到 。UFLT
(Filter for Update from),这是个用于实现 UPDATE FROM 语句 (SQL SERVER 兼容性) 的操作符号。以 rowid 作为 key,建 hash 表,如果下层数据过来没有 hash 匹配项则插入到 hash 表,
explain update t1 set t1.c1 = tx.c2 from tx where t1.c2 = tx.c1;
#UPDATE : [0, 0, 0]; table(T1), type(select)
#TEMP TABLE SPOOL: [26, 1, 0]
#UFLT: [0, 0, 0]; IS_TOP_1(TRUE)
#PRJT2: [26, 1, 0]; exp_num(2), is_atom(FALSE)
#NEST LOOP INDEX JOIN2: [26, 1, 0]
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSEK2: [19, 1, 0]; scan_type(UNIQUE), INDEX33555479(TX), scan_range[T1.C2,T1.C2]HIERARCHICAL QUERY (CNNTB)
用于实现层次查询。
explain select * from tx connect by prior c1 = c2 start with c3 = 0;
#NSET2: [118773, 12500000, 0]
#PRJT2: [118773, 12500000, 0]; exp_num(3), is_atom(FALSE)
#HIERARCHICAL QUERY: [118773, 12500000, 0];
#PRJT2: [23, 2500, 0]; exp_num(4), is_atom(FALSE)
#SLCT2: [23, 2500, 0]; TX.C3 = 0
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)
#PRJT2: [23, 2500, 0]; exp_num(4), is_atom(FALSE)
#SLCT2: [23, 2500, 0]; var1 = exp_cast(TX.C2)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)INDEX JOIN LEFT JOIN2
索引左外连接,使用右表的索引。
explain select * from t1 left join tx on t1.c1 = tx.c1;
#NSET2: [26, 1, 0]
#PRJT2: [26, 1, 0]; exp_num(5), is_atom(FALSE)
#INDEX JOIN LEFT JOIN2: [26, 1, 0]
#CSCN2: [6, 1, 0]; INDEX33555480(T1)
#CSEK2: [19, 1, 0]; scan_type(UNIQUE), INDEX33555479(TX), scan_range[T1.C1,T1.C1]MPP BROADCAST
Mpp 模式下,消息广播到各站点,包含必要的聚集函数合并计算MPP GATHER
Mpp 模式下,消息收集到主站点MPP DISTRIBUTE
Mpp 模式下,消息各站点的相互重分发, 一般在连接前做MPP SCATTER
Mpp 模式下,主站点向各从站点广播消息PARALLEL
水平分区表的并行查询RNSK
(Row number stop key), 实现 ORACLE 兼容的 rownum;
explain select * from tx where rownum = 10;
#NSET2: [23, 100000, 0]
#PRJT2: [23, 100000, 0]; exp_num(4), is_atom(FALSE)
#RNSK: [23, 100000, 0]; rownum = exp_cast(10)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)CNTS
用于实现全文索引的 CONTAINSSSCN
(second index scan), 直接使用 2 级索引进行扫描;
explain select c2 from tx;
#NSET2: [22, 100000, 0]
#PRJT2: [22, 100000, 0]; exp_num(1), is_atom(FALSE)
#SSCN: [22, 100000, 0]; IND2(TX)AFUN
分析函数计算。
explain
SELECT c1, SUM(c3) OVER (PARTITION BY c1) FROM tx;
#NSET2: [23, 100000, 0]
#PRJT2: [23, 100000, 0]; exp_num(2), is_atom(FALSE)
#AFUN: [23, 100000, 0]; afun_num(1)
#CSCN2: [23, 100000, 0]; INDEX33555479(TX)PSCN/ASCN/ESCN
PSCN: 批量参数当作表来扫描
ASCN: 数组当作表来扫描
ESCN: 外部表扫描DISTINCT
去重。
explain select distinct c1 from tx;
#NSET2: [22, 1000, 0]
#PRJT2: [22, 1000, 0]; exp_num(1), is_atom(FALSE)
#DISTINCT: [22, 1000, 0]
#SSCN: [22, 100000, 0]; IND2(TX)HASH LEFT SEMI MUTIPLE JOIN
多列 HASH 左半连接, 用于实现多列 IN。
explain select * from t1 where (c1, c2) not in (select c1, c2 from tx);
#NSET2: [35, 1, 0]
#PRJT2: [35, 1, 0]; exp_num(3), is_atom(FALSE)
#HASH LEFT SEMI MULTIPLE JOIN: [35, 1, 0]; (ANTI), join condition((exp11 AND exp11))
#CSCN2: [6, 1, 0]; INDEX33555480(T1)REMOTE / LSET
实现外部连接 (DBLINK)。
REMOTE: 远程查询;
LSET: 远程的结果集
# 关于索引
# 如何建立索引
# 索引适用范围
在以下场景下可考虑创建索引:
・仅当要通过索引访问表中很少的一部分行(1%~20%)。
・索引可覆盖查询所需的所有列,不需额外去访问表。
注意
对于一个表来说索引并非越多越好,过多的索引将影响该表的 DML 效率。
存在下列情况将导致无法使用索引:
・组合索引中,条件列中没有组合索引的首列。
・条件列带有函数或计算。
索引排序是按照字段值进行排序的,字段值通过函数或计算后的值索引无法获取。
・索引过滤性能不好时。
如对一张 10 万条记录的表进行条件查询,获取 5 万条数据,通过索引进行查找效率低于全表扫描,将放弃使用索引。
# 如何查看执行计划
mysql: explain select * from table
dm: 看执行 id
sqlserver:
# 优化 sql 语句的记录
通用方法:
看执行计划 优先优化 sace2(全表扫描)、 blkup(回表扫描)
select count (*) from xxxx; -- 看看这个表有多少行
select xx,count (*) from xxx group by xx ; --xx 为过滤条件,看看每一个过滤条件的数据分布情况,确定数据分布均匀并能过滤较多的条件上建索引
stat 100 on xxxx (xx); -- 更新统计信息
看 SLCT2 看是否存在表达式的值
分组一个查询字段看分布情况 如果均匀且没有空的值可以考虑在该字段建立索引,如果分布不均匀查询出来的数据太大 过滤性差就会影响查询速度
看 BLKUP2(回表查询)尽量避免出现回表查询,如果不能避免尽量使得走回表前的结果集少
看 CSCN2 全表扫描的情况
优化过程中可以通过以下 sql 查询 sql 的执行情况
执行时间、io 等待时间等待
如果 io 等待时间过程就要看看服务器 io 的使用情况了
命令:iostat -d -x -k 1 10
# 案例一 (达梦):
-- 有一条这样的语句 | |
select left(distno, 4), count(*), sum(case when ywlx = 4 then 1 else 0 end) from zcjy_tdnz.openreportrecord group by left(distno, 4) | |
select count(*) from zcjy_tdnz.openreportrecord | |
--19255177 | |
执行计划 | |
1 #NSET2: [3447, 1189, 52] | |
2 #PRJT2: [3447, 1189, 52]; exp_num(3), is_atom(FALSE) | |
3 #HAGR2: [3447, 1189, 52]; grp_num(1), sfun_num(2), distinct_flag[0,0]; slave_empty(2) keys(DMTEMPVIEW_16778393.TMPCOL0) | |
4 #LOCAL COLLECT: [3447, 1189, 52]; op_id(1) n_grp_by (0) n_cols(0) n_keys(0) for_sync(FALSE) | |
5 #HAGR2: [3447, 1189, 52]; grp_num(1), sfun_num(2), distinct_flag[0,0]; slave_empty(0) keys(DMTEMPVIEW_16778393.TMPCOL0) | |
6 #PRJT2: [2144, 18554643, 52]; exp_num(2), is_atom(FALSE) | |
7 #SSCN: [2144, 18554643, 52]; ywlx_distno_20220211(OPENREPORTRECORD) | |
看第7行 SSCN 表示走了索引 | |
[2144, 18554643, 52] | |
表示 耗时2144毫秒,查询的记录数为18554643, 输出的字节数是52 | |
这里可以看到总数有19255177 走了索引查询的记录数为18554643,过滤性很差 | |
-- 接着修改 直接筛选 ywlx = 4 为过滤条件 | |
select left(distno, 4), count(*), sum(case when ywlx = 4 then 1 else 0 end) from zcjy_tdnz.openreportrecord where ywlx = 4 group by left(distno, 4) | |
1 #NSET2: [92, 1189, 52] | |
2 #PRJT2: [92, 1189, 52]; exp_num(3), is_atom(FALSE) | |
3 #HAGR2: [92, 1189, 52]; grp_num(1), sfun_num(2), distinct_flag[0,0]; slave_empty(2) keys(DMTEMPVIEW_16778398.TMPCOL0) | |
4 #LOCAL COLLECT: [92, 1189, 52]; op_id(1) n_grp_by (0) n_cols(0) n_keys(0) for_sync(FALSE) | |
5 #HAGR2: [92, 1189, 52]; grp_num(1), sfun_num(2), distinct_flag[0,0]; slave_empty(0) keys(DMTEMPVIEW_16778398.TMPCOL0) | |
6 #PRJT2: [60, 449698, 52]; exp_num(2), is_atom(FALSE) | |
7 #SSEK2: [60, 449698, 52]; scan_type(ASC), ywlx_distno_20220211(OPENREPORTRECORD), scan_range[(4,min),(4,max)) | |
看第7行 SSEK2 表示走了索引 | |
[60, 449698, 52] | |
表示 耗时60毫秒,查询的记录数为449698, 输出的字节数是52 | |
这里可以看到总数有19255177 走了索引查询的记录数为449698,过滤性就很好 |
在程序中和在 sql 工具中 执行同一条 sql 语句前者比后者慢很多?
原因:
1、因为分页参数传递引起,把 #{} 换成 ${} 快了很多
如果需要在函数中写动态的 sql 执行并且返回结果集 可以用下面的方法:
create or replace FUNCTION f_sjcxfx_jyxxcx_result( | |
v_sql VARCHAR(8000), | |
v_distNo VARCHAR(100), | |
v_level INT, | |
v_xmmc VARCHAR(500), | |
v_xmbh VARCHAR(200), | |
v_bdate VARCHAR(200), | |
v_edate VARCHAR(200), | |
v_jybh VARCHAR(200), | |
v_jgzt VARCHAR(200), | |
v_jyjg VARCHAR(200), | |
v_blzt VARCHAR(200), | |
v_jysq_bdate VARCHAR(200), | |
v_jysq_edate VARCHAR(200) | |
) | |
RETURN sjcxfx_jyxxcx_table | |
is | |
v_test sjcxfx_jyxxcx_table := sjcxfx_jyxxcx_table(); | |
results sjcxfx_jyxxcx_table; | |
begin | |
-- 创建返回的结果 | |
/*create or replace type sjcxfx_jyxxcx_report as object( | |
distNo VARCHAR(50), | |
distName VARCHAR(50), | |
prj_no VARCHAR(200), | |
title VARCHAR(8000), | |
ztname VARCHAR(1000), | |
jyjffs VARCHAR(50) | |
); | |
create or replace type sjcxfx_jyxxcx_table as table of sjcxfx_jyxxcx_report;*/ | |
set v_sql = 'select sjcxfx_jyxxcx_report(distNo, distName, prj_no, title, ztname, jyjffs) from proj where jyokflag=1 '; | |
if v_distNo <> '' | |
then | |
set v_sql = v_sql||' and distno like '''||v_distNo||'%'''; | |
end if; | |
-- | |
if v_xmmc <> '' | |
then | |
set v_sql = v_sql||' and title like ''%'||v_xmmc||'%'''; | |
end if; | |
-- | |
if v_xmbh <> '' | |
then | |
set v_sql = v_sql||' and prj_no like ''%'||v_xmbh||'%'''; | |
end if; | |
-- | |
if v_bdate <> '' | |
then | |
set v_sql = v_sql||' and zhaobiao_pub_date >= '''||v_bdate||''''; | |
end if; | |
-- | |
if v_edate <> '' | |
then | |
set v_sql = v_sql||' and zhaobiao_pub_date <= '''||v_edate||''''; | |
end if; | |
-- | |
if v_jybh <> '' | |
then | |
set v_sql = v_sql||' and prj_jiaoyi_no like ''%'||v_jybh||'%'''; | |
end if; | |
-- | |
if v_jyjg <> '' | |
then | |
set v_sql = v_sql||' and endway = '''||v_jyjg||''''; | |
end if; | |
-- | |
if v_blzt <> '' | |
then | |
set v_sql = v_sql||' and jyokflag = '||v_blzt||''; | |
end if; | |
EXECUTE IMMEDIATE v_sql BULK COLLECT INTO results; | |
return results; | |
end; |
# 案例二 (StarRocks):
相关的 Query Profile 的结构查看:查看
建表语句: | |
CREATE TABLE `gz_balance3` ( | |
`sno` int(11) NOT NULL COMMENT "", | |
`years` int(11) NOT NULL COMMENT "", | |
`acc_set` int(11) NOT NULL COMMENT "", | |
`months` tinyint(4) NULL COMMENT "", | |
`sy` int(11) NULL COMMENT "", | |
`acc_code` varchar(65533) NULL COMMENT "", | |
`acc_name` varchar(65533) NULL COMMENT "", | |
`dr_cr` varchar(65533) NULL COMMENT "", | |
`bal_year_begin` decimal(18, 2) NULL COMMENT "", | |
`bal_month_begin` decimal(18, 2) NULL COMMENT "", | |
`debit_month` decimal(18, 2) NULL COMMENT "", | |
`credit_month` decimal(18, 2) NULL COMMENT "", | |
`bal_month_end` decimal(18, 2) NULL COMMENT "", | |
`debit_year` decimal(18, 2) NULL COMMENT "", | |
`credit_year` decimal(18, 2) NULL COMMENT "", | |
`benifit_month` decimal(18, 2) NULL COMMENT "", | |
`benifit_year` decimal(18, 2) NULL COMMENT "", | |
`oldacc_code` varchar(65533) NULL COMMENT "", | |
`tmpplace` varchar(65533) NULL COMMENT "", | |
`optstyle` varchar(65533) NULL COMMENT "", | |
`linkDistId` varchar(65533) NULL COMMENT "", | |
`years_months` int(11) NULL COMMENT "", | |
`len_acc_code` tinyint(4) NULL COMMENT "", | |
`_clientId` int(11) NULL COMMENT "", | |
`_clientUser` varchar(65533) NULL COMMENT "", | |
`update_time_zl` datetime NULL COMMENT "" | |
) ENGINE=OLAP | |
PRIMARY KEY(`sno`, `years`) | |
DISTRIBUTED BY HASH(`sno`, `years`) BUCKETS 8 | |
PROPERTIES ( | |
"replication_num" = "1", | |
"in_memory" = "false", | |
"enable_persistent_index" = "true", | |
"replicated_storage" = "true", | |
"compression" = "LZ4" | |
); | |
注意: 总数据量 - 5亿 | |
语句: | |
select case when length('01')=2 then left(a.linkdistid,4) else a.linkdistid end distno, | |
count(distinct ifnull(b.acc_set,c.acc_set)) zts, | |
convert(sum(b.debit_month),DECIMAL) bqs, | |
convert(sum(c.debit_month),DECIMAL) sqs, | |
case when sum(c.debit_month)=0 then 0 else convert(sum(b.debit_month)/sum(c.debit_month)*100,DECIMAL) end zxl | |
from gz_acc_set a left outer join gz_balance3 b on a.acc_set=b.acc_set | |
and b.YEARs=2015 and b.months=4 | |
and b.acc_code like '522%' | |
left outer join gz_balance3 c on a.acc_set=c.acc_set | |
and c.YEARs*12+c.months=2015*12+4-1 | |
and c.acc_code like '522%' | |
where b.debit_month+c.debit_month>=1 | |
and a.dwlx =case when '全部'='全部' then dwlx else '全部' end | |
and a.linkDistId like '01%' group by case when length('01')=2 then left(a.linkdistid,4) else a.linkdistid end | |
> OK | |
> 查询时间: 9.958s 原本需要查询9秒 | |
上面的 01、 2015、 months = 4 中的4 都是程序会替换的直接变量 | |
下面是explain 的执行计划(只显示耗时最长的节点) | |
PLAN FRAGMENT 3 | |
OUTPUT EXPRS: | |
PARTITION: RANDOM | |
STREAM DATA SINK | |
EXCHANGE ID: 08 | |
UNPARTITIONED | |
7:Project | |
| <slot 60> : 60: acc_set | |
| <slot 68> : 68: debit_month | |
| | |
6:OlapScanNode | |
TABLE: gz_balance3 | |
PREAGGREGATION: ON | |
PREDICATES: CAST(59: years AS BIGINT) * 12 + CAST(61: months AS BIGINT) = 24183, 63: acc_code LIKE '522%' | |
partitions=1/1 | |
rollup: gz_balance3 | |
tabletRatio=8/8 | |
tabletList=45545,45547,45549,45551,45553,45555,45557,45559 | |
cardinality=1 | |
avgRowSize=17.700815 | |
explain analyze 查询Query Profile信息(只显示 最耗时的节点的信息) | |
[0mFragment 3[0m | |
│ [0mBackendNum: 1[0m | |
│ [0mInstancePeakMemoryUsage: 26.688 MB, InstanceAllocatedMemoryUsage: 42.205 GB[0m | |
│ [0mPrepareTime: 409.670us[0m | |
└──[0mDATA_STREAM_SINK (id=8)[0m | |
│ [0mPartitionType: UNPARTITIONED[0m | |
└──[0mPROJECT (id=7) [0m | |
│ [0mEstimates: [row: ?, cpu: ?, memory: ?, network: ?, cost: ?][0m | |
│ [0mTotalTime: 51.620us (0.00%) [CPUTime: 51.620us][0m | |
│ [0mOutputRows: 71.199K (71199)[0m | |
│ [0mExpression: [60: acc_set, 68: debit_month][0m | |
└──[1m[31mOLAP_SCAN (id=6) [0m | |
[1m[31mEstimates: [row: 1, cpu: 17.70, memory: 0.00, network: 0.00, cost: 8.85][0m | |
[1m[31mTotalTime: 11s76ms (86.07%) [CPUTime: 39.997ms, ScanTime: 11s36ms][0m | |
[1m[31mOutputRows: 71.199K (71199)[0m | |
[1m[31mTable: : gz_balance3[0m | |
[1m[31mSubordinateOperators: [0m | |
[1m[31mCHUNK_ACCUMULATE[0m | |
[1m[31mNOOP[0m | |
[1m[31mOLAP_SCAN_PREPARE[0m | |
[1m[31mDetail Timers: [ScanTime = IOTaskExecTime + IOTaskWaitTime][0m | |
[1m[31mIOTaskExecTime: 10s491ms [min=9s946ms, max=11s20ms][0m | |
[1m[31mSegmentRead: 10s230ms [min=9s655ms, max=10s728ms][0m | |
[1m[31mBlockFetch: 9s270ms [min=8s756ms, max=9s745ms][0m | |
[1m[31mIOTaskWaitTime: 29.425ms [min=13.810ms, max=45.469ms][0m | |
从上面的信息可以发现 Fragment 3的耗时最长,并且可以看到分配的内存也非常的高 InstanceAllocatedMemoryUsage: 42.205 GB | |
定位到对应的执行内容为: | |
PLAN FRAGMENT 3 | |
OUTPUT EXPRS: | |
PARTITION: RANDOM | |
STREAM DATA SINK | |
EXCHANGE ID: 08 | |
UNPARTITIONED | |
7:Project | |
| <slot 60> : 60: acc_set | |
| <slot 68> : 68: debit_month | |
| | |
6:OlapScanNode | |
TABLE: gz_balance3 | |
PREAGGREGATION: ON | |
PREDICATES: CAST(59: years AS BIGINT) * 12 + CAST(61: months AS BIGINT) = 24183, 63: acc_code LIKE '522%' | |
partitions=1/1 | |
rollup: gz_balance3 | |
tabletRatio=8/8 | |
tabletList=45545,45547,45549,45551,45553,45555,45557,45559 | |
cardinality=1 | |
avgRowSize=17.700815 | |
partitions=1/1:表示总共一个分区,在一个分区里查询 | |
tabletRatio=8/8: 表示总共8个桶,扫描了8个桶的内容 | |
PREDICATES: CAST(59: years AS BIGINT) * 12 + CAST(61: months AS BIGINT) = 24183, 63: acc_code LIKE '522%' | |
从这里可以看出 是这个过滤条件运算花费的时间比较长,从这里入手优化 要结合实际的情况优化 | |
比如我这里的语句中过滤条件是: | |
and c.YEARs*12+c.months=2015*12+4-1 中 c.YEARs*12+c.months 做了运算 无法直接过滤数据 | |
这里想要的效果是表示我需要查询上一个月份的数据 那么其实可以 优化这个条件且不需要做运算,可以改成以下语句: | |
and (c.years = (case when 4 = 1 then 2015 - 1 else 2015 end) and c.months = (case when 4 = 1 then 12 else 4 - 1 end)) 其中2015、 4、 都是传入的值 | |
修改之后的语句: | |
select case when length('01')=2 then left(a.linkdistid,4) else a.linkdistid end distno, | |
count(distinct ifnull(b.acc_set,c.acc_set)) zts, | |
convert(sum(b.debit_month),DECIMAL) bqs, | |
convert(sum(c.debit_month),DECIMAL) sqs, | |
case when sum(c.debit_month)=0 then 0 else convert(sum(b.debit_month)/sum(c.debit_month)*100,DECIMAL) end zxl | |
from gz_acc_set a left outer join gz_balance3 b on a.acc_set=b.acc_set | |
and b.YEARs=2015 and b.months=4 | |
and b.acc_code like '522%' | |
left outer join gz_balance3 c on a.acc_set=c.acc_set | |
--and c.YEARs*12+c.months=2015*12+4-1 | |
and (c.years = (case when 4 = 1 then 2015 - 1 else 2015 end) and c.months = (case when 4 = 1 then 12 else 4 - 1 end)) | |
and c.acc_code like '522%' | |
where b.debit_month+c.debit_month>=1 | |
and a.dwlx =case when '全部'='全部' then dwlx else '全部' end | |
and a.linkDistId like '01%' group by case when length('01')=2 then left(a.linkdistid,4) else a.linkdistid end | |
> OK | |
> 查询时间: 2.384s | |
缩短了8秒! | |
执行计划:(只显示关心的) | |
PLAN FRAGMENT 3 | |
OUTPUT EXPRS: | |
PARTITION: RANDOM | |
STREAM DATA SINK | |
EXCHANGE ID: 08 | |
UNPARTITIONED | |
7:Project | |
| <slot 60> : 60: acc_set | |
| <slot 68> : 68: debit_month | |
| | |
6:OlapScanNode | |
TABLE: gz_balance3 | |
PREAGGREGATION: ON | |
PREDICATES: 59: years = 2015, 61: months = 3, 63: acc_code LIKE '522%' | |
partitions=1/1 | |
rollup: gz_balance3 | |
tabletRatio=8/8 | |
tabletList=45545,45547,45549,45551,45553,45555,45557,45559 | |
cardinality=1 | |
avgRowSize=17.700815 | |
PLAN FRAGMENT 4 | |
OUTPUT EXPRS: | |
PARTITION: RANDOM | |
STREAM DATA SINK | |
EXCHANGE ID: 04 | |
UNPARTITIONED | |
3:Project | |
| <slot 34> : 34: acc_set | |
| <slot 42> : 42: debit_month | |
| | |
2:OlapScanNode | |
TABLE: gz_balance3 | |
PREAGGREGATION: ON | |
PREDICATES: 33: years = 2015, 35: months = 4, 37: acc_code LIKE '522%' | |
partitions=1/1 | |
rollup: gz_balance3 | |
tabletRatio=8/8 | |
tabletList=45545,45547,45549,45551,45553,45555,45557,45559 | |
cardinality=1 | |
avgRowSize=17.700815 | |
explain analyze | |
[0m | |
[0mFragment 3[0m | |
│ [0mBackendNum: 1[0m | |
│ [0mInstancePeakMemoryUsage: 11.233 MB, InstanceAllocatedMemoryUsage: 1.892 GB[0m | |
│ [0mPrepareTime: 388.670us[0m | |
└──[0mDATA_STREAM_SINK (id=8)[0m | |
│ [0mPartitionType: UNPARTITIONED[0m | |
└──[0mPROJECT (id=7) [0m | |
│ [0mEstimates: [row: ?, cpu: ?, memory: ?, network: ?, cost: ?][0m | |
│ [0mTotalTime: 39.300us (0.00%) [CPUTime: 39.300us][0m | |
│ [0mOutputRows: 71.199K (71199)[0m | |
│ [0mExpression: [60: acc_set, 68: debit_month][0m | |
└──[1m[31mOLAP_SCAN (id=6) [0m | |
[1m[31mEstimates: [row: 1, cpu: 17.70, memory: 0.00, network: 0.00, cost: 8.85][0m | |
[1m[31mTotalTime: 451.245ms (48.60%) [CPUTime: 1.948ms, ScanTime: 449.296ms][0m | |
[1m[31mOutputRows: 71.199K (71199)[0m | |
[1m[31mTable: : gz_balance3[0m | |
[1m[31mSubordinateOperators: [0m | |
[1m[31mCHUNK_ACCUMULATE[0m | |
[1m[31mNOOP[0m | |
[1m[31mOLAP_SCAN_PREPARE[0m | |
[1m[31mDetail Timers: [ScanTime = IOTaskExecTime + IOTaskWaitTime][0m | |
[1m[31mIOTaskExecTime: 349.445ms [min=267.150ms, max=446.469ms][0m | |
[1m[31mSegmentRead: 333.591ms [min=252.794ms, max=434.923ms][0m | |
[1m[31mBlockFetch: 274.072ms [min=198.346ms, max=368.110ms][0m | |
[1m[31mIOTaskWaitTime: 6.920ms [min=143.380us, max=28.939ms][0m | |
[0m | |
[0mFragment 4[0m | |
│ [0mBackendNum: 1[0m | |
│ [0mInstancePeakMemoryUsage: 13.604 MB, InstanceAllocatedMemoryUsage: 1.965 GB[0m | |
│ [0mPrepareTime: 419.470us[0m | |
└──[0mDATA_STREAM_SINK (id=4)[0m | |
│ [0mPartitionType: UNPARTITIONED[0m | |
└──[0mPROJECT (id=3) [0m | |
│ [0mEstimates: [row: ?, cpu: ?, memory: ?, network: ?, cost: ?][0m | |
│ [0mTotalTime: 56.350us (0.01%) [CPUTime: 56.350us][0m | |
│ [0mOutputRows: 71.245K (71245)[0m | |
│ [0mExpression: [34: acc_set, 42: debit_month][0m | |
└──[1m[31mOLAP_SCAN (id=2) [0m | |
[1m[31mEstimates: [row: 1, cpu: 17.70, memory: 0.00, network: 0.00, cost: 8.85][0m | |
[1m[31mTotalTime: 450.460ms (48.51%) [CPUTime: 14.747ms, ScanTime: 435.712ms][0m | |
[1m[31mOutputRows: 71.245K (71245)[0m | |
[1m[31mTable: : gz_balance3[0m | |
[1m[31mSubordinateOperators: [0m | |
[1m[31mCHUNK_ACCUMULATE[0m | |
[1m[31mNOOP[0m | |
[1m[31mOLAP_SCAN_PREPARE[0m | |
[1m[31mDetail Timers: [ScanTime = IOTaskExecTime + IOTaskWaitTime][0m | |
[1m[31mIOTaskExecTime: 375.914ms [min=303.763ms, max=426.845ms][0m | |
[1m[31mSegmentRead: 359.928ms [min=285.005ms, max=410.825ms][0m | |
[1m[31mBlockFetch: 281.056ms [min=223.391ms, max=328.108ms][0m | |
[1m[31mIOTaskWaitTime: 9.453ms [min=214.510us, max=24.273ms][0m | |
[0m | |
从上面的执行计划中可以看出 时间大大缩短了 而且内存消耗也大大减少了 InstanceAllocatedMemoryUsage: 1.965 GB | |
还可以继续优化,通过上面的执行计划可以发现我这里只有1个分区,我们根据年份分组查询数据: | |
select years, count(*) from gz_balance group by years | |
2016 48503278 | |
2019 56560829 | |
2018 57214609 | |
2015 44546275 | |
2020 58545580 | |
2021 58165822 | |
2017 58700094 | |
2023 56764942 | |
2022 62766491 | |
看结果可以知道 每年的数据很平均 那么可以根据年份建立索引或者分区,这里选择分区 | |
CREATE TABLE `gz_balance` ( | |
`sno` int(11) NOT NULL COMMENT "", | |
`years` int(11) NOT NULL COMMENT "", | |
`acc_set` int(11) NOT NULL COMMENT "", | |
`months` tinyint(4) NULL COMMENT "", | |
`sy` int(11) NULL COMMENT "", | |
`acc_code` varchar(65533) NULL COMMENT "", | |
`acc_name` varchar(65533) NULL COMMENT "", | |
`dr_cr` varchar(65533) NULL COMMENT "", | |
`bal_year_begin` decimal(18, 2) NULL COMMENT "", | |
`bal_month_begin` decimal(18, 2) NULL COMMENT "", | |
`debit_month` decimal(18, 2) NULL COMMENT "", | |
`credit_month` decimal(18, 2) NULL COMMENT "", | |
`bal_month_end` decimal(18, 2) NULL COMMENT "", | |
`debit_year` decimal(18, 2) NULL COMMENT "", | |
`credit_year` decimal(18, 2) NULL COMMENT "", | |
`benifit_month` decimal(18, 2) NULL COMMENT "", | |
`benifit_year` decimal(18, 2) NULL COMMENT "", | |
`oldacc_code` varchar(65533) NULL COMMENT "", | |
`tmpplace` varchar(65533) NULL COMMENT "", | |
`optstyle` varchar(65533) NULL COMMENT "", | |
`linkDistId` varchar(65533) NULL COMMENT "", | |
`years_months` int(11) NULL COMMENT "", | |
`len_acc_code` tinyint(4) NULL COMMENT "", | |
`_clientId` int(11) NULL COMMENT "", | |
`_clientUser` varchar(65533) NULL COMMENT "", | |
`update_time_zl` datetime NULL COMMENT "" | |
) ENGINE=OLAP | |
PRIMARY KEY(`sno`, `years`) | |
PARTITION BY RANGE (years) ( | |
START ("2014") END ("2060") EVERY (1) | |
) | |
DISTRIBUTED BY HASH(`sno`, `years`) BUCKETS 8 | |
PROPERTIES ( | |
"replication_num" = "1", | |
"in_memory" = "false", | |
"enable_persistent_index" = "true", | |
"replicated_storage" = "true", | |
"compression" = "LZ4" | |
); | |
表示从2014 开始创建分区 一直到2060 | |
从新在查询: | |
select case when length('01')=2 then left(a.linkdistid,4) else a.linkdistid end distno, | |
count(distinct ifnull(b.acc_set,c.acc_set)) zts, | |
convert(sum(b.debit_month),DECIMAL) bqs, | |
convert(sum(c.debit_month),DECIMAL) sqs, | |
case when sum(c.debit_month)=0 then 0 else convert(sum(b.debit_month)/sum(c.debit_month)*100,DECIMAL) end zxl | |
from gz_acc_set a left outer join gz_balance b on a.acc_set=b.acc_set | |
and b.YEARs=2015 and b.months=4 | |
and b.acc_code like '522%' | |
left outer join gz_balance c on a.acc_set=c.acc_set | |
--and c.YEARs*12+c.months=2015*12+4-1 | |
and (c.years = (case when 4 = 1 then 2015 - 1 else 2015 end) and c.months = (case when 4 = 1 then 12 else 4 - 1 end)) | |
and c.acc_code like '522%' | |
where b.debit_month+c.debit_month>=1 | |
and a.dwlx =case when '全部'='全部' then dwlx else '全部' end | |
and a.linkDistId like '01%' group by case when length('01')=2 then left(a.linkdistid,4) else a.linkdistid end | |
> OK | |
> 查询时间: 0.209s | |
因为分区了 那么每个桶的数据量就会少,查询就会更快 | |
执行计划: | |
PLAN FRAGMENT 4 | |
OUTPUT EXPRS: | |
PARTITION: RANDOM | |
STREAM DATA SINK | |
EXCHANGE ID: 05 | |
HASH_PARTITIONED: 60: acc_set | |
4:Project | |
| <slot 60> : 60: acc_set | |
| <slot 68> : 68: debit_month | |
| | |
3:OlapScanNode | |
TABLE: gz_balance | |
PREAGGREGATION: ON | |
PREDICATES: 61: months = 3, 63: acc_code LIKE '522%' | |
partitions=1/21 | |
rollup: gz_balance | |
tabletRatio=8/8 | |
tabletList=46294,46296,46298,46300,46302,46304,46306,46308 | |
cardinality=572172 | |
avgRowSize=23.7504 | |
PLAN FRAGMENT 5 | |
OUTPUT EXPRS: | |
PARTITION: RANDOM | |
STREAM DATA SINK | |
EXCHANGE ID: 02 | |
HASH_PARTITIONED: 34: acc_set | |
1:Project | |
| <slot 34> : 34: acc_set | |
| <slot 42> : 42: debit_month | |
| | |
0:OlapScanNode | |
TABLE: gz_balance | |
PREAGGREGATION: ON | |
PREDICATES: 35: months = 4, 37: acc_code LIKE '522%' | |
partitions=1/21 | |
rollup: gz_balance | |
tabletRatio=8/8 | |
tabletList=46294,46296,46298,46300,46302,46304,46306,46308 | |
cardinality=572172 | |
avgRowSize=23.7504 | |
partitions=1/21 表示总共有21个分区走了其中一个 | |
tabletRatio=8/8 表示走一个分区的8个桶 | |
[0m | |
[0mFragment 4[0m | |
│ [0mBackendNum: 1[0m | |
│ [0mInstancePeakMemoryUsage: 27.611 MB, InstanceAllocatedMemoryUsage: 171.213 MB[0m | |
│ [0mPrepareTime: 432.860us[0m | |
└──[0mDATA_STREAM_SINK (id=5)[0m | |
│ [0mPartitionType: HASH_PARTITIONED[0m | |
│ [0mPartitionExprs: [60: acc_set][0m | |
└──[0mPROJECT (id=4) [0m | |
│ [0mEstimates: [row: ?, cpu: ?, memory: ?, network: ?, cost: ?][0m | |
│ [0mTotalTime: 29.300us (0.01%) [CPUTime: 29.300us][0m | |
│ [0mOutputRows: 71.199K (71199)[0m | |
│ [0mExpression: [60: acc_set, 68: debit_month][0m | |
└──[1m[31mOLAP_SCAN (id=3) [0m | |
[1m[31mEstimates: [row: 572171, cpu: 13589308.07, memory: 0.00, network: 0.00, cost: 6794654.03][0m | |
[1m[31mTotalTime: 120.130ms (48.71%) [CPUTime: 1.695ms, ScanTime: 118.434ms][0m | |
[1m[31mOutputRows: 71.199K (71199)[0m | |
[1m[31mRuntimeFilter: 71.199K (71199) -> 71.199K (71199) (0.00%)[0m | |
[1m[31mTable: : gz_balance[0m | |
[1m[31mSubordinateOperators: [0m | |
[1m[31mCHUNK_ACCUMULATE[0m | |
[1m[31mNOOP[0m | |
[1m[31mOLAP_SCAN_PREPARE[0m | |
[1m[31mDetail Timers: [ScanTime = IOTaskExecTime + IOTaskWaitTime][0m | |
[1m[31mIOTaskExecTime: 96.399ms [min=73.400ms, max=117.540ms][0m | |
[1m[31mSegmentRead: 86.994ms [min=66.072ms, max=106.873ms][0m | |
[1m[31mBlockFetch: 53.470ms [min=43.348ms, max=74.015ms][0m | |
[1m[31mIOTaskWaitTime: 431.071us [min=105.350us, max=894.680us][0m | |
[0m | |
[0mFragment 5[0m | |
│ [0mBackendNum: 1[0m | |
│ [0mInstancePeakMemoryUsage: 34.622 MB, InstanceAllocatedMemoryUsage: 168.411 MB[0m | |
│ [0mPrepareTime: 367.390us[0m | |
└──[0mDATA_STREAM_SINK (id=2)[0m | |
│ [0mPartitionType: HASH_PARTITIONED[0m | |
│ [0mPartitionExprs: [34: acc_set][0m | |
└──[0mPROJECT (id=1) [0m | |
│ [0mEstimates: [row: ?, cpu: ?, memory: ?, network: ?, cost: ?][0m | |
│ [0mTotalTime: 34.850us (0.01%) [CPUTime: 34.850us][0m | |
│ [0mOutputRows: 71.245K (71245)[0m | |
│ [0mExpression: [34: acc_set, 42: debit_month][0m | |
└──[1m[31mOLAP_SCAN (id=0) [0m | |
[1m[31mEstimates: [row: 572171, cpu: 13589308.07, memory: 0.00, network: 0.00, cost: 6794654.03][0m | |
[1m[31mTotalTime: 102.443ms (41.54%) [CPUTime: 2.462ms, ScanTime: 99.980ms][0m | |
[1m[31mOutputRows: 71.245K (71245)[0m | |
[1m[31mRuntimeFilter: 71.245K (71245) -> 71.245K (71245) (0.00%)[0m | |
[1m[31mTable: : gz_balance[0m | |
[1m[31mSubordinateOperators: [0m | |
[1m[31mCHUNK_ACCUMULATE[0m | |
[1m[31mNOOP[0m | |
[1m[31mOLAP_SCAN_PREPARE[0m | |
[1m[31mDetail Timers: [ScanTime = IOTaskExecTime + IOTaskWaitTime][0m | |
[1m[31mIOTaskExecTime: 84.520ms [min=65.642ms, max=98.813ms][0m | |
[1m[31mSegmentRead: 69.156ms [min=53.675ms, max=82.214ms][0m | |
[1m[31mBlockFetch: 46.202ms [min=32.908ms, max=58.337ms][0m | |
[1m[31mIOTaskWaitTime: 2.098ms [min=343.760us, max=9.110ms][0m | |
[0m | |
可以看到走了分区消耗的内存更少 因为可以快速定位到指定分区 InstanceAllocatedMemoryUsage: 168.411 MB |