PostgreSQL の実行計画の例
このページは、テストデータを用いて PostgreSQL の実行計画の検証を行った結果を記載するページです。
目次
注意
- 常に同じ結果・速度になるわけではありません (ある程度の参考のために記載しています)
前提条件
SELECT version();
-- PostgreSQL 13.4, compiled by Visual C++ build 1914, 64-bit
データは下記のデータを用います。
- 部署マスタ (部署ID, 部署名) - 100件
- 部署ID = bigserial PRIMARY KEY
- 部署名 = varchar(255)
- 従業員マスタ (従業員ID, 社員名, 性別, 部署ID) - 100万件
- 従業員ID = bigserial PRIMARY KEY
- 社員名 = varchar(255) (インデックスあり)
- 性別 = integer (1 = 男性, 2 = 女性。インデックスあり)
- 部署ID = bigint (部署マスタ.部署ID の外部キー制約。インデックスあり)
テストデータ
-- 部署マスタ定義
DROP TABLE IF EXISTS dept CASCADE;
CREATE TABLE dept (
id bigserial PRIMARY KEY, -- 部署ID
dept_name varchar(255) -- 部署名
);
-- 100件分の部署データを登録
INSERT INTO dept
SELECT
i,
'dept' || TO_CHAR(i, 'FM000') -- dept001 ~ dept100
FROM generate_series(1, 100) AS i;
-- 従業員マスタ定義
DROP TABLE IF EXISTS emp CASCADE;
CREATE TABLE emp (
id bigserial PRIMARY KEY, -- 従業員ID
emp_name varchar(255), -- 社員名
gender integer, -- 性別 (1 = 男性, 2 = 女性)
dept_id bigint REFERENCES dept(id) -- 部署ID
);
CREATE INDEX emp_name_idx ON emp(emp_name); -- 社員名へのインデックス
CREATE INDEX emp_gender_idx ON emp(gender); -- 性別へのインデックス
CREATE INDEX emp_dept_id_idx ON emp(dept_id); -- 部署IDへのインデックス
-- 100万件分の従業員データを登録
INSERT INTO emp
SELECT
i,
'emp' || TO_CHAR(i, 'FM0000000'), -- emp0000001 ~ emp1000000
CAST(FLOOR(random() * 10) AS numeric) % 2 + 1, -- 1, 2
CAST(FLOOR(random() * 100) AS numeric) + 1 -- 1 ~ 100
FROM generate_series(1, 1000000) AS i;
UPDATE emp SET dept_id = NULL WHERE id % 10000 = 0; -- 一部の従業員を部署未所属にする (100件)
UPDATE emp SET emp_name = 'doumei' WHERE id % 10000 = 0; -- 一部の従業員名を同名にする (100件)
クエリの例と結果
一般的なクエリ
EXPLAIN ANALYZE SELECT * FROM emp;
"Seq Scan on emp (cost=0.00..17355.00 rows=1000000 width=30) (actual time=0.011..58.786 rows=1000000 loops=1)"
"Planning Time: 0.068 ms"
"Execution Time: 83.593 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE id = 1000;
"Index Scan using emp_pkey on emp (cost=0.42..8.44 rows=1 width=30) (actual time=0.014..0.015 rows=1 loops=1)"
" Index Cond: (id = 1000)"
"Planning Time: 0.100 ms"
"Execution Time: 0.027 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE id <> 1000;
"Seq Scan on emp (cost=0.00..19855.00 rows=999999 width=30) (actual time=0.013..98.106 rows=999999 loops=1)"
" Filter: (id <> 1000)"
" Rows Removed by Filter: 1"
"Planning Time: 0.126 ms"
"Execution Time: 121.208 ms"
OR と IN (主キー検索の比較)
EXPLAIN ANALYZE SELECT * FROM emp WHERE id = 1000 OR id = 2000 OR id = 3000;
"Bitmap Heap Scan on emp (cost=13.30..25.17 rows=3 width=30) (actual time=0.028..0.030 rows=3 loops=1)"
" Recheck Cond: ((id = 1000) OR (id = 2000) OR (id = 3000))"
" Heap Blocks: exact=3"
" -> BitmapOr (cost=13.30..13.30 rows=3 width=0) (actual time=0.023..0.023 rows=0 loops=1)"
" -> Bitmap Index Scan on emp_pkey (cost=0.00..4.43 rows=1 width=0) (actual time=0.013..0.013 rows=1 loops=1)"
" Index Cond: (id = 1000)"
" -> Bitmap Index Scan on emp_pkey (cost=0.00..4.43 rows=1 width=0) (actual time=0.006..0.006 rows=1 loops=1)"
" Index Cond: (id = 2000)"
" -> Bitmap Index Scan on emp_pkey (cost=0.00..4.43 rows=1 width=0) (actual time=0.003..0.003 rows=1 loops=1)"
" Index Cond: (id = 3000)"
"Planning Time: 0.105 ms"
"Execution Time: 0.055 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE id IN (1000, 2000, 3000);
"Index Scan using emp_pkey on emp (cost=0.42..17.33 rows=3 width=30) (actual time=0.023..0.030 rows=3 loops=1)"
" Index Cond: (id = ANY ('{1000,2000,3000}'::bigint[]))"
"Planning Time: 0.143 ms"
"Execution Time: 0.042 ms"
- IN にしたほうが Index Scan のみになりコストが低い
IN と EXISTS
EXPLAIN ANALYZE SELECT * FROM emp WHERE dept_id IN (SELECT id FROM dept);
"Hash Join (cost=3.25..20094.48 rows=1000000 width=30) (actual time=0.040..273.272 rows=999900 loops=1)"
" Hash Cond: (emp.dept_id = dept.id)"
" -> Seq Scan on emp (cost=0.00..17355.00 rows=1000000 width=30) (actual time=0.011..58.023 rows=1000000 loops=1)"
" -> Hash (cost=2.00..2.00 rows=100 width=8) (actual time=0.024..0.025 rows=100 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 12kB"
" -> Seq Scan on dept (cost=0.00..2.00 rows=100 width=8) (actual time=0.007..0.012 rows=100 loops=1)"
"Planning Time: 0.346 ms"
"Execution Time: 296.546 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE EXISTS (SELECT 1 FROM dept WHERE dept.id = emp.dept_id);
"Hash Join (cost=3.25..20094.48 rows=1000000 width=30) (actual time=0.039..275.049 rows=999900 loops=1)"
" Hash Cond: (emp.dept_id = dept.id)"
" -> Seq Scan on emp (cost=0.00..17355.00 rows=1000000 width=30) (actual time=0.011..58.257 rows=1000000 loops=1)"
" -> Hash (cost=2.00..2.00 rows=100 width=8) (actual time=0.023..0.025 rows=100 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 12kB"
" -> Seq Scan on dept (cost=0.00..2.00 rows=100 width=8) (actual time=0.007..0.013 rows=100 loops=1)"
"Planning Time: 0.182 ms"
"Execution Time: 298.490 ms"
- このケースでは違いは特にない
インデックスが設定されている項目の検索
EXPLAIN ANALYZE SELECT * FROM emp WHERE gender = 1;
"Bitmap Heap Scan on emp (cost=5344.89..18823.23 rows=489867 width=30) (actual time=18.253..84.765 rows=499445 loops=1)"
" Recheck Cond: (gender = 1)"
" Heap Blocks: exact=7355"
" -> Bitmap Index Scan on emp_gender_idx (cost=0.00..5222.43 rows=489867 width=0) (actual time=17.403..17.403 rows=499445 loops=1)"
" Index Cond: (gender = 1)"
"Planning Time: 0.174 ms"
"Execution Time: 101.507 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE emp_name = 'emp0000001';
"Index Scan using emp_name_idx on emp (cost=0.42..8.44 rows=1 width=30) (actual time=0.015..0.015 rows=1 loops=1)"
" Index Cond: ((emp_name)::text = 'emp0000001'::text)"
"Planning Time: 0.098 ms"
"Execution Time: 0.028 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE emp_name = 'doumei';
"Index Scan using emp_name_idx on emp (cost=0.42..8.44 rows=1 width=30) (actual time=0.017..0.029 rows=100 loops=1)"
" Index Cond: ((emp_name)::text = 'doumei'::text)"
"Planning Time: 0.090 ms"
"Execution Time: 0.042 ms"
EXPLAIN ANALYZE SELECT * FROM emp WHERE dept_id = 100;
"Bitmap Heap Scan on emp (cost=121.42..7969.21 rows=10967 width=30) (actual time=1.561..8.253 rows=10144 loops=1)"
" Recheck Cond: (dept_id = 100)"
" Heap Blocks: exact=5468"
" -> Bitmap Index Scan on emp_dept_id_idx (cost=0.00..118.68 rows=10967 width=0) (actual time=1.043..1.044 rows=10144 loops=1)"
" Index Cond: (dept_id = 100)"
"Planning Time: 0.096 ms"
"Execution Time: 8.725 ms"
- インデックスを設定していても、区分の種類やデータ量によってうまく使えるケースと使えないケース (使っても意味がない) がある (Seq Scan になることもある)
JOIN (LEFT JOIN, INNER JOIN の比較)
(従業員マスタと部署マスタを結合するクエリの比較。部署に未所属の従業員が100名いるため、件数は INNER JOIN が100件少なくなる)
EXPLAIN ANALYZE SELECT emp.emp_name, dept.dept_name FROM emp LEFT JOIN dept ON emp.dept_id = dept.id
"Hash Left Join (cost=3.25..20094.48 rows=1000000 width=18) (actual time=0.077..271.505 rows=1000000 loops=1)"
" Hash Cond: (emp.dept_id = dept.id)"
" -> Seq Scan on emp (cost=0.00..17355.00 rows=1000000 width=18) (actual time=0.013..57.751 rows=1000000 loops=1)"
" -> Hash (cost=2.00..2.00 rows=100 width=16) (actual time=0.058..0.060 rows=100 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 13kB"
" -> Seq Scan on dept (cost=0.00..2.00 rows=100 width=16) (actual time=0.010..0.027 rows=100 loops=1)"
"Planning Time: 0.262 ms"
"Execution Time: 294.266 ms"
EXPLAIN ANALYZE SELECT emp.emp_name, dept.dept_name FROM emp INNER JOIN dept ON emp.dept_id = dept.id
"Hash Join (cost=3.25..20094.48 rows=1000000 width=18) (actual time=0.056..279.061 rows=999900 loops=1)"
" Hash Cond: (emp.dept_id = dept.id)"
" -> Seq Scan on emp (cost=0.00..17355.00 rows=1000000 width=18) (actual time=0.011..56.856 rows=1000000 loops=1)"
" -> Hash (cost=2.00..2.00 rows=100 width=16) (actual time=0.039..0.040 rows=100 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 13kB"
" -> Seq Scan on dept (cost=0.00..2.00 rows=100 width=16) (actual time=0.011..0.023 rows=100 loops=1)"
"Planning Time: 0.209 ms"
"Execution Time: 301.998 ms"
- このケースでは特に実行計画に違いがない
COUNT() の比較
EXPLAIN ANALYZE SELECT COUNT(*) FROM emp;
"Finalize Aggregate (cost=13563.55..13563.56 rows=1 width=8) (actual time=75.110..81.125 rows=1 loops=1)"
" -> Gather (cost=13563.33..13563.54 rows=2 width=8) (actual time=74.966..81.118 rows=3 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Partial Aggregate (cost=12563.33..12563.34 rows=1 width=8) (actual time=51.109..51.110 rows=1 loops=3)"
" -> Parallel Seq Scan on emp (cost=0.00..11521.67 rows=416667 width=0) (actual time=0.013..30.551 rows=333333 loops=3)"
"Planning Time: 0.129 ms"
"Execution Time: 81.160 ms"
EXPLAIN ANALYZE SELECT COUNT(*) FROM emp WHERE dept_id = 1;
"Aggregate (cost=237.09..237.09 rows=1 width=8) (actual time=0.889..0.890 rows=1 loops=1)"
" -> Index Only Scan using emp_dept_id_idx on emp (cost=0.42..212.00 rows=10033 width=0) (actual time=0.017..0.581 rows=9912 loops=1)"
" Index Cond: (dept_id = 1)"
" Heap Fetches: 0"
"Planning Time: 0.103 ms"
"Execution Time: 0.915 ms"
EXPLAIN ANALYZE SELECT COUNT(id) FROM emp WHERE dept_id = 1;
"Aggregate (cost=8009.11..8009.12 rows=1 width=8) (actual time=10.138..10.139 rows=1 loops=1)"
" -> Bitmap Heap Scan on emp (cost=114.18..7984.03 rows=10033 width=8) (actual time=1.721..9.105 rows=9912 loops=1)"
" Recheck Cond: (dept_id = 1)"
" Heap Blocks: exact=5444"
" -> Bitmap Index Scan on emp_dept_id_idx (cost=0.00..111.67 rows=10033 width=0) (actual time=1.050..1.050 rows=9912 loops=1)"
" Index Cond: (dept_id = 1)"
"Planning Time: 0.096 ms"
"Execution Time: 10.307 ms"
- インデックスが使えるケースでも、
COUNT
の違いで実行計画に違いが出るCOUNT(*)
はインデックスの情報だけで件数が判断できるので Index Only Scan になるCOUNT(項目)
は項目の値が NULL かどうかで件数が変わるため、Index Only Scan にできない (インデックスを確認した後に項目の値を確認しにいく必要がある)
別テーブルの件数取得クエリの比較
(「部署名, 部署ごとの従業員数」を算出するクエリの比較)
EXPLAIN ANALYZE SELECT dept_name, (SELECT COUNT(*) FROM emp WHERE emp.dept_id = dept.id) FROM dept;
"Seq Scan on dept (cost=0.00..23641.50 rows=100 width=16) (actual time=1.936..103.614 rows=100 loops=1)"
" SubPlan 1"
" -> Aggregate (cost=236.39..236.40 rows=1 width=8) (actual time=1.034..1.034 rows=1 loops=100)"
" -> Index Only Scan using emp_dept_id_idx on emp (cost=0.42..211.39 rows=9998 width=0) (actual time=0.005..0.631 rows=9999 loops=100)"
" Index Cond: (dept_id = dept.id)"
" Heap Fetches: 0"
"Planning Time: 0.176 ms"
"Execution Time: 103.692 ms"
EXPLAIN ANALYZE SELECT dept.dept_name, dept_emp.cnt
FROM dept
INNER JOIN (SELECT dept_id, COUNT(*) AS cnt FROM emp GROUP BY dept_id) AS dept_emp ON dept.id = dept_emp.dept_id;
"Hash Join (cost=14612.60..14639.21 rows=100 width=16) (actual time=117.584..123.064 rows=100 loops=1)"
" Hash Cond: (emp.dept_id = dept.id)"
" -> Finalize GroupAggregate (cost=14609.35..14634.68 rows=100 width=16) (actual time=117.531..122.987 rows=101 loops=1)"
" Group Key: emp.dept_id"
" -> Gather Merge (cost=14609.35..14632.68 rows=200 width=16) (actual time=117.526..122.948 rows=302 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Sort (cost=13609.32..13609.57 rows=100 width=16) (actual time=94.078..94.081 rows=101 loops=3)"
" Sort Key: emp.dept_id"
" Sort Method: quicksort Memory: 29kB"
" Worker 0: Sort Method: quicksort Memory: 29kB"
" Worker 1: Sort Method: quicksort Memory: 29kB"
" -> Partial HashAggregate (cost=13605.00..13606.00 rows=100 width=16) (actual time=93.990..94.005 rows=101 loops=3)"
" Group Key: emp.dept_id"
" Batches: 1 Memory Usage: 32kB"
" Worker 0: Batches: 1 Memory Usage: 32kB"
" Worker 1: Batches: 1 Memory Usage: 32kB"
" -> Parallel Seq Scan on emp (cost=0.00..11521.67 rows=416667 width=8) (actual time=0.232..35.434 rows=333333 loops=3)"
" -> Hash (cost=2.00..2.00 rows=100 width=16) (actual time=0.045..0.046 rows=100 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 13kB"
" -> Seq Scan on dept (cost=0.00..2.00 rows=100 width=16) (actual time=0.018..0.029 rows=100 loops=1)"
"Planning Time: 0.201 ms"
"Execution Time: 123.126 ms"
- ①のケースでは「部署マスタの100件 × 従業員マスタの件数取得 (SubPlan 1) の合計コスト ≒ 全体の合計コスト」になっているが、実際の速度的には②と大差がない (少し速い)
- 部署マスタを1万件に増やした場合でも、速度的には①のほうが少し速い