--CREATE LANGUAGE plpgsql; --CREATE LANGUAGE plperl; --CREATE LANGUAGE plpythonu; CREATE OR REPLACE FUNCTION random_string(length INT) RETURNS TEXT AS $$ DECLARE chars TEXT[] := '{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z}'; result TEXT := ''; i INT := 0; BEGIN FOR i IN 1..length LOOP result := result || chars[1+random()*(array_length(chars, 1)-1)]; END LOOP; RETURN result; END; $$ LANGUAGE plpgsql; -- SELECT array_to_string(ARRAY(SELECT chr((65 + round(random() * 25))::int) -- FROM generate_series(1,CEIL(RANDOM()*100)::int)), ''); DROP TABLE rel; DROP TABLE child; DROP TABLE parent; -- no indexes, primary or foreign keys (yet...) CREATE TABLE rel ( parentId INT NOT NULL, childId INT NOT NULL, data VARCHAR(80) NOT NULL ); -- 2M of records: parentId from [1, 1000], childId from [1, 2000] -- random data from an iid distribution -- takes roughly 2.25 minutes on my machine INSERT INTO rel SELECT CEIL(RANDOM()*1000)::int, CEIL(RANDOM()*2000)::int, random_string(CEIL(RANDOM()*80)::int) FROM GENERATE_SERIES(1, 1000) s1(N1), GENERATE_SERIES(1, 2000) s2(N2); -- collect statistics VACUUM ANALYZE rel; -- total inserted records: 2000000 SELECT COUNT(*) FROM rel; -- first 100 inserted records: look at some data SELECT * FROM rel LIMIT 100; -- create parent table with unique parentIds from rel CREATE TABLE parent AS SELECT parentId, CEIL(RANDOM()*10000)::int AS data FROM (SELECT DISTINCT parentId FROM rel) AS parentIds; -- collect statistics VACUUM ANALYZE parent; -- total inserted records: 1000, most likely SELECT COUNT(*) FROM parent; -- create child table with unique childIds from rel CREATE TABLE child AS SELECT childId, CEIL(RANDOM()*20000)::int AS data FROM (SELECT DISTINCT childId FROM rel) AS childIds; -- collect statistics VACUUM ANALYZE child; -- total inserted records: 2000, most likely SELECT COUNT(*) FROM child; -- what do you expect to see? SELECT AVG(parentId) FROM rel; SELECT AVG(childId) FROM rel; SELECT AVG(parentId) FROM parent; SELECT AVG(childId) FROM child; -- ************************************************************ -- *** Cost of plans without indexes in place. -- ************************************************************ -- [cost: 15] seq scan EXPLAIN VERBOSE SELECT * FROM parent; -- [cost: 29] seq scan EXPLAIN VERBOSE SELECT * FROM child; -- [cost: 39K] seq scan EXPLAIN VERBOSE SELECT * FROM rel; -- [cost: 67] seq scan + sort EXPLAIN VERBOSE SELECT * FROM parent ORDER BY parentId; EXPLAIN VERBOSE SELECT * FROM parent ORDER BY data; EXPLAIN VERBOSE SELECT * FROM parent ORDER BY parentId, data; -- [cost: 143] seq scan + sort EXPLAIN VERBOSE SELECT * FROM child ORDER BY childId; EXPLAIN VERBOSE SELECT * FROM child ORDER BY data; EXPLAIN VERBOSE SELECT * FROM child ORDER BY childId, data; -- [cost: 448K] seq scan + sort EXPLAIN VERBOSE SELECT * FROM rel ORDER BY parentId; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY childId; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY data; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY parentId, data; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY parentId, childId, data; -- [cost: 17.50--20.00] all queries below: filter + seq scan EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId = 400; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId <> 400; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 400; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 800; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId BETWEEN 800 AND 900; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId BETWEEN 200 AND 800; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 800 or parentId < 200; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 900 or parentId < 100; -- [cost: 34.00-39.00] all queries below: filter + seq scan EXPLAIN VERBOSE SELECT * FROM child WHERE childId = 400; EXPLAIN VERBOSE SELECT * FROM child WHERE childId <> 400; EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 400; EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 1800; EXPLAIN VERBOSE SELECT * FROM child WHERE childId BETWEEN 1800 AND 1900; EXPLAIN VERBOSE SELECT * FROM child WHERE childId BETWEEN 200 AND 1800; EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 1800 or childId < 200; EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 1900 or childId < 100; -- [cost: 44K--49K] all queries below: filter + seq scan EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId = 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId <> 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 800; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId BETWEEN 800 AND 900; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId BETWEEN 200 AND 800; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 800 or parentId < 200; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 900 or parentId < 100; -- [cost: 44K--49K] all queries below: filter + seq scan EXPLAIN VERBOSE SELECT * FROM rel WHERE childId = 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId <> 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1800; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId BETWEEN 1800 AND 1900; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId BETWEEN 200 AND 1800; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1800 or childId < 200; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1900 or childId < 100; -- [cost: 44K--49K] all queries below: filter + seq scan EXPLAIN VERBOSE SELECT * FROM rel WHERE data = 'ce'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data <> 'ce'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'd'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'x'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data BETWEEN 'a' AND 'b'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data BETWEEN 'd' AND 'r'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'x' OR data < 'b'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'r' OR data < 'd'; -- [cost: 67K] hash join of (seq scan of rel) and (seq scan of parent) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN parent ON (rel.parentId = parent.parentId) -- [cost: 47K] hash join of (seq scan of rel) and (filter + seq scan of parent) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.data < 100; -- [cost: 47K] hash join of (seq scan of rel) and (filter + seq scan of parent) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.data < 400; -- [cost: 69K] hash join of (seq scan of rel) and (seq scan of child) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) -- [cost: 47K] hash join of (seq scan of rel) and (filter + seq scan of child) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) WHERE child.data < 100 -- [cost: 47K] hash join of (seq scan of rel) and (filter + seq scan of child) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) WHERE child.data < 400 -- [cost: 55K] hash join of (seq scan of rel) and (hash join of (seq scan of child) and (seq scan of parent)) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId); -- [cost: 44K] hash join of (filter + seq scan of rel) and (hash join of (seq scan of child) and (seq scan of parent)) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b'; -- [cost: 44K] hash join of (filter + seq scan of rel) and (hash join of (seq scan of parent) and (filter + seq scan of child)) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b' AND child.data < 100; -- [cost: 44K] hash join of (hash join of (filter + seq scan of rel) and (filter + seq scan of child)) and (filter + seq scan of parent) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b' AND child.data < 100 AND parent.data < 100; -- [cost: ~32B] scans all tuples of the materialed subquery for each tuple in rel EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId NOT IN (SELECT parentId FROM rel); -- [cost: 67K] hash left-join on (seq scan on rel) and (seq scan on parent) EXPLAIN VERBOSE SELECT rel.* FROM rel LEFT OUTER JOIN parent ON (rel.parentId = parent.parentId); -- running this query with ANALYZE, it is executed with profiling and the actual cost is 8.6K EXPLAIN ANALYZE VERBOSE SELECT rel.* FROM rel LEFT OUTER JOIN parent ON (rel.parentId = parent.parentId); -- [cost: 47K] hash anti-join on (seq scan on rel) and (seq scan on parent) EXPLAIN VERBOSE SELECT rel.* FROM rel LEFT OUTER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.parentId IS NULL; -- running this query with ANALYZE, it is executed with profiling and the actual cost is 5.8K EXPLAIN ANALYZE VERBOSE SELECT rel.* FROM rel LEFT OUTER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.parentId IS NULL; -- analogous results can be seen joining with the child table EXPLAIN VERBOSE SELECT * FROM rel WHERE childId NOT IN (SELECT childId FROM rel); EXPLAIN VERBOSE SELECT * FROM rel LEFT OUTER JOIN child ON (rel.childId = child.childId); EXPLAIN VERBOSE SELECT * FROM rel LEFT OUTER JOIN child ON (rel.childId = child.childId) WHERE child.childId IS NULL; -- ************************************************************ -- *** Create indexes to optimize the queries above. -- ************************************************************ -- before we create the PK for rel, make sure we *can* by dropping repeated tuples DELETE FROM rel WHERE (parentId, childId, data) IN (SELECT parentId, childId, data FROM rel GROUP BY parentId, childId, data HAVING COUNT(*) > 1); -- create the PKs and indexes below-- UNIQUE indexes are created automatically for PKs. -- Indexes use B-Trees by default. B-Tree indexes may be used to find one value, a range -- of values, or to resolve comparisons involving: <, <=, =, >=, >, NOT NULL, BETWEEN. -- For multiple columns, the index is most useful if the conditions involve the leading -- (leftmost) columns. In contrast, a hash index handles simple equality comparisons. ALTER TABLE parent ADD CONSTRAINT pkParent PRIMARY KEY (parentId); ALTER TABLE child ADD CONSTRAINT pkChild PRIMARY KEY (childId); ALTER TABLE rel ADD CONSTRAINT pkRel PRIMARY KEY (parentId, childId, data); --ALTER TABLE parent DROP CONSTRAINT pkParent; --ALTER TABLE child DROP CONSTRAINT pkChild; --ALTER TABLE rel DROP CONSTRAINT pkRel; -- These indexes will also prove useful in some of the queries. -- It is also wise to create indexes on columns that participate in joins -- and in constraints (foreign keys and check constraints, as for primary -- and unique constraints, indexes are created automatically). CREATE INDEX ixRelData ON rel(data); CREATE INDEX ixParentData ON parent(data); CREATE INDEX ixChildData ON child(data); --DROP INDEX ixRelData; --DROP INDEX ixParentData; --DROP INDEX ixChildData; -- The FKs below are never used in query planning, just for referential integrity. -- But in reality, constraints provide useful information for the query engine, so -- smart query engines can make assumptions based on FKs as well as check constraints. ALTER TABLE rel ADD CONSTRAINT fkRelChild FOREIGN KEY (childId) REFERENCES child(childId); ALTER TABLE rel ADD CONSTRAINT fkRelParent FOREIGN KEY (parentId) REFERENCES parent(parentId); -- ALTER TABLE rel DROP CONSTRAINT fkRelChild; -- ALTER TABLE rel DROP CONSTRAINT fkRelParent; -- ************************************************************ -- *** Cost of plans with indexes in place. Note that all of -- *** the improvements that I point out are in the worst case -- *** of the plan. This means that some times we do not see -- *** an improvement on the numbers. However, on average, the -- *** improvement does exist since the cost of queries will -- *** (roughly) the entire range of the cost estimate, while -- *** I used the more pessimistic cost estimate to illustrate -- *** the differences. -- ************************************************************ -- CREATE INDEX ixRelParentId ON rel(parentId); -- CREATE INDEX ixRelChildId ON rel(childId); -- [cost: 15] seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM parent; -- [cost: 29] seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM child; -- [cost: 39K] seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM rel; -- [cost: 55] index scan-- improved from 67 (20%) EXPLAIN VERBOSE SELECT * FROM parent ORDER BY parentId; EXPLAIN VERBOSE SELECT * FROM parent ORDER BY data; -- [cost: 67] seq scan + sort-- no change (expected) EXPLAIN VERBOSE SELECT * FROM parent ORDER BY parentId, data; -- [cost: 94] index scan-- improved from 143 (36%) EXPLAIN VERBOSE SELECT * FROM child ORDER BY childId; EXPLAIN VERBOSE SELECT * FROM child ORDER BY data; -- [cost: 143] seq scan + sort-- no change (expected) EXPLAIN VERBOSE SELECT * FROM child ORDER BY childId, data; -- [cost: 448K] seq scan + sort-- no change (unexpected-- pkRel is not being used!) EXPLAIN VERBOSE SELECT * FROM rel ORDER BY parentId; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY childId; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY data; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY parentId, data; EXPLAIN VERBOSE SELECT * FROM rel ORDER BY parentId, childId, data; -- [cost: 8] index scan: improved from 17.50--20.00 (54%) EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId = 400; -- [cost: 17.50--20.00] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId <> 400; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 400; -- [cost: 13] heap scan + index scan: improved from 17.50--20.00 (25%) EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 800; -- [cost: 11] heap scan + index scan: improved from 17.50--20.00 (37%) EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId BETWEEN 800 AND 900; -- [cost: 17.50--20.00] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId BETWEEN 200 AND 800; EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 800 or parentId < 200; -- [cost: 18] heap scan + bitmap or + 2 x index scan: improved from 20.00 (10%) EXPLAIN VERBOSE SELECT * FROM parent WHERE parentId > 900 or parentId < 100; -- [cost: 8] index scan: improved from 34.00-39.00 (76%) EXPLAIN VERBOSE SELECT * FROM child WHERE childId = 400; -- [cost: 34.00-39.00] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM child WHERE childId <> 400; EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 400; -- [cost: 17.30] heap scan + index scan: improved from 34.00-39.00 (50%) EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 1800; -- [cost: 15.78] heap scan + index scan: improved from 34.00-39.00 (55%) EXPLAIN VERBOSE SELECT * FROM child WHERE childId BETWEEN 1800 AND 1900; -- [cost: 34.00-39.00] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM child WHERE childId BETWEEN 200 AND 1800; -- [cost: 26] heap scan + bitmap or + 2 x index scan: improved from 34.00-39.00 (23%) EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 1800 or childId < 200; -- [cost: 22] heap scan + bitmap or + 2 x index scan: improved from 34.00-39.00 (35%) EXPLAIN VERBOSE SELECT * FROM child WHERE childId > 1900 or childId < 100; -- [cost: 5.8K] heap scan + index scan: improved from 44K-49K (86%) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId = 400; -- [cost: 44K--49K] filter + seq scan: no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId <> 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 400; -- [cost: 41.8K] heap scan + index scan: improved from 44K-49K (5%) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 800; -- [cost: 31.6K] heap scan + index scan: improved from 44K-49K (30%) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId BETWEEN 800 AND 900; -- [cost: 44K--49K] filter + seq scan: no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId BETWEEN 200 AND 800; EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 800 or parentId < 200; -- [cost: 43K] heap scan + bitmap or + 2 x index scan: improved from 44K-49K (2.2%) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId > 900 or parentId < 100; -- [cost: 44K--49K] filter + seq scan: no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId = 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId <> 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1800; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId BETWEEN 1800 AND 1900; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId BETWEEN 200 AND 1800; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1800 or childId < 200; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1900 or childId < 100; -- After executing the last block of queries above, it is clear that the -- relation would benefit from an index on childId. -- [cost: 12] index scan: improved from 44K-49K (100%) EXPLAIN VERBOSE SELECT * FROM rel WHERE data = 'ce'; -- [cost: 44K--49K] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE data <> 'ce'; EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'd'; -- [cost: 30K] heap scan + index scan: improved from 44K-49K (32%) EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'x'; -- [cost: 22K] heap scan + index scan: improved from 44K-49K (50%) EXPLAIN VERBOSE SELECT * FROM rel WHERE data BETWEEN 'a' AND 'b'; -- [cost: 44K--49K] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE data BETWEEN 'd' AND 'r'; -- [cost: 32K] heap scan + bitmap or + 2 x index scan: improved from 44K-49K (27%) EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'x' OR data < 'b'; -- [cost: 44K--49K] filter + seq scan-- no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE data > 'r' OR data < 'd'; -- [cost: 67K] hash join of (seq scan of rel) and (seq scan of parent)-- no change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN parent ON (rel.parentId = parent.parentId) -- [cost: 45K] nested loop of (filter + seq scan of parent) and (heap scan + index scan of pkRel)-- improvement from 47K (4%) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.data < 100; -- [cost: 47K] hash join of (seq scan of rel) and (heap scan + index scan of ixParentData)-- no *cost* change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.data < 400; -- [cost: 69K] hash join of (seq scan of rel) and (seq scan of child)-- no change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) -- ONE index used, but depending on selectivity, cost may be quite good -- [cost: 47K] hash join of (seq scan of rel) and (heap scan + index scan of ixChildData)-- no *cost* change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) WHERE child.data < 100 -- running this query with ANALYZE, it is executed with profiling and the actual cost is 5.6K (88% improvement) EXPLAIN ANALYZE VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) WHERE child.data < 100 -- ONE index used, but poor selectivity -- [cost: 47K] hash join of (seq scan of rel) and (heap scan + index scan of ixChildData)-- no *cost* change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) WHERE child.data < 400 -- running this query with ANALYZE, it is executed with profiling and the actual cost is 5.6K (88% improvement) EXPLAIN ANALYZE VERBOSE SELECT rel.* FROM rel INNER JOIN child ON (rel.childId = child.childId) WHERE child.data < 400 -- ONE index used -- [cost: 9.3K] nested loop of (hash join of (seq scan of child) and (seq scan of parent)) and (index scan of pkRel)-- improved from 55K (81%) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId); -- running this query with ANALYZE, it is executed with profiling and the actual cost is 13 (100% improvement) EXPLAIN ANALYZE VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId); -- ONE index used -- [cost: 9.3K] nested loop of (hash join of (seq scan of child) and (seq scan of parent)) and (index scan of pkRel)-- improved from 44K (79%) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b'; -- running this query with ANALYZE, it is executed with profiling and the actual cost is 10 (100% improvement) EXPLAIN ANALYZE VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b'; -- THREE indexes used -- [cost: 98] nested loop of (hash join of (seq scan of parent) and (hash join of (heap scan + index scan of ixChildData)) and (index scan of pkRel)-- improved from 44K (100%) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b' AND child.data < 100; -- THREE indexes used -- [cost: 32] nested loop of (hash join of (heap scan + index scan of ixChildData) and (heap scan + index scan of ixParentData)) and (index scan of pkRel)-- improved from 44K (100%) EXPLAIN VERBOSE SELECT * FROM parent NATURAL INNER JOIN child INNER JOIN rel ON (rel.parentId = parent.parentId AND rel.childId = child.childId) WHERE rel.data < 'b' AND child.data < 100 AND parent.data < 100; -- [cost: ~32B] no plan change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE parentId NOT IN (SELECT parentId FROM rel); -- [cost: 67K] no plan change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel LEFT OUTER JOIN parent ON (rel.parentId = parent.parentId); -- [cost: 47K] no plan change (expected) EXPLAIN VERBOSE SELECT rel.* FROM rel LEFT OUTER JOIN parent ON (rel.parentId = parent.parentId) WHERE parent.parentId IS NULL; -- analogous results can be seen joining with the child table EXPLAIN VERBOSE SELECT * FROM rel WHERE childId NOT IN (SELECT childId FROM rel); EXPLAIN VERBOSE SELECT * FROM rel LEFT OUTER JOIN child ON (rel.childId = child.childId); EXPLAIN VERBOSE SELECT * FROM rel LEFT OUTER JOIN child ON (rel.childId = child.childId) WHERE child.childId IS NULL; -- ************************************************************ -- *** Create more indexes to optimize the queries above. -- ************************************************************ -- The need for this index was observed above. CREATE INDEX ixRelChildId ON rel(childId); -- [cost: 3.1K] heap scan + index scan: improved from 44K-49K (93%) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId = 400; -- [cost: 44K--49K] filter + seq scan: no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId <> 400; EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 400; -- [cost: 25.3K] heap scan + index scan: improved from 44K-49K (44%) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1800; -- [cost: 23K] heap scan + index scan: improved from 44K-49K (48%) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId BETWEEN 1800 AND 1900; -- [cost: 44K--49K] filter + seq scan: no change (expected) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId BETWEEN 200 AND 1800; -- [cost: 32K] heap scan + bitmap or + 2x index scan: improved from 44K-49K (27%) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1800 or childId < 200; -- [cost: 25.8K] heap scan + index scan: improved from 44K-49K (41%) EXPLAIN VERBOSE SELECT * FROM rel WHERE childId > 1900 or childId < 100; -- ************************************************************ -- *** Some final thoughts. -- *** In the end, remember that the size of the results you -- *** are returning impose a hard limit on how much you can -- *** optimize-- if you return 2 million rows, your query -- *** should take long! That said, it is important to get -- *** comfortable with the existing features of the DBMS you -- *** are using-- what kinds of indexes are available? when -- *** are they used? how do they compare (cost/benefit)? It -- *** is also important to know the types of queries your -- *** DBMS will have to tackle on a regular basis. Optimize -- *** for the more common and important queries. Finally, -- *** make sure you update your index statistics regularly. -- *** Most databases provide some form of (semi-)automatic -- *** tool to perform such updates. In PostgreSQL, it is the -- *** Autovacuum feature. -- ************************************************************