Skip to content

Instantly share code, notes, and snippets.

@dlangille
Last active January 11, 2016 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dlangille/58a414a260f4c6706d7e to your computer and use it in GitHub Desktop.
Save dlangille/58a414a260f4c6706d7e to your computer and use it in GitHub Desktop.
-- Staging table for the insert-strategy experiment below: it carries the
-- literal path/name text, which is resolved to pathid/filenameid at
-- insert-into-file time.
CREATE TABLE batch_testing (
    fileindex int,
    jobid     int,
    path      varchar,
    name      varchar,
    lstat     varchar,
    md5       varchar,
    deltaseq  smallint
);
-- Populate the staging table by copying one existing job's file rows.
-- Adjust 66 to be a jobid not already in use.
--
-- NOTE(review): rewritten from two correlated scalar subqueries (one index
-- probe per row per lookup) to plain inner joins.  file.pathid and
-- file.filenameid are declared NOT NULL, so the joins are equivalent
-- provided referential integrity holds — a dangling pathid/filenameid would
-- have produced a NULL under the subquery form but drops the row here;
-- confirm no orphans exist before relying on this.
INSERT INTO batch_testing (fileindex, jobid, path, name, lstat, md5, deltaseq)
SELECT
    f.fileindex,
    66,              -- new jobid for the copied rows
    p.path,
    fn.name,
    f.lstat,
    f.md5,
    f.deltaseq
FROM file f
INNER JOIN path p      ON p.pathid      = f.pathid
INNER JOIN filename fn ON fn.filenameid = f.filenameid
WHERE f.jobid = 220384;
-- Variant 1: resolve pathid/filenameid with explicit hash-joinable joins
-- against the whole path/filename tables.
EXPLAIN ANALYSE
INSERT INTO file (fileindex, jobid, pathid, filenameid, lstat, md5, deltaseq)
SELECT
    bt.fileindex,
    bt.jobid,
    p.pathid,
    fn.filenameid,
    bt.lstat,
    bt.md5,
    bt.deltaseq
FROM batch_testing bt
JOIN path p      ON bt.path = p.path
JOIN filename fn ON bt.name = fn.name;
bacula-# INSERT INTO File (FileIndex, JobId, PathId, FilenameId, LStat, MD5, DeltaSeq)
bacula-# SELECT batch_testing.FileIndex, batch_testing.JobId, Path.PathId, Filename.FilenameId, batch_testing.LStat,
bacula-# batch_testing.MD5, batch_testing.DeltaSeq
bacula-# FROM batch_testing JOIN Path ON (batch_testing.Path = Path.Path)
bacula-# JOIN Filename ON (batch_testing.Name = Filename.Name);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------
Insert on file (cost=609217.11..1086290.91 rows=2180160 width=82) (actual time=2694597.462..2694597.462 rows=0 loops=1)
-> Hash Join (cost=609217.11..1086290.91 rows=2180160 width=82) (actual time=12318.241..49475.263 rows=1576445 loops=1)
Hash Cond: ((batch_testing.name)::text = filename.name)
-> Hash Join (cost=224390.82..485373.22 rows=2180160 width=110) (actual time=5407.658..12573.282 rows=1576445 loops=1)
Hash Cond: ((batch_testing.path)::text = path.path)
-> Seq Scan on batch_testing (cost=0.00..67221.60 rows=2180160 width=138) (actual time=0.039..2030.204 rows=1576445 loops=1)
-> Hash (cost=108371.03..108371.03 rows=3986703 width=107) (actual time=4490.619..4490.619 rows=3985488 loops=1)
Buckets: 4096 Batches: 256 Memory Usage: 2180kB
-> Seq Scan on path (cost=0.00..108371.03 rows=3986703 width=107) (actual time=0.028..1219.400 rows=3985488 loops=1)
-> Hash (cost=181973.02..181973.02 rows=9528502 width=42) (actual time=6901.088..6901.088 rows=9531064 loops=1)
Buckets: 8192 Batches: 256 Memory Usage: 2771kB
-> Seq Scan on filename (cost=0.00..181973.02 rows=9528502 width=42) (actual time=0.031..1942.926 rows=9531064 loops=1)
Planning time: 9.673 ms
Execution time: 2694598.161 ms
(14 rows)
bacula=#
About 45 minutes (execution time 2,694,598 ms) for the join-based insert.
-- Variant 2: resolve pathid/filenameid with correlated scalar subqueries,
-- i.e. one index probe into path and one into filename per staging row.
EXPLAIN ANALYZE
INSERT INTO file (fileindex, jobid, pathid, filenameid, lstat, md5, deltaseq)
SELECT
    bt.fileindex,
    bt.jobid,
    (SELECT p.pathid       FROM path p      WHERE p.path  = bt.path),
    (SELECT fn.filenameid  FROM filename fn WHERE fn.name = bt.name),
    bt.lstat,
    bt.md5,
    bt.deltaseq
FROM batch_testing bt;
bacula=# explain analyse
bacula-# INSERT INTO File (FileIndex, JobId, PathId, FilenameId, LStat, MD5, DeltaSeq)
bacula-# SELECT BT.FileIndex,
bacula-# BT.JobId,
bacula-# (SELECT Path.PathId FROM Path WHERE Path.path = BT.Path),
bacula-# (SELECT Filename.FilenameId FROM Filename WHERE Filename.name = BT.Name) ,
bacula-# BT.LStat,
bacula-# BT.MD5,
bacula-# BT.DeltaSeq
bacula-# FROM batch_testing BT;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
Insert on file (cost=0.00..27127759.19 rows=1577995 width=199) (actual time=904398.940..904398.940 rows=0 loops=1)
-> Seq Scan on batch_testing bt (cost=0.00..27127759.19 rows=1577995 width=199) (actual time=6.484..67718.354 rows=1576445 loops=1)
SubPlan 1
-> Index Scan using path_name_idx on path (cost=0.56..8.57 rows=1 width=4) (actual time=0.011..0.011 rows=1 loops=1576445)
Index Cond: (path = (bt.path)::text)
SubPlan 2
-> Index Scan using filename_name_idx on filename (cost=0.56..8.58 rows=1 width=4) (actual time=0.013..0.014 rows=1 loops=1576445)
Index Cond: (name = (bt.name)::text)
Planning time: 0.204 ms
Execution time: 904399.021 ms
(10 rows)
bacula=#
About 15 minutes (904,399 ms) for the scalar-subquery variant —
roughly 3× faster than the join-based version above.
bacula=# \d path
Table "public.path"
Column | Type | Modifiers
--------+---------+-------------------------------------------------------
pathid | integer | not null default nextval('path_pathid_seq'::regclass)
path | text | not null
Indexes:
"path_pkey" PRIMARY KEY, btree (pathid)
"path_name_idx" btree (path)
bacula=# \d filename
Table "public.filename"
Column | Type | Modifiers
------------+---------+---------------------------------------------------------------
filenameid | integer | not null default nextval('filename_filenameid_seq'::regclass)
name | text | not null
Indexes:
"filename_pkey" PRIMARY KEY, btree (filenameid)
"filename_name_idx" btree (name)
bacula=#
bacula=# \d file
Table "public.file"
Column | Type | Modifiers
------------+----------+-------------------------------------------------------
fileid | bigint | not null default nextval('file_fileid_seq'::regclass)
fileindex | integer | not null default 0
jobid | integer | not null
pathid | integer | not null
markid | integer | not null default 0
lstat | text | not null
md5 | text | not null
filenameid | integer | not null
deltaseq | smallint | default 0
Indexes:
"file_pkey" PRIMARY KEY, btree (fileid)
"file_fileindex" btree (fileindex)
"file_filenameid_idx" btree (filenameid)
"file_jobid_fileindex" btree (jobid, fileindex)
"file_jobid_idx" btree (jobid)
"file_jpfid_idx" btree (jobid, pathid, filenameid)
"file_pathid_idx" btree (pathid)
bacula=#
#local_preload_libraries = ''
#session_preload_libraries = ''
#------------------------------------------------------------------------------
# LOCK MANAGEMENT
#------------------------------------------------------------------------------
#deadlock_timeout = 1s
#max_locks_per_transaction = 64 # min 10
# (change requires restart)
# Note: Each lock table slot uses ~270 bytes of shared memory, and there are
# max_locks_per_transaction * (max_connections + max_prepared_transactions)
# lock table slots.
#max_pred_locks_per_transaction = 64 # min 10
# (change requires restart)
#------------------------------------------------------------------------------
# VERSION/PLATFORM COMPATIBILITY
#------------------------------------------------------------------------------
# - Previous PostgreSQL Versions -
#array_nulls = on
#backslash_quote = safe_encoding # on, off, or safe_encoding
#default_with_oids = off
#escape_string_warning = on
#lo_compat_privileges = off
#quote_all_identifiers = off
#sql_inheritance = on
#standard_conforming_strings = on
#synchronize_seqscans = on
# - Other Platforms and Clients -
#transform_null_equals = off
#------------------------------------------------------------------------------
# ERROR HANDLING
#------------------------------------------------------------------------------
#exit_on_error = off # terminate session on any error?
#restart_after_crash = on # reinitialize after backend crash?
#------------------------------------------------------------------------------
# CONFIG FILE INCLUDES
#------------------------------------------------------------------------------
# These options allow settings to be loaded from files other than the
# default postgresql.conf.
#include_dir = 'conf.d' # include files ending in '.conf' from
# directory 'conf.d'
#include_if_exists = 'exists.conf' # include file only if it exists
#include = 'special.conf' # include file
#------------------------------------------------------------------------------
# CUSTOMIZED OPTIONS
#------------------------------------------------------------------------------
# Add settings for extensions here
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment