Skip to content

Instantly share code, notes, and snippets.

@stompro
Created July 7, 2017 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stompro/a4f34f6e25f56cc5847dade9e5c80d36 to your computer and use it in GitHub Desktop.
Save stompro/a4f34f6e25f56cc5847dade9e5c80d36 to your computer and use it in GitHub Desktop.
super simple record changes
-- Check max length after super simple changes
select x.*, length(x.title) from (
SELECT r.id,
r.fingerprint,
r.quality,
r.tcn_source,
r.tcn_value,
FIRST(title.value) AS title,
FIRST(author.value) AS author,
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher,
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate,
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') )
END AS isbn,
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') )
END AS issn
FROM biblio.record_entry r
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
LEFT JOIN metabib.full_rec title_np ON (r.id = title_np.record AND title_np.tag = '245' AND title_np.subfield IN ('p','n'))
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b')
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c')
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
where not r.deleted
GROUP BY 1,2,3,4,5
) x
where length(x.title) > 60
and x.tcn_source!='';
select x.*, length(x.title) from (
SELECT r.id,
r.fingerprint,
r.quality,
r.tcn_source,
r.tcn_value,
CONCAT_WS(' ', FIRST(title.value),string_agg(distinct title_np.value,' ' order by title_np.value)) AS title,
FIRST(author.value) AS author,
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher,
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate,
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') )
END AS isbn,
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') )
END AS issn
FROM biblio.record_entry r
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
LEFT JOIN metabib.full_rec title_np ON (r.id = title_np.record AND title_np.tag = '245' AND title_np.subfield IN ('p','n'))
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b')
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c')
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
where not r.deleted
GROUP BY 1,2,3,4,5
) x
where length(x.title) > 60
and x.tcn_source!='';
-- try using a subquery join instead
explain analyse
SELECT r.id,
r.fingerprint,
r.quality,
r.tcn_source,
r.tcn_value,
CONCAT_WS(' ', FIRST(title.value),FIRST(title_np.val)) AS title,
FIRST(author.value) AS author,
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher,
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate,
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') )
END AS isbn,
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') )
END AS issn
FROM biblio.record_entry r
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
LEFT JOIN (
select b.record, string_agg(val, ' ') as val from (
select -- string_agg(' ',title_np.value)
title_np.record, title_np.value as val
from metabib.full_rec title_np
where
--title_np.record=96961
title_np.tag = '245'
AND title_np.subfield IN ('p','n')
--group by title_np.record
order by title_np.id
) b
group by 1 ) title_np on (title_np.record=r.id)
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b')
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c')
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
where not r.deleted
--and publisher.value~*'viz'
GROUP BY 1,2,3,4,5
;
select b.record, string_agg(val, ' ') as val from (
select -- string_agg(' ',title_np.value)
title_np.record, title_np.value as val
from metabib.full_rec title_np
where
--title_np.record=96961
title_np.tag = '245'
AND title_np.subfield IN ('p','n')
--group by title_np.record
order by title_np.id
) b
group by 1
;
explain analyse
SELECT r.id,
r.fingerprint,
r.quality,
r.tcn_source,
r.tcn_value,
FIRST(title.value) AS title,
FIRST(author.value) AS author,
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher,
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate,
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') )
END AS isbn,
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') )
END AS issn
FROM biblio.record_entry r
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b')
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c')
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
where not r.deleted
GROUP BY 1,2,3,4,5;
explain analyse
SELECT r.id,
r.fingerprint,
r.quality,
r.tcn_source,
r.tcn_value,
CONCAT_WS(' ', FIRST(title.value),string_agg(distinct title_np.value,' ' order by title_np.value)) AS title,
FIRST(author.value) AS author,
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher,
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate,
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') )
END AS isbn,
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') )
END AS issn
FROM biblio.record_entry r
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
LEFT JOIN metabib.full_rec title_np ON (r.id = title_np.record AND title_np.tag = '245' AND title_np.subfield IN ('p','n'))
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b')
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c')
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
where not r.deleted
--and publisher.value~*'viz'
GROUP BY 1,2,3,4,5
;
-- try using a subquery instead
--explain analyse
SELECT r.id,
r.fingerprint,
r.quality,
r.tcn_source,
r.tcn_value,
CONCAT_WS(' ', FIRST(title.value),(
-- Include 245 n and p subfields in the order they are entered.
SELECT string_agg(val, ' ') FROM (
SELECT title_np.value AS val
FROM metabib.full_rec title_np
WHERE
title_np.record=r.id
AND title_np.tag = '245'
AND title_np.subfield IN ('p','n')
ORDER BY title_np.id
) title_np
)
) AS title,
FIRST(author.value) AS author,
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher,
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate,
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') )
END AS isbn,
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}'
THEN NULL
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') )
END AS issn
FROM biblio.record_entry r
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a')
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a')
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b')
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c')
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z'))
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a')
where not r.deleted
--and publisher.value~*'viz'
GROUP BY 1,2,3,4,5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment