Created
July 7, 2017 21:30
-
-
Save stompro/a4f34f6e25f56cc5847dade9e5c80d36 to your computer and use it in GitHub Desktop.
super simple record changes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Check max length after super simple changes | |
select x.*, length(x.title) from ( | |
SELECT r.id, | |
r.fingerprint, | |
r.quality, | |
r.tcn_source, | |
r.tcn_value, | |
FIRST(title.value) AS title, | |
FIRST(author.value) AS author, | |
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher, | |
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate, | |
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) | |
END AS isbn, | |
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) | |
END AS issn | |
FROM biblio.record_entry r | |
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') | |
LEFT JOIN metabib.full_rec title_np ON (r.id = title_np.record AND title_np.tag = '245' AND title_np.subfield IN ('p','n')) | |
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') | |
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b') | |
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c') | |
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) | |
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') | |
where not r.deleted | |
GROUP BY 1,2,3,4,5 | |
) x | |
where length(x.title) > 60 | |
and x.tcn_source!=''; | |
select x.*, length(x.title) from ( | |
SELECT r.id, | |
r.fingerprint, | |
r.quality, | |
r.tcn_source, | |
r.tcn_value, | |
CONCAT_WS(' ', FIRST(title.value),string_agg(distinct title_np.value,' ' order by title_np.value)) AS title, | |
FIRST(author.value) AS author, | |
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher, | |
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate, | |
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) | |
END AS isbn, | |
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) | |
END AS issn | |
FROM biblio.record_entry r | |
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') | |
LEFT JOIN metabib.full_rec title_np ON (r.id = title_np.record AND title_np.tag = '245' AND title_np.subfield IN ('p','n')) | |
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') | |
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b') | |
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c') | |
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) | |
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') | |
where not r.deleted | |
GROUP BY 1,2,3,4,5 | |
) x | |
where length(x.title) > 60 | |
and x.tcn_source!=''; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- try using a subquery join instead | |
explain analyse | |
SELECT r.id, | |
r.fingerprint, | |
r.quality, | |
r.tcn_source, | |
r.tcn_value, | |
CONCAT_WS(' ', FIRST(title.value),FIRST(title_np.val)) AS title, | |
FIRST(author.value) AS author, | |
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher, | |
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate, | |
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) | |
END AS isbn, | |
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) | |
END AS issn | |
FROM biblio.record_entry r | |
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') | |
LEFT JOIN ( | |
select b.record, string_agg(val, ' ') as val from ( | |
select -- string_agg(' ',title_np.value) | |
title_np.record, title_np.value as val | |
from metabib.full_rec title_np | |
where | |
--title_np.record=96961 | |
title_np.tag = '245' | |
AND title_np.subfield IN ('p','n') | |
--group by title_np.record | |
order by title_np.id | |
) b | |
group by 1 ) title_np on (title_np.record=r.id) | |
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') | |
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b') | |
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c') | |
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) | |
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') | |
where not r.deleted | |
--and publisher.value~*'viz' | |
GROUP BY 1,2,3,4,5 | |
; | |
select b.record, string_agg(val, ' ') as val from ( | |
select -- string_agg(' ',title_np.value) | |
title_np.record, title_np.value as val | |
from metabib.full_rec title_np | |
where | |
--title_np.record=96961 | |
title_np.tag = '245' | |
AND title_np.subfield IN ('p','n') | |
--group by title_np.record | |
order by title_np.id | |
) b | |
group by 1 | |
; | |
explain analyse | |
SELECT r.id, | |
r.fingerprint, | |
r.quality, | |
r.tcn_source, | |
r.tcn_value, | |
FIRST(title.value) AS title, | |
FIRST(author.value) AS author, | |
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher, | |
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate, | |
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) | |
END AS isbn, | |
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) | |
END AS issn | |
FROM biblio.record_entry r | |
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') | |
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') | |
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b') | |
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c') | |
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) | |
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') | |
where not r.deleted | |
GROUP BY 1,2,3,4,5; | |
explain analyse | |
SELECT r.id, | |
r.fingerprint, | |
r.quality, | |
r.tcn_source, | |
r.tcn_value, | |
CONCAT_WS(' ', FIRST(title.value),string_agg(distinct title_np.value,' ' order by title_np.value)) AS title, | |
FIRST(author.value) AS author, | |
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher, | |
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate, | |
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) | |
END AS isbn, | |
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) | |
END AS issn | |
FROM biblio.record_entry r | |
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') | |
LEFT JOIN metabib.full_rec title_np ON (r.id = title_np.record AND title_np.tag = '245' AND title_np.subfield IN ('p','n')) | |
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') | |
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b') | |
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c') | |
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) | |
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') | |
where not r.deleted | |
--and publisher.value~*'viz' | |
GROUP BY 1,2,3,4,5 | |
; | |
-- try using a subquery instead | |
--explain analyse | |
SELECT r.id, | |
r.fingerprint, | |
r.quality, | |
r.tcn_source, | |
r.tcn_value, | |
CONCAT_WS(' ', FIRST(title.value),( | |
-- Include 245 n and p subfields in the order they are entered. | |
SELECT string_agg(val, ' ') FROM ( | |
SELECT title_np.value AS val | |
FROM metabib.full_rec title_np | |
WHERE | |
title_np.record=r.id | |
AND title_np.tag = '245' | |
AND title_np.subfield IN ('p','n') | |
ORDER BY title_np.id | |
) title_np | |
) | |
) AS title, | |
FIRST(author.value) AS author, | |
STRING_AGG(DISTINCT publisher.value, ', ') AS publisher, | |
STRING_AGG(DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$), ', ') AS pubdate, | |
CASE WHEN ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) | |
END AS isbn, | |
CASE WHEN ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) = '{NULL}' | |
THEN NULL | |
ELSE ARRAY_AGG( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) | |
END AS issn | |
FROM biblio.record_entry r | |
LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') | |
LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') | |
LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND (publisher.tag = '260' OR (publisher.tag = '264' AND publisher.ind2 = '1')) AND publisher.subfield = 'b') | |
LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND (pubdate.tag = '260' OR (pubdate.tag = '264' AND pubdate.ind2 = '1')) AND pubdate.subfield = 'c') | |
LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) | |
LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') | |
where not r.deleted | |
--and publisher.value~*'viz' | |
GROUP BY 1,2,3,4,5 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment