Skip to content

Instantly share code, notes, and snippets.

@jeff
Last active July 14, 2019 06:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeff/4591754 to your computer and use it in GitHub Desktop.
Save jeff/4591754 to your computer and use it in GitHub Desktop.
some examples of queries to queue then re-ingest records in an Evergreen 2.2 system which contain records which were indexed under an older normalization style.
-- queue bre records for a batch re-ingest
-- select bres with a title containing the string "'s "
-- which do not have a keyword index_vector
-- matching 's'
INSERT INTO tadl_batch.bre (bre,batch_name)
WITH apos_recs AS (select distinct mkfe.source id, mkfe.value title
from metabib.keyword_field_entry mkfe
join asset.call_number acn on (acn.record = mkfe.source and not acn.deleted)
join asset.copy acp on (acp.call_number = acn.id and not acp.deleted)
where mkfe.field = 122
and mkfe.value like '%''s %'
order by mkfe.value)
SELECT ar.id, 'reingest_apos1'
FROM biblio.record_entry bre
JOIN apos_recs ar ON (ar.id = bre.id)
JOIN metabib.keyword_field_entry mkfe ON (bre.id = mkfe.source AND mkfe.field = 14)
WHERE mkfe.index_vector @@ !!'s'::tsquery
ORDER BY bre.edit_date DESC;
-- re-ingest the records by running this
-- query until there are no records
-- left to re-ingest
WITH updated AS (UPDATE biblio.record_entry
SET id = id
WHERE id IN (
select bre.id from biblio.record_entry bre
join tadl_batch.bre tbbre ON (tbbre.bre = bre.id AND tbbre.batch_name = 'reingest_apos1' AND update_time IS NULL)
AND not bre.deleted
LIMIT 500
) returning id)
UPDATE tadl_batch.bre
SET update_time = now()
WHERE batch_name = 'reingest_apos1'
AND bre IN (
SELECT id
FROM updated
);
-- query to confirm that those records that have
-- been updated no longer match the original
-- criteria -- that they have the 's' index vector now.
WITH apos_recs AS (select distinct bre
from tadl_batch.bre
where update_time is not null
and batch_name = 'reingest_apos1'
order by bre)
SELECT ar.bre, bre.edit_date::date
FROM biblio.record_entry bre
JOIN apos_recs ar ON (ar.bre = bre.id)
JOIN metabib.keyword_field_entry mkfe ON (bre.id = mkfe.source AND mkfe.field = 14)
WHERE mkfe.index_vector @@ !!'s'::tsquery
ORDER BY bre.edit_date DESC;
-- should return zero results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment