Last active
June 16, 2018 18:53
-
-
Save stompro/d036829ff1fd4e0fe3dcddf8a75b4e77 to your computer and use it in GitHub Desktop.
Purge unused authority records - Evergreen ILS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Before you try to clean up your authority records, you should have run the authority linking scripts, | |
and then set them up to run on a regular basis via cron. | |
#Link bibs to authorities | |
/openils/bin/authority_control_fields.pl -a | |
#Link authorities to other authorities. | |
/openils/bin/authority_authority_linker.pl -a | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- While double checking authority removal, I found that our system | |
-- had 120 deleted bibs referenced in metabib.browse_entry_def_map, | |
-- which caused some deleted authority records to be referenced there | |
-- also. | |
-- -- Deleted bibs associated with metabib.browse_entry_def_map -- why? | |
select distinct bre.id,bre.fingerprint, bre.deleted from metabib.browse_entry_def_map mbedm | |
join biblio.record_entry bre on bre.id=mbedm.source and bre.deleted | |
order by bre.id; | |
-- Update them to fix them - triggers a reingest for those records | |
update biblio.record_entry | |
set id=id | |
where | |
id in ( | |
select distinct bre.id from metabib.browse_entry_def_map mbedm | |
join biblio.record_entry bre on bre.id=mbedm.source and bre.deleted | |
order by bre.id | |
) | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
----------------- | |
-- Backup authority records that are not linked, these are the ones | |
-- that will be purged, | |
-- change the schema and table to suit your needs | |
create table larl_temp.authority_record_entry_deleted_backup_6_2_2018 | |
as | |
-- List of all linked authorities | |
with used_authorities as ( | |
select distinct linked from ( | |
select source as linked from authority.authority_linking aal | |
union | |
select target as linked from authority.authority_linking aal | |
union | |
select authority as linked from authority.bib_linking abl | |
union | |
select authority as linked from metabib.browse_entry_def_map mbedm where authority is not null | |
) x | |
) | |
-- Unused authority records | |
select are.* from authority.record_entry are | |
left outer join used_authorities ua on ua.linked=are.id | |
where | |
ua.linked is null -- we only want authorities that are not in the linked list. | |
and not are.deleted | |
order by id | |
--limit 1000 | |
; | |
-- make a note/calendar entry to remove this table at some point. | |
-- you could also export it to a file to keep it somewhere else, not as part of the database. | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- ------------------------------------------------- | |
-- Mark unused/unlinked authority records as deleted | |
-- Disable a few triggers to speed up the process, since we are only deleting the records | |
-- these triggers don't make any changes and can be disabled. | |
ALTER TABLE authority.record_entry DISABLE TRIGGER update_headings_tgr; | |
ALTER TABLE authority.record_entry DISABLE TRIGGER a_marcxml_is_well_formed; | |
ALTER TABLE authority.record_entry DISABLE TRIGGER b_maintain_901; | |
ALTER TABLE authority.record_entry DISABLE TRIGGER c_maintain_control_numbers; | |
ALTER TABLE authority.record_entry DISABLE TRIGGER map_thesaurus_to_control_set; | |
begin; | |
--create temp table with all the authorities that are not linked to anything. | |
create temporary table unused_auth | |
on commit drop | |
as | |
-- List of all linked authorities | |
with used_authorities as ( | |
select distinct linked from ( | |
select source as linked from authority.authority_linking aal | |
union | |
select target as linked from authority.authority_linking aal | |
union | |
select authority as linked from authority.bib_linking abl | |
union | |
select authority as linked from metabib.browse_entry_def_map mbedm where authority is not null | |
) x | |
) | |
-- Unused authority records | |
select are.id from authority.record_entry are | |
left outer join used_authorities ua on ua.linked=are.id | |
where | |
ua.linked is null -- we only want authorities that are not in the linked list. | |
and not are.deleted | |
order by id | |
--limit 1000 | |
; | |
delete from authority.record_entry are | |
where are.id in | |
( | |
select id from unused_auth limit 50000 --limit the number deleted at once | |
) | |
; | |
commit; | |
-- Re-Enable triggers | |
ALTER TABLE authority.record_entry ENABLE TRIGGER update_headings_tgr; | |
ALTER TABLE authority.record_entry ENABLE TRIGGER a_marcxml_is_well_formed; | |
ALTER TABLE authority.record_entry ENABLE TRIGGER b_maintain_901; | |
ALTER TABLE authority.record_entry ENABLE TRIGGER c_maintain_control_numbers; | |
ALTER TABLE authority.record_entry ENABLE TRIGGER map_thesaurus_to_control_set; | |
--- Done deleting unused authority records | |
-------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\pset pager off | |
-- Double check that there are no deleted authority records referenced anywhere. | |
-- If any results show up here, you need to look into why deleted authority records are being | |
-- referenced. | |
begin; | |
-- Temp table of all deleted authority records. | |
create temp table deleted_authority_records | |
on commit drop | |
as | |
select id from authority.record_entry where deleted; | |
-- metabib.browse_entry_def_map | |
select * from metabib.browse_entry_def_map mbedm | |
join deleted_authority_records dar on dar.id=mbedm.authority; | |
-- If any bib records are found here, run them through the authority_control_field.pl -ref -rec <recid> | |
-- they just need to be refreshed so they no longer point at the deleted bib. | |
-- vandelay.queued_authority_record | |
select * from vandelay.queued_authority_record vqar | |
join deleted_authority_records dar on dar.id=vqar.imported_as; | |
-- If you find any records here, clear out your authority import qeueues. | |
-- vandelay.authority_match | |
select * from vandelay.authority_match vam | |
join deleted_authority_records dar on dar.id=vam.eg_record; | |
-- authority.authority_linking Source | |
select * from authority.authority_linking aal | |
join deleted_authority_records dar on dar.id=aal.source; | |
-- authority.authority_linking target | |
select * from authority.authority_linking aal | |
join deleted_authority_records dar on dar.id=aal.target; | |
-- authority.bib_linking | |
select * from authority.bib_linking abl | |
join deleted_authority_records dar on dar.id=abl.authority; | |
-- authority.record_note | |
select * from authority.record_note arn | |
join deleted_authority_records dar on dar.id=arn.record | |
; | |
-- authority.simple_heading | |
select * from authority.simple_heading ash | |
join deleted_authority_records dar on dar.id=ash.record | |
; | |
rollback; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Look for authorities with duplicate headings, choose the copy that isn't linked to any bibs. | |
-- the authority.authority_linking will get fixed the next time the authority_authority_linker.pl is run. | |
-- Also, only select 1 of the records if there are multiple records with no bib linking, to leave at least one record | |
-- to be a target for authority.authority_linking. | |
-- Many duplicates would have already been removed by the previous step, but this step will get rid of the rest. | |
-- Method 1 | |
select heading, count(heading), min(id) deleteme | |
from | |
( | |
select are1.id, are1.heading, are2.id dup_id | |
from authority.record_entry are1 | |
join authority.record_entry are2 on are2.heading=are1.heading and are1.id != are2.id | |
where | |
not are1.deleted | |
and not are2.deleted | |
and not exists (select 1 from authority.bib_linking abl where abl.authority=are1.id) | |
--and not exists (select 1 from authority.authority_linking aal where aal.target=are1.id) | |
order by are1.heading | |
) x | |
group by heading | |
; | |
-- Delete those records | |
begin; | |
delete from authority.record_entry are | |
where are.id in | |
( | |
select min(id) id | |
from | |
( | |
select are1.id, are1.heading, are2.id dup_id | |
from authority.record_entry are1 | |
join authority.record_entry are2 on are2.heading=are1.heading and are1.id != are2.id | |
where | |
not are1.deleted | |
and not are2.deleted | |
and not exists (select 1 from authority.bib_linking abl where abl.authority=are1.id) | |
--and not exists (select 1 from authority.authority_linking aal where aal.target=are1.id) | |
order by are1.heading | |
) x | |
group by heading | |
) | |
; | |
commit; | |
-- You should re-run authority_authority_linker.pl at this point. | |
-- Method 2 | |
-- Another strategy is to just delete one of each of the duplicates, and then re-link the bibs that | |
-- pointed to the deleted duplicate copy, which should also trigger a reingest and fix metabib.browse_def_map | |
-- | |
-- Delete one of every duplicate. | |
begin; | |
-- Commands to run to re-link the bibs that are affected. | |
-- After delete is committed, run these commands. | |
select './authority_control_fields_custom.pl --refresh --record='||bib from | |
( | |
select distinct abl.bib from authority.bib_linking abl | |
where | |
abl.authority in ( | |
select xx.min id | |
from | |
( | |
select heading, count(id), string_agg(id::text,', ' order by id) authority_ids, min(id) | |
from authority.record_entry | |
where | |
not deleted | |
group by heading | |
having count(id) > 1 | |
order by count(id) desc | |
) xx | |
) | |
order by abl.bib | |
) x | |
; | |
delete from authority.record_entry are | |
where are.id in | |
( | |
select x.min id | |
from | |
( | |
select heading, count(id), string_agg(id::text,', ' order by id) authority_ids, min(id) | |
from authority.record_entry | |
where | |
not deleted | |
group by heading | |
having count(id) > 1 | |
order by count(id) desc | |
) x | |
) | |
; | |
commit; | |
-- Now take the output from the first query and put it in a shell script and run it | |
-- so all the bibs that had their authorities so cruely taken from them will get reunited | |
-- with their twin. | |
-- You should re-run authority_authority_linker.pl at this point. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- ------------- | |
-- Delete authority records for real | |
-- How many deleted authorities are there? | |
select count(id) from authority.record_entry where deleted; | |
-- Vacumm analyze authority.record_entry to make sure planner knows about current state of table. | |
VACUUM ANALYZE VERBOSE authority.record_entry; | |
-- Create several indexes to speed up foreign key checks on delete | |
-- create index for authority entries on metabib.browse_entry_def_map to speed up deletes. | |
CREATE INDEX browse_entry_def_map_authority_idx ON metabib.browse_entry_def_map (authority); | |
-- Create index for authority.authority_linking (source) | |
CREATE INDEX authority_linking_source_idx on authority.authority_linking (source); | |
-- Create index for authority.authority_linking (target) | |
CREATE INDEX authority_linking_target_idx on authority.authority_linking (target); | |
-- Create index for authority.bib_linking (authority) | |
CREATE INDEX bib_linking_authority_idx on authority.bib_linking (authority); | |
begin; | |
-- Disable delete protect rule in transaction. | |
ALTER TABLE authority.record_entry DISABLE RULE protect_authority_rec_delete; | |
-- Perform delete -- explain analyze | |
delete from authority.record_entry are | |
where are.id in | |
( | |
select id from authority.record_entry where deleted order by id limit 30000 | |
) | |
; | |
commit; | |
-- How many deleted authorities are there? | |
select count(id) from authority.record_entry where deleted; | |
-- Enable delete protect rule in transaction. | |
ALTER TABLE authority.record_entry ENABLE RULE protect_authority_rec_delete; | |
-- Drop index on authority column of metabib.browse_entry_def_map | |
DROP INDEX metabib.browse_entry_def_map_authority_idx; | |
-- Drop index on source column of authority.authority_linking | |
DROP INDEX authority.authority_linking_source_idx; | |
-- Drop index on target column of authority.authority_linking | |
DROP INDEX authority.authority_linking_target_idx; | |
-- Drop index on authority column of authority.bib_linking | |
DROP INDEX authority.bib_linking_authority_idx; | |
-- Vacumm analyze authority.record_entry to make sure planner knows about current state of table. | |
VACUUM ANALYZE VERBOSE authority.record_entry; | |
-- To reclaim the removed space, run a vacumm full, but only do that during a maintenance window, since | |
-- it locks the table and re-writes the whole thing. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment