Created
July 11, 2019 15:49
-
-
Save kevindew/1374a37cb074b83842d873d15b7d8512 to your computer and use it in GitHub Desktop.
Hacky script that can export data from a Publishing API instance into a local environment - it tends to fail a lot.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "bundler/inline" | |
# To run this you need to be connected to the Publishing API via a port forward
# I set this up by adding myself as a user in: https://signon.integration.publishing.service.gov.uk/api_users | |
# then storing the bearer token in ~/.publishing-api-token | |
# | |
# Then I logged onto the VPN and port forwarded Publishing API to 9000 with: | |
# ssh $(ssh integration "govuk_node_list --single-node -c publishing_api").integration -CNL 9000:127.0.0.1:3093 | |
# | |
# and ran: PUBLISHING_API_BEARER_TOKEN=$(cat ~/.publishing-api-token) ruby import-publishing-api.rb | |
gemfile do | |
source "https://rubygems.org" | |
gem "gds-api-adapters" | |
gem "byebug" | |
end | |
# Client for the remote Publishing API, reached via http://localhost:9000
# (the port forward described in the usage notes at the top of this script).
# ENV.fetch raises KeyError immediately when PUBLISHING_API_BEARER_TOKEN is
# unset, instead of handing a nil token to the client.
publishing_api_integration = GdsApi::PublishingApiV2.new(
  "http://localhost:9000",
  { bearer_token: ENV.fetch("PUBLISHING_API_BEARER_TOKEN"), timeout: 60 },
)

# Client for the Publishing API that items are written to, built by the gem's
# own helper (presumably pointing at the local environment - confirm against
# the gds-api-adapters configuration in use).
publishing_api_local = GdsApi.publishing_api_v2
# Document types to import - edit this list to pull in different content.
types = %w[organisation taxon]

# One [document_type, enumerator] pair per type; each enumerator is requested
# from the remote API for items of that type in the "published" state.
pairs = types.map do |type|
  [
    type,
    publishing_api_integration.get_content_items_enum(
      document_type: type,
      states: %w[published],
    ),
  ]
end
# Fields copied from each remote content item into the local put_content
# payload. Built once here rather than on every loop iteration.
import_fields = %w[
  analytics_identifier
  base_path
  description
  details
  document_type
  first_published_at
  last_edited_at
  phase
  public_updated_at
  publishing_app
  rendering_app
  routes
  schema_name
  title
  update_type
  links
].freeze

# For every document type, walk the remote items and copy across each one
# that the local Publishing API does not already have.
pairs.each do |name, enum|
  puts "loading #{name}s"
  imported = 0
  skipped = 0

  enum.each.with_index do |result, i|
    # Progress report every tenth item.
    puts "#{imported} #{name}s imported, #{skipped} skipped" if i > 0 && i % 10 == 0
    content_id = result["content_id"]

    # A successful lookup means the item already exists locally: count it as
    # skipped and move on. HTTPNotFound is the signal that it needs importing.
    begin
      publishing_api_local.get_content(content_id)
      skipped += 1
      next
    rescue GdsApi::HTTPNotFound
      # Not present locally - fall through to the import below.
    end

    payload = result.slice(*import_fields)
    # Default the update type when the remote item does not carry one.
    payload["update_type"] ||= "major"

    begin
      publishing_api_local.put_content(content_id, payload)
      publishing_api_local.publish(content_id)

      # Links are fetched separately from the remote API and only patched
      # locally when there is something to send.
      links = publishing_api_integration.get_links(content_id)["links"]
      publishing_api_local.patch_links(content_id, links: links) unless links.nil? || links.empty?
      imported += 1
    rescue GdsApi::HTTPUnprocessableEntity => e
      # Best-effort import: report which item was rejected and why, then carry on.
      warn "skipping #{content_id}: #{e.message}"
      skipped += 1
    end
  end

  # Final tally, so that types with fewer than ten items still report counts.
  puts "finished #{name}s: #{imported} imported, #{skipped} skipped"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment