Created
December 2, 2020 14:47
-
-
Save AndreasBaumgart/a09dbc56747c9bf1a1b514c9dc7bf880 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' | |
namespace :rm2022_nodes_export do | |
desc 'Export for Roadmap 2020' | |
task export: :environment do | |
node_paths = %w[ | |
community | |
die-community/albert-lehmann | |
die-community/community-30676 | |
die-community/community-34487 | |
die-community/andreas-hinterberger | |
die-community/christoph-schreyer | |
die-community/community-64706 | |
die-community/community-45080 | |
die-community/community-80230 | |
die-community/community-72726 | |
die-community/community-80310 | |
die-community/community-76441 | |
die-community/community-97734 | |
die-community/community-24436 | |
die-community/luregn-hug | |
die-community/luzius-wyrsch | |
die-community/community-76587 | |
die-community/marcel-ingold | |
die-community/martin-kessler | |
die-community/community-32978 | |
die-community/community-58661 | |
die-community/maud-rasmussen | |
die-community/michael-klasa | |
die-community/community-23953 | |
die-community/oliver-wimmer | |
die-community/community-97404 | |
die-community/community-92592 | |
die-community/roberti-bianchetti | |
die-community/silvan-rosser | |
die-community/stefan-funk | |
die-community/stephan-walter | |
die-community/community-77485 | |
die-community/community-10885 | |
die-community/thomas-loehrer | |
die-community/community-42913 | |
die-community/community-11914 | |
roadmap_elektromobilitaet_2022/fotos | |
8-november-2019/fotos | |
erkenntnisse-ergebnisse-plattform-24-04/foto-gallerie | |
roadmap_elektromobilitaet_2022/fotos/unterzeichnung | |
roadmap_elektromobilitaet_2022/fotos/fotos_workshop | |
roadmap_elektromobilitaet_2022/fotos/fotos_download | |
roadmap_elektromobilitaet_2022/massnahmen | |
roadmap_elektromobilitaet_2022/alle_massnahmen | |
mitwirken | |
news | |
news/update-massnahmen | |
news/neue-massnahmen-auf-der-roadmap-elektromobilitaet-2022 | |
naechste-veranstaltungen/plattform-elektromobilitaet-mittwoch-9-dezember-2020 | |
naechste-veranstaltungen | |
naechste-veranstaltungen/plattform-elektromobilitaet-mittwoch-9-dezember-2020 | |
naechste-veranstaltungen/vergangene-veranstaltungen | |
naechste-veranstaltungen/vergangene-veranstaltungen/simple-1 | |
news/community/plattform_20191108 | |
naechste-veranstaltungen/vergangene-veranstaltungen/get-it-done-tag-vom-24-oktober-2019 | |
roadmap_elektromobilitaet_2022/timeline/plattform-vom-20-august-2019 | |
roadmap_elektromobilitaet_2022/timeline/get-it-done-tag-vom-25-juli-2019 | |
roadmap_elektromobilitaet_2022/timeline/get-it-done-tag-vom-25-juni-2019 | |
agenda-get-it-done-tag-vom-22-mai-2019 | |
roadmap_elektromobilitaet_2022/timeline/plattform-elektromobilitaet-4 | |
unterzeichnung | |
roadmap_elektromobilitaet_2022/willkommen_event | |
ueber-die-roadmap | |
die-roadmap-elektromobilitaet-in-180-sekunden | |
] | |
image_fields = %w[ | |
cover_image | |
detail_image | |
customer_logo | |
action1_image | |
action2_image | |
logo_image | |
] | |
providers_sql = <<~SQL | |
select | |
distinct provider_nodes.id as provider_id | |
from website_nodes as provider_nodes | |
left join website_node_links as links on links.provider_id = provider_nodes.id | |
left join website_nodes as consumer_nodes on links.consumer_id = consumer_nodes.id | |
where consumer_nodes.content->>'url_name' in (#{node_paths.map { |path| ActiveRecord::Base.connection.quote(path) }.join(', ')}) | |
SQL | |
provider_ids = ActiveRecord::Base.connection.execute(providers_sql).to_a.collect do |row_hash| | |
row_hash['provider_id'] | |
end | |
localized_columns_sql = %i[de fr it en].collect do |locale| | |
<<~SUBSQL | |
nodes.content->>'title_#{locale}' as title_#{locale}, | |
nodes.content->>'headline_#{locale}' as headline_#{locale}, | |
nodes.content->>'lead_sanitized_html_#{locale}' as lead_#{locale}, | |
nodes.content->>'body_sanitized_html_#{locale}' as body_#{locale}, | |
nodes.content->>'link1_url_#{locale}' as link1_url_#{locale}, | |
nodes.content->>'link2_url_#{locale}' as link2_url_#{locale}, | |
nodes.content->>'link1_text_#{locale}' as link1_text_#{locale}, | |
nodes.content->>'link2_text_#{locale}' as link2_text_#{locale}, | |
SUBSQL | |
end.join("\n") | |
sql = <<~SQL | |
select | |
sites.name as site_name, | |
nodes.id as page_id, | |
nodes.content->>'url_name' as page_path, | |
replace(nodes.type, 'Website::Node::', '') as template, | |
#{localized_columns_sql} | |
#{image_fields.collect { |image_field| "nodes.content->>'#{image_field}_file_name' as #{image_field}_file_name," }.join("\n")} | |
nodes.content->>'tags' as tags, | |
array_agg(links.id order by position) filter (where links.context = 'children') as children_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'steps') as steps_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'timeline_items') as timeline_items_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'links') as links_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'tools') as tools_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'options') as options_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'teasers') as teasers_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'applications') as applications_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'terms_and_conditions') as terms_and_conditions_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'community') as community_page_ids, | |
array_agg(links.id order by position) filter (where links.context = 'root_node') as root_node_page_ids | |
from website_nodes as nodes | |
join website_sites as sites on nodes.site_id = sites.id | |
left join website_node_links as links on links.consumer_id = nodes.id | |
left join website_nodes as providers on links.provider_id = providers.id | |
where | |
sites.projects_project_id = 413 | |
and ( | |
nodes.content->>'url_name' in (#{node_paths.map { |path| ActiveRecord::Base.connection.quote(path) }.join(', ')}) | |
or nodes.id in (#{provider_ids.map { |id| ActiveRecord::Base.connection.quote(id) }.join(', ')}) | |
) | |
group by nodes.id, nodes.type, nodes.content, sites.name | |
order by | |
CASE | |
#{node_paths.each_with_index.map { |path, idx| "WHEN #{ActiveRecord::Base.connection.quote(path) } = nodes.content->>'url_name' THEN #{idx}" }.join("\n")} | |
ELSE 999999 | |
END ASC, | |
nodes.content->>'url_name' | |
SQL | |
File.write('rm2022_export.sql', sql) | |
res = ActiveRecord::Base.connection.execute(sql).to_a | |
image_urls = [] | |
if res.any? | |
csv = CSV.generate do |csv| | |
csv << res.first.keys | |
res.each do |row| | |
node = Website::Node::Base.find(row['page_id']) | |
image_fields.each do |image_field| | |
if node.respond_to?(image_field) && node.send(image_field).present? | |
url = "https://s3.amazonaws.com/brainstore-d2014#{node.send(image_field).url(:cover2x).gsub('/system', '')}" | |
image_urls << url.gsub(/\?.*$/, '') | |
row["#{image_field}_file_name"] = url | |
end | |
end | |
csv << row.values | |
csv << [] | |
end | |
end | |
File.write("rm2022_export.csv", csv) | |
File.write("rm2022_image_urls.txt", image_urls.join("\n")) | |
else | |
puts "No matching rows." | |
end | |
puts | |
puts "Download images with:" | |
puts "mkdir my_images_dir; cd my_images_dir; xargs -n 1 curl -O < ../rm2022_image_urls.txt" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment