Skip to content

Instantly share code, notes, and snippets.

@dshorthouse
Last active May 10, 2024 17:43
Show Gist options
  • Save dshorthouse/abd44065acf65c1211ff4fbf02e1fbac to your computer and use it in GitHub Desktop.
Save dshorthouse/abd44065acf65c1211ff4fbf02e1fbac to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# encoding: utf-8
require 'csv'
require 'dina'
# Connection settings handed to the Dina client before any request is made.
#
# Credentials are read from the DINA_USER and DINA_PASSWORD environment
# variables when they are set, so real secrets do not have to be written into
# this file. When a variable is absent the original placeholder value is used.
Dina.config = {
  authorization_url: 'https://dina.biodiversity.agr.gc.ca/auth',
  endpoint_url: 'https://dina.biodiversity.agr.gc.ca/api',
  server_name: 'dina-prod',
  realm: 'dina',
  client_id: 'dina-public',
  user: ENV.fetch('DINA_USER', 'USERNAME'),
  password: ENV.fetch('DINA_PASSWORD', 'PASSWORD'),
  # Resolved against the directory the script is launched from.
  # NOTE(review): presumably where the client persists its auth token - confirm.
  token_store_file: File.join(Dir.pwd, "token.json")
}
# Query the "object_store" search index for objects carrying a given tag.
#
# Both conditions are exact `term` matches combined under `bool.must`, so an
# object has to satisfy the tag AND the group to be returned. Only a single
# request is issued; hits beyond `size` are not paged through.
#
# @param tag [#to_s] value matched against "data.attributes.acTags.keyword"
# @param group [String] value matched against "data.attributes.group"
# @param size [Integer] maximum number of hits requested in the payload
#   NOTE(review): 10_000 presumably mirrors the search backend's maximum
#   result window - confirm before passing a larger value.
# @return the result of Dina::Search.execute for the built payload
def images_by_tag(tag:, group: "dao", size: 10_000)
  payload = {
    query: {
      bool: {
        must: [
          { term: { "data.attributes.acTags.keyword": tag.to_s } },
          { term: { "data.attributes.group": group } }
        ]
      }
    },
    size: size
  }
  Dina::Search.execute(index: "object_store", payload: payload)
end
# Export every object tagged "duplicated barcode" to
# biomob_duplicate_barcodes.csv in the current working directory, one row per
# object: three managed-attribute values, then the object's id and a link to
# its object-store page.

# Managed-attribute keys, used both as header names and as lookup keys so the
# header and the row cells cannot drift apart.
managed_columns = %w[barcode folder_barcode original_directory_name]
output_path = File.join(Dir.pwd, "biomob_duplicate_barcodes.csv")

CSV.open(output_path, "w") do |csv|
  csv << (managed_columns + %w[dina_uuid dina_url])

  images_by_tag(tag: "duplicated barcode").each do |obj|
    # An object with no managed attributes gets blank cells instead of
    # aborting the whole export with NoMethodError on nil.
    managed = obj.attributes["managed_attributes"] || {}
    managed_cells = managed_columns.map { |column| managed[column] }

    csv << (managed_cells + [
      obj.id,
      "https://dina.biodiversity.agr.gc.ca/object-store/object/view?id=#{obj.id}"
    ])
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment