Created
June 4, 2014 23:50
-
-
Save iaingray/25b6bbb659cacf1611b2 to your computer and use it in GitHub Desktop.
Script to remove leading underscores from the names of S3 files so that the files can be used in Hive.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/ruby
# Removes the leading underscore from the names of S3 objects so that the
# files can be used in Hive.
#
# Usage:
#   <script> s3://my-bucket/          # objects at the root of the bucket
#   <script> s3://my-bucket/folder/   # objects directly under folder/
#
# AWS credentials (aws_access_key_id / aws_secret_access_key) are read from
# the [default] section of ~/.aws/config.
#
# Each matching object is copied to its new key and the original is deleted
# only once the copy has been confirmed. Any failure aborts with a message on
# stderr and a non-zero exit status.
require 'fog'
require 'inifile'

INI_FILE = File.join(Dir.home, '.aws', 'config')

# \A and \z anchor the whole argument (^ and $ would only anchor a line).
LOCATION_PATTERN = %r{\As3://(?<bucket>[a-zA-Z0-9-]+)/(?<folder>[\w-]*)/?\z}

# details from command line
location = LOCATION_PATTERN.match(ARGV.first.to_s)
if location.nil?
  abort "Invalid S3 bucket or folder, must be e.g. s3://my-bucket/ or s3://my-bucket/folder/"
end
bucket = location[:bucket]
folder = location[:folder]

# get aws credentials from config file
config = IniFile.load(INI_FILE, :parameters => ' = ')
abort "Could not read AWS config file #{INI_FILE}" if config.nil?

# The section is read with its string keys directly, so no Hash extension
# (such as symbolize_keys!, which is not core Ruby) is required.
aws = config['default'] || {}
access_key_id     = aws['aws_access_key_id']
secret_access_key = aws['aws_secret_access_key']
if access_key_id.nil? || secret_access_key.nil?
  abort "Missing aws_access_key_id or aws_secret_access_key in [default] section of #{INI_FILE}"
end

connection = Fog::Storage.new(
  {
    :provider              => 'AWS',
    :aws_access_key_id     => access_key_id,
    :aws_secret_access_key => secret_access_key
  })

# For a non-empty folder the trailing slash is part of the prefix, so keys
# that merely share the folder's leading characters (e.g. "logs_archive/x"
# when the folder is "logs") are neither listed nor renamed.
key_prefix = folder.empty? ? '' : "#{folder}/"

dir = connection.directories.get(bucket, {:prefix => key_prefix})
abort "Bucket not found" if dir.nil?

# Matches keys whose name, directly under the prefix, starts with "_".
underscore_pattern = /\A#{Regexp.escape(key_prefix)}_/

# rename all files by removing the leading underscore
dir.files.each do |file|
  old_key = file.key
  next unless underscore_pattern.match(old_key)

  # Block form of sub, so the replacement is taken literally.
  new_key = old_key.sub(underscore_pattern) { key_prefix }
  puts "Renaming #{old_key} to #{new_key}"

  copied = file.copy(bucket, new_key)
  if copied && copied.key == new_key
    file.destroy # delete old one only if copy successful
  else
    warn "Copy of #{old_key} to #{new_key} could not be confirmed; original kept"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment