Skip to content

Instantly share code, notes, and snippets.

@PlanetRoast
Created November 8, 2017 10:32
Show Gist options
  • Save PlanetRoast/79c687e8d856b24ef9685a3849adc7d7 to your computer and use it in GitHub Desktop.
Save PlanetRoast/79c687e8d856b24ef9685a3849adc7d7 to your computer and use it in GitHub Desktop.
# FileGrabber
# Designed for downloading, unzipping, saving, and tidying of affiliate window product feeds.
# -------------------------------------------------------------------------------------------
# Usage: @foo = FileGrabber.new(url: "product-feed-url")
# Optional args:
# folder_name (sets the name of the folder which contains the downloaded files)
# folder_path (sets where to store the downloaded files)
# trim (how many downloads do you want to keep saved? The rest will be deleted)
class FileGrabber
def initialize(args={})
load_settings
@url = args[:url] # url of the product feed
@extension = args[:extension] ||= @default_extension # should be .csv.gz for affiliate window product feeds
@folder_name = args[:folder_name] ||= @default_folder_name # name of the local folder in which to store the file
@folder_path = args[:folder_path] ||= @default_folder_path # pathway leading to the folder
@name = args[:name] ||= @default_name # name of the file not including any time stamps
@trim = args[:trim] ||= @default_trim # how many files allowed in the folder before we delete old ones
@path = make_path # /app/assets/filegrabs
@full_path = make_full_path # /app/assets/filegrabs/grab001.csv
@timestamp = make_timestamp # unix timestamp to be appended to file name
@filename = make_filename
validate_args
validate_path
if valid?
download_file
unzip_and_save
delete_old_files
else
puts "FileGrabber didn't start because of a validation error."
end
end
def valid?
if has_errors?
false
else
true
end
end
def display_errors
# Prints out each error
# - See errors if you just want an array of the errors instead.
@errors.each do |e|
puts "FileGrabber error: #{e}"
end
end
def errors
# Returns an array of errors
# - See display_errors if you're too lazy to puts them out.
@errors
end
private
def make_filename
# => 10510987-grabbedfile
"#{@timestamp}-#{@name}"
end
def make_full_path
# = /home/al/sites/app/filegrabber/assorted/10510987-grabbedfile.csv
"#{@path}#{@filename}"
end
def delete_old_files
# Deletes old files from the path
# - Hopefully it doesn't become self aware and delete all of the files ._.
delete_count = list_files.count - @trim
list_files.first(delete_count).each do |f|
if f.include?(@name) # just to prevent accidentally deleting files that are not grabbed files
File.delete("#{@path}#{f}")
puts "Deleted #{@path}#{f}"
end
end
end
def list_files
# Looks in the path and returns a list of file names, newest files first
files = Dir.glob("#{@path}*")
results = []
files.each do |f|
results << f.split("/").last
end
results.sort
end
def make_timestamp
# Unix timestamp for appending to the file name
Time.now.to_i
end
def validate_path
# Makes sure there is a folder to save files to
# - Not strictly speaking a validation as it won't return a boolean
unless File.directory?(@path)
create_folder
end
end
def create_folder
# Creates a folder for saving files to if one doesn't already exist.
require 'fileutils'
FileUtils::mkdir_p @path
end
def download_file
# Not sure what will happen if the file is not downloaded correctly?
# - maybe something to add in the future.
require 'open-uri'
@download = open(@url)
end
def save_file
# Not any use for Affiliate window feeds as they are all zipped up
# - use unzip_and_save instead.
IO.copy_stream(@download, "#{@path}#{@filename}")
end
def unzip_and_save
gz = Zlib::GzipReader.new(@download)
csv = gz.read
File.write("#{@path}/#{@filename}.csv", csv)
gz.close
end
def load_settings
@errors = [] # Empty array for filling up with error messages
@default_folder_name = "assorted"
@default_folder_path = "filegrabber"
@file_name = "worksmate.png"
@default_name = "grabbedfile"
@default_trim = 3
@default_extension = ".csv.gz"
end
def validate_args
# validate args
validate_url
end
def validate_url
# FileGrabber needs at least a URL to kick things off
# - foo = FileGrabber.new(url: "www.my-product-feed.com")
if @url.blank?
@errors << "URL was left blank."
end
end
def has_errors?
# Because we can't use the built in rails validations we shall roll our own.
@errors.any?
end
def make_path
# make path
"#{Rails.root}/#{@folder_path}/#{@folder_name}/"
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment