Created
July 13, 2012 10:17
-
-
Save Bijendra/3104077 to your computer and use it in GitHub Desktop.
Unzipping an archive and then parsing and saving the data from each file with Nokogiri is time-consuming: there are 739 files of 1.2-1.5 MB each, which comes to about 1.2 GB in total. This operation therefore runs as a background job using redis-resque.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' # open-uri is part of the Ruby standard library; it only needs to be required when used.
require 'zip/zip'
require 'active_support/core_ext'
require 'nokogiri'
# Background job that downloads a zip feed, converts every XML entry of the
# archive into a nested Hash and hands each top-level section of the
# TransXChange document to the model class that stores it.
class JourneyPlanner
  # Base class for the errors this job raises on purpose.
  class Error < StandardError; end

  # Raised when the feed archive cannot be fetched.
  class DownloadError < Error; end

  # Raised when an archive entry cannot be turned into the expected Hash.
  class ParseError < Error; end

  # Default location of the zip feed.
  FEED_URL = 'http://www.tfl.gov.uk/tfl/businessandpartners/syndication/feed.aspx?email=rajashekaran@sourcebits.com&feedId=15'.freeze

  # Queue name (class-level instance variable, Resque-style convention).
  @queue = :journey_planner

  class << self
    # Job entry point: fetches the archive and imports every entry in it.
    #
    # @raise [DownloadError] when the archive cannot be fetched
    # @raise [ParseError] when an entry is not a usable TransXChange document
    # @return [void]
    def perform
      archive = download_file_from_url
      Zip::ZipFile.open(archive) do |zipfile|
        zipfile.each do |entry|
          details = get_the_hash_from_xml(zipfile.read(entry))[:TransXChange]
          # Fail with a clear message instead of a NoMethodError on nil below.
          raise ParseError, "#{entry.name}: root element is not a TransXChange hash" unless details.is_a?(Hash)

          AnnotatedStopPointRef.create_stop_points(details[:StopPoints])
          JourneyPatternSection.create_journey_pattern_sections(details[:JourneyPatternSections])
          Operator.create_operators(details[:Operators])
          Route.create_routes(details[:Routes])
          RouteSection.create_route_sections(details[:RouteSections])
          Service.create_services(details[:Services])
          VehicleJourney.create_vehicle_journeys(details[:VehicleJourneys])
        end
      end
    ensure
      # The download is an open IO handle; release it once the import is over
      # (archive is nil here when the download itself failed).
      archive.close if archive.respond_to?(:close)
    end

    # Parses XML and returns a one-key Hash: root element name (as a Symbol)
    # mapped to the converted content of the root element.
    #
    # @param xml_io [String, IO] XML source handed to Nokogiri::XML
    # @return [Hash]
    # @raise [ParseError] when there is no root element or conversion fails
    def get_the_hash_from_xml(xml_io)
      root = Nokogiri::XML(xml_io).root
      raise ParseError, 'XML document has no root element' if root.nil?

      { root.name.to_sym => xml_node_to_hash(root) }
    rescue Error
      raise
    rescue StandardError => e
      raise ParseError, "could not convert XML to a hash: #{e.class}: #{e.message}"
    end

    # Opens the feed and returns the IO object produced by open-uri.
    # The caller is responsible for closing it.
    #
    # @param url [String] location of the archive (defaults to FEED_URL)
    # @return [IO] handle on the downloaded data
    # @raise [DownloadError] when the resource cannot be opened
    def download_file_from_url(url = FEED_URL)
      # URI.open is the open-uri entry point on current Rubies; older ones
      # only offer the Kernel#open override installed by open-uri.
      opener = URI.respond_to?(:open) ? URI : Kernel
      opener.open(url)
    rescue StandardError => e
      raise DownloadError, "could not download #{url}: #{e.class}: #{e.message}"
    end

    # Recursively converts a node into plain Ruby data.
    #
    # * a non-element node yields its content (see #prepare)
    # * an element whose only child is a text node yields that text
    # * any other element yields a Hash keyed by child name; children
    #   sharing a name are collected into an Array in document order
    #
    # Text nodes that have siblings (typically whitespace between elements)
    # are ignored, and attributes are not read at all.
    #
    # @param node [#element?, #children, #name, #content] Nokogiri-like node
    # @return [Hash, String, Integer]
    def xml_node_to_hash(node)
      return prepare(node.content.to_s) unless node.element?

      node.children.each_with_object({}) do |child, result_hash|
        value = prepare(xml_node_to_hash(child))

        if child.name == "text"
          # A lone text child means this element is a leaf: return its value.
          return value unless child.next_sibling || child.previous_sibling

          next
        end

        key = child.name.to_sym
        if !result_hash.key?(key)
          result_hash[key] = value
        elsif result_hash[key].is_a?(Array)
          result_hash[key] << value
        else
          # Second occurrence of this name: promote the entry to an Array.
          result_hash[key] = [result_hash[key], value]
        end
      end
    end

    # Turns a String that is the canonical decimal form of an Integer
    # ("42", but not "042" or "4.2") into that Integer; anything else is
    # returned unchanged.
    #
    # @param data [Object]
    # @return [Object]
    def prepare(data)
      data.is_a?(String) && data.to_i.to_s == data ? data.to_i : data
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment