Skip to content

Instantly share code, notes, and snippets.

@tannie
Forked from matt-west/json-split.rb
Last active March 22, 2022 17:16
Show Gist options
  • Save tannie/23872e8f265a7077c875162d5ae348a0 to your computer and use it in GitHub Desktop.
Save tannie/23872e8f265a7077c875162d5ae348a0 to your computer and use it in GitHub Desktop.
Ruby script to split a JSON file
#!/usr/bin/env ruby
require 'rubygems'
require 'json'
# Splits a JSON array file into numbered chunk files.
#
# Usage: json-split.rb INPUT.json [ITEMS_PER_FILE]
#
# Reads INPUT.json, repairs mis-encoded `\u00XX` escape runs (as seen in
# Facebook exports), parses the result, and writes consecutive slices of the
# top-level array to output.1.json, output.2.json, ... in the current
# directory. ITEMS_PER_FILE defaults to DEFAULT_SEGMENT_SIZE.

# Number of array items written to each output file unless overridden.
DEFAULT_SEGMENT_SIZE = 100

# Matches a run of `\u00XX` escapes, i.e. escapes that each carry one byte.
# Group 1 captures any escaped backslashes (`\\`) directly in front of the
# run, group 2 the run itself. The lookbehind forces the match to begin at the
# start of a backslash sequence, so an escaped backslash followed by the
# literal text `u00e9` is never mistaken for a Unicode escape.
ESCAPED_BYTE_RUN = /(?<!\\)((?:\\\\)*)((?:\\u00\h{2})+)/

# Rewrites runs of `\u00XX` escapes whose bytes form valid UTF-8 into the
# characters they were meant to be, re-escaped for embedding in JSON text.
#
# Runs whose bytes are not valid UTF-8 (for example a lone `\u00e9`, which is
# already a correct escape for "é") are left untouched so JSON.parse can
# decode them normally. No input text is ever evaluated as Ruby code.
#
# @param text [String] raw JSON document text
# @return [String] JSON text with repairable escape runs replaced
def repair_escaped_bytes(text)
  text.gsub(ESCAPED_BYTE_RUN) do
    # Grab both captures first: the scan below overwrites the last match.
    backslashes = Regexp.last_match(1)
    escapes = Regexp.last_match(2)

    decoded = escapes.scan(/\\u00(\h{2})/).flatten.map(&:hex).pack('C*')
    decoded.force_encoding(Encoding::UTF_8)

    if decoded.valid_encoding?
      # to_json re-escapes quotes, backslashes and control characters; the
      # slice drops the surrounding double quotes it adds.
      backslashes + decoded.to_json[1...-1]
    else
      backslashes + escapes
    end
  end
end

# Converts the optional ITEMS_PER_FILE argument into a positive Integer.
#
# @param raw [String, nil] the command-line value, or nil when omitted
# @return [Integer] items per output file
def segment_size_from(raw)
  return DEFAULT_SEGMENT_SIZE if raw.nil?

  size = Integer(raw, 10)
  raise ArgumentError, 'segment size must be positive' unless size > 0

  size
rescue ArgumentError
  abort "ITEMS_PER_FILE must be a positive integer, got #{raw.inspect}"
end

input_path = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} INPUT.json [ITEMS_PER_FILE]" if input_path.nil?
segment_size = segment_size_from(ARGV[1])

puts "Opening File"
# Block form closes the handle even if reading fails. JSON text is UTF-8 by
# specification; the BOM| prefix strips a leading byte-order mark if present.
raw_content = File.open(input_path, 'r:BOM|UTF-8') do |file|
  puts "Fetching Contents"
  file.read
end
content = repair_escaped_bytes(raw_content)

puts "Parsing JSON"
json = JSON.parse(content)
# Chunking by position only makes sense for an array; a top-level object
# would otherwise be "split" into files that each contain an empty hash.
abort "Expected a top-level JSON array but parsed a #{json.class}" unless json.is_a?(Array)

puts "Producing Files"
loops = (json.length.to_f / segment_size).ceil
puts "Total Objects: #{json.length}"
puts "Total Loops: #{loops}"

# Write each slice of the array to its own numbered output file.
json.each_slice(segment_size).with_index(1) do |items, file_number|
  puts "Creating file #{file_number}"
  File.write("output.#{file_number}.json", JSON.pretty_generate(items))
end
@tannie
Copy link
Author

tannie commented Mar 22, 2022

added

bytes_re = /((?:\\\\)+|[^\\])(?:\\u[0-9a-f]{4})+/
content = file.read.gsub(bytes_re) do |bad_unicode|
$1 + eval(%Q{"#{bad_unicode[$1.size..-1].gsub('\u00', '\x')}"}).to_json[1...-1]
end

to deal with Facebook's bad Unicode

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment