Last active
December 28, 2016 09:34
-
-
Save flangofas/22c5a980bddf80a979ad97179eba4756 to your computer and use it in GitHub Desktop.
Creating JSON with a million records
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "aflangofas/json", | |
"require": { | |
"fzaninotto/faker": "^1.6" | |
}, | |
"authors": [ | |
{ | |
"name": "Antonis Flangofas", | |
"email": "antonisflags@gmail.com" | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Import every regular file found in $folder into the `users` collection of
// the `test` database by shelling out to mongoimport, echoing the tool's
// output after each file.
$folder = 'output/';

$files = scandir($folder);
if ($files === false) {
    // scandir() returns false when the folder is missing or unreadable.
    fwrite(STDERR, 'Unable to read folder ' . $folder . PHP_EOL);
    exit(1);
}

foreach (array_diff($files, ['.', '..']) as $file) {
    $path = $folder . $file;

    // Sub-directories cannot be passed to --file; skip anything that is not a regular file.
    if (!is_file($path)) {
        continue;
    }

    echo 'Importing ' . $file . PHP_EOL;

    // escapeshellarg() keeps file names containing spaces or shell
    // metacharacters from breaking (or injecting into) the command line.
    // stderr is redirected so diagnostics end up in $output as well.
    $command = 'mongoimport --db test --collection users --jsonArray --file '
        . escapeshellarg($path) . ' 2>&1';
    $output = shell_exec($command);

    echo $output . PHP_EOL;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require __DIR__ . '/vendor/autoload.php'; | |
use Faker\Factory; | |
// Number of batch files to generate: first CLI argument, default 5.
// Cast to int so that a non-numeric argument cannot turn the batch loop's
// comparison into a string comparison.
$iterations = isset($argv[1]) ? (int) $argv[1] : 5;
// Number of records per batch file: second CLI argument, default 6000.
$entries = isset($argv[2]) ? (int) $argv[2] : 6000;
/**
 * Format a byte count as a short human-readable string, e.g. 1536 -> "1.5 kb".
 *
 * @param int|float $size Size in bytes.
 * @return string The size rounded to two decimals, a space, and a unit
 *                from b, kb, mb, gb, tb, pb.
 */
function convert($size)
{
    $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');

    // log() is -INF for 0 and negative for values below 1, which would lead
    // to a division by zero or an invalid unit index; report such sizes in bytes.
    if ($size < 1) {
        return round($size, 2) . ' ' . $unit[0];
    }

    // Power of 1024 that fits the size, clamped to the largest known unit.
    $i = (int) floor(log($size, 1024));
    $i = min($i, count($unit) - 1);

    return round($size / pow(1024, $i), 2) . ' ' . $unit[$i];
}
/**
 * Return a pseudo-random float that starts at $min and extends upward by at
 * most the absolute distance between $min and $max.
 *
 * @param int|float $min Lower bound (the offset the result starts from).
 * @param int|float $max Other bound; only its distance to $min is used.
 * @return float
 */
function random_float($min, $max)
{
    $span = abs($max - $min);
    $fraction = lcg_value();

    return $min + $fraction * $span;
}
/**
 * Pick one random value from an array.
 *
 * @param array $container Values to choose from.
 * @return mixed A randomly chosen element, or false when $container is empty.
 */
function array_rand_value($container = [])
{
    return empty($container)
        ? false
        : $container[array_rand($container)];
}
/**
 * Build a list of fake e-mail entries.
 *
 * @param int $num Number of entries to generate. Values below 1 yield an
 *                 empty list (the previous do/while loop never terminated
 *                 for them).
 * @return array List of ['value' => <e-mail>, 'type' => 'personal'|'work'].
 */
function createEmails($num = 1)
{
    // Reuse one Faker generator across calls instead of building a new one
    // for every generated record.
    static $faker = null;
    if ($faker === null) {
        $faker = Factory::create();
    }

    $emails = [];
    for ($i = 0; $i < $num; $i++) {
        $emails[] = [
            'value' => $faker->email,
            'type' => array_rand_value(['personal', 'work'])
        ];
    }

    return $emails;
}
/**
 * Build a list of fake phone-number entries; only the first entry is
 * flagged as the default one.
 *
 * @param int $num Number of entries to generate. Values below 1 yield an
 *                 empty list (the previous do/while loop never terminated
 *                 for them).
 * @return array List of ['value' => <phone>, 'type' => 'personal'|'work',
 *               'is_Default' => bool].
 */
function createPhoneNumbers($num = 1)
{
    // Reuse one Faker generator across calls instead of building a new one
    // for every generated record.
    static $faker = null;
    if ($faker === null) {
        $faker = Factory::create();
    }

    $numbers = [];
    for ($i = 0; $i < $num; $i++) {
        $numbers[] = [
            'value' => $faker->phoneNumber(),
            'type' => array_rand_value(['personal', 'work']),
            'is_Default' => ($i === 0)
        ];
    }

    return $numbers;
}
/**
 * Build a list of fake postal-address entries.
 *
 * @param int $num Number of entries to generate. Values below 1 yield an
 *                 empty list (the previous do/while loop never terminated
 *                 for them).
 * @return array List of arrays with the keys type, address, city, zip,
 *               country and state.
 */
function createAddresses($num = 1)
{
    // Reuse one Faker generator across calls instead of building a new one
    // for every generated record.
    static $faker = null;
    if ($faker === null) {
        $faker = Factory::create();
    }

    $addresses = [];
    for ($i = 0; $i < $num; $i++) {
        $addresses[] = [
            'type' => array_rand_value(['work', 'home']),
            'address' => $faker->address(),
            'city' => $faker->city(),
            'zip' => $faker->postcode(),
            'country' => $faker->country(),
            'state' => $faker->stateAbbr()
        ];
    }

    return $addresses;
}
/**
 * Build a random survey result for one event.
 *
 * Roughly one call in five (when rand(0, 4) yields 0) returns an empty
 * array instead of a result.
 *
 * @param array $options Only the 'serviceDate' key is read; it is used for
 *                       both 'dateSent' and 'dateCompleted'.
 * @return array Empty array, or an array with the keys finalScore, dateSent,
 *               dateCompleted and surveyDetails.
 */
function createResults($options)
{
    // Decide up front whether this event has no survey, so the answers are
    // not generated only to be thrown away.
    if (rand(0, 4) === 0) {
        return [];
    }

    // Read the single needed option explicitly instead of extract()-ing
    // every key into the local scope.
    $serviceDate = isset($options['serviceDate']) ? $options['serviceDate'] : null;

    // Answer scale shared by the "Office Staff" statements.
    $agreement = ['Disagree Strongly', 'Disagree', 'Neutral', 'Agree', 'Agree Strongly'];

    return [
        "finalScore" => number_format(random_float(0.00, 10.00), 2, '.', ''),
        "dateSent" => $serviceDate,
        "dateCompleted" => $serviceDate,
        "surveyDetails" => [
            "On a scale of 0-10, please rate your level of satisfaction with your service(s) (with 10 being the highest level of satisfaction):" => rand(0, 10),
            "What is the likelihood that you would suggest Vein Clinics of America to a friend or family member interested in similar services? (10 being the greatest likelihood):" => rand(0, 10),
            "The Office Staff: The Scheduling Process was Easy and Convenient" => array_rand_value($agreement),
            "The Office Staff: Your Greeting and Send-Off were Warm and Professionalt" => array_rand_value($agreement),
            "The Office Staff: Your Wait was Brief and Comfortable" => array_rand_value($agreement),
            "The Office Staff: The Payment Process was Easy and Convenient" => array_rand_value($agreement),
            "What factors were most important in choosing this location for your procedure? (choose any that apply)" => array_rand_value(["Confidence that I would get great results", "Positive Consultation Experience", "Staff Expertise", "Travel (Distance)"]),
            "Getting to Know You: For the next few questions, please choose the answer that accurately describes you, starting with Gender:" => array_rand_value(['male', 'female', 'prefer not to say'])
        ]
    ];
}
/**
 * Build a list of fake survey events for one person/client pair.
 *
 * All events produced by one call share the same random service date
 * (passed on to createResults()) and the same 7-digit clientContactID.
 *
 * @param int   $num     Number of events to generate. Values below 1 yield
 *                       an empty list (the previous do/while loop never
 *                       terminated for them).
 * @param array $options Person data. Keys read here: id, firstname,
 *                       lastname, dob, email (array with a 'value' key),
 *                       address, city, state, zip. Missing keys become null.
 * @return array List of event arrays.
 */
function createEvents($num = 50, $options = [])
{
    // Reuse one Faker generator across calls instead of building a new one
    // for every client of every generated record.
    static $faker = null;
    if ($faker === null) {
        $faker = Factory::create();
    }

    // Explicit lookups replace extract(): nothing in $options can overwrite
    // the locals below, and missing keys are null instead of undefined.
    $person = $options + [
        'id' => null,
        'firstname' => null,
        'lastname' => null,
        'dob' => null,
        'email' => null,
        'address' => null,
        'city' => null,
        'state' => null,
        'zip' => null,
    ];
    $emailValue = (is_array($person['email']) && isset($person['email']['value']))
        ? $person['email']['value']
        : null;

    $serviceDate = $faker->dateTimeBetween('-3 years', 'now')->format('Y-m-d');
    // Random 7-digit id whose first digit is never 0.
    $cId = rand(1000000, 9999999);

    $events = [];
    for ($i = 0; $i < $num; $i++) {
        $events[] = [
            'clientBrandID' => $person['id'],
            'surveyType' => array_rand_value([5005, 5004, 5003, 5002, 5001]),
            'location' => $person['city'],
            'clientContactID' => $cId,
            "firstName" => $person['firstname'],
            "lastName" => $person['lastname'],
            "birthDate" => $person['dob'],
            "email" => $emailValue,
            "address" => $person['address'],
            "city" => $person['city'],
            "state" => $person['state'],
            "zip" => $person['zip'],
            "consultDate" => "",
            "consultantName" => "",
            "consultServiceType" => "",
            // NOTE(review): hard-coded 2 rather than $serviceDate; kept as
            // in the original, confirm whether this is intentional.
            "serviceDate" => 2,
            "serviceProviderName" => "Carolyn Thas, RN",
            "serviceType" => array_rand_value(['MVD', 'DVM', 'MDV', 'DVMM', 'MMM']),
            "consultCancellationDate" => "",
            "serviceRevenue" => number_format(random_float(0.00, 20000.00), 2, '.', ''),
            "userVarChar4" => "Insurance",
            "leadSource" => array_rand_value(['DTC', 'Phone', 'Website', 'Social Media', 'Other']),
            "results" => createResults(array_merge($options, ['serviceDate' => $serviceDate]))
        ];
    }

    return $events;
}
/**
 * Build the per-client records (main info plus events) for one person.
 *
 * A random non-empty tail of the fixed client id list is used, so one to
 * five client records are produced per call.
 *
 * @param array $options Person data. Keys read here: firstname, lastname,
 *                       address, city, state, zip, and optionally dob and
 *                       email (array with a 'value' key). When dob or email
 *                       is absent, the global variable of the same name is
 *                       used, as earlier versions of this function did.
 * @return array List of ['clientId' => int, 'mainInfo' => array, 'events' => array].
 */
function createClients($options)
{
    // Prefer values passed in explicitly; fall back to the globals only for
    // callers that still rely on them.
    $dob = array_key_exists('dob', $options)
        ? $options['dob']
        : (isset($GLOBALS['dob']) ? $GLOBALS['dob'] : null);
    $email = array_key_exists('email', $options)
        ? $options['email']
        : (isset($GLOBALS['email']) ? $GLOBALS['email'] : null);
    $emailValue = (is_array($email) && isset($email['value'])) ? $email['value'] : null;

    // Explicit lookups replace extract(); missing keys become null.
    $person = $options + [
        'firstname' => null,
        'lastname' => null,
        'address' => null,
        'city' => null,
        'state' => null,
        'zip' => null,
    ];

    $clientIds = [35, 67, 73, 78, 82];
    // Keep everything from a random offset to the end of the list.
    $clientIds = array_slice($clientIds, rand(0, count($clientIds) - 1));

    $result = [];
    foreach ($clientIds as $id) {
        $result[] = [
            'clientId' => $id,
            'mainInfo' => [
                "firstName" => $person['firstname'],
                "lastName" => $person['lastname'],
                "birthDate" => $dob,
                "email" => $emailValue,
                "address" => $person['address'],
                "city" => $person['city'],
                "state" => $person['state'],
                "zip" => $person['zip'],
            ],
            'events' => createEvents(rand(1, 5), array_merge($options, ['dob' => $dob, 'email' => $email, 'id' => $id]))
        ];
    }

    return $result;
}
// Generate $iterations batch files, each holding $entries fake user records,
// and write them as output/user-dataset-<batch>.json.
$faker = Factory::create();
$outputDir = 'output/';

// file_put_contents() does not create missing directories.
if (!is_dir($outputDir) && !mkdir($outputDir, 0777, true)) {
    fwrite(STDERR, 'Unable to create directory ' . $outputDir . PHP_EOL);
    exit(1);
}

for ($y = 1; $y <= $iterations; $y++) {
    // Start every batch with a fresh list so only one batch is held in memory.
    $c = [];

    for ($i = 0; $i < $entries; $i++) {
        //survey taker
        $firstname = $faker->firstName;
        $lastname = $faker->lastName;
        // createClients() can read the globals $dob and $email, so these
        // two variable names must not change.
        $dob = $faker->dateTimeThisCentury->format('Y-m-d');
        $emails = createEmails(rand(1, 2));
        $email = $emails[0];
        $addresses = createAddresses(rand(1, 2));
        $contact = $addresses[0];

        // The first address and e-mail double as the person's main contact data.
        $options = array_merge($contact, [
            'firstname' => $firstname,
            'lastname' => $lastname,
            'email' => $email,
            'dob' => $dob,
        ]);

        $c[] = [
            'name' => [
                'firstname' => $firstname,
                'lastname' => $lastname,
            ],
            'emails' => $emails,
            'phoneNumbers' => createPhoneNumbers(rand(1, 2)),
            'addresses' => $addresses,
            'dob' => $dob,
            'clients' => createClients($options)
        ];
    }

    echo "Batch " . $y . PHP_EOL;
    echo "Memory usage " . convert(memory_get_usage(true)) . PHP_EOL;

    // JSON_PRETTY_PRINT is a json_encode() flag; it used to be passed to
    // file_put_contents(), where it had no effect.
    $json = json_encode($c, JSON_PRETTY_PRINT);
    if ($json === false) {
        fwrite(STDERR, 'Failed to encode batch ' . $y . ': ' . json_last_error_msg() . PHP_EOL);
        exit(1);
    }

    $fileName = 'user-dataset-' . $y . '.json';
    if (file_put_contents($outputDir . $fileName, $json) === false) {
        fwrite(STDERR, 'Failed to write ' . $outputDir . $fileName . PHP_EOL);
        exit(1);
    }

    // Release the batch before the next one is built.
    unset($c, $json);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment