Skip to content

Instantly share code, notes, and snippets.

@rizqidjamaluddin
Created April 3, 2017 05:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rizqidjamaluddin/10a8ffa95432ec74c42301f683a7d783 to your computer and use it in GitHub Desktop.
Save rizqidjamaluddin/10a8ffa95432ec74c42301f683a7d783 to your computer and use it in GitHub Desktop.
<?php
/**
 * Converts the free-form address listing in src.txt into result.csv.
 *
 * Each input line is classified by the first rule that matches:
 *   - blank line                          : an empty line is echoed, nothing stored
 *   - starts with "#"                     : marks a new entry; the entry collected
 *                                           so far (if any) is written to the CSV
 *   - starts with "Attn:"                 : contact name (one trailing "." removed)
 *   - starts with "ST 12345" or "ST12345" : state and zip
 *   - contains "(ddd)" or "ddd-dddd"      : phone number, ignored
 *   - anything else                       : first line of a three-line address
 *                                           group (company line, street line,
 *                                           city line); the next two lines are
 *                                           consumed without classification
 *
 * CSV columns: first_name, last_name, address, city, state, zip. The whole
 * name is stored in first_name; last_name is always written empty.
 *
 * Progress is reported on standard output, one line per input line.
 */

// Open the input first so a missing src.txt does not truncate result.csv.
$file = fopen('src.txt', 'r');
if ($file === false) {
    fwrite(STDERR, "Unable to open src.txt for reading.\n");
    exit(1);
}

$out = fopen('result.csv', 'w');
if ($out === false) {
    fclose($file);
    fwrite(STDERR, "Unable to open result.csv for writing.\n");
    exit(1);
}

fputcsv($out, ['first_name', 'last_name', 'address', 'city', 'state', 'zip']);

// Template row: guarantees every saved entry has all six columns, in header order.
$blankRow = ['', '', '', '', '', ''];
$buffer = [];
$counter = 0;

// Writes one collected entry to the CSV and prints the "Saving" banner.
// fputcsv() emits values in insertion order and skips keys that were never
// set, so the sparse buffer is laid over the template first; otherwise an
// entry with a missing or out-of-order field would land in the wrong columns.
$saveEntry = function (array $entry) use ($out, $blankRow, &$counter) {
    $counter++;
    echo str_repeat('-', 50);
    echo "\n Saving #" . $counter . "\n";
    echo str_repeat('-', 50) . "\n\n";
    fputcsv($out, array_replace($blankRow, $entry));
};

// Strict comparison: only a real end-of-file/error stops the loop, not a
// final line whose content happens to be falsy (e.g. "0").
while (($line = fgets($file)) !== false) {
    $trimmed = trim($line);

    if ($trimmed === '') {
        echo "\n";
        continue;
    }

    if (strncmp($line, '#', 1) === 0) {
        // Line with ML number: a new entry begins, so dump the previous one.
        if (!empty($buffer)) {
            $saveEntry($buffer);
            $buffer = [];
        }
        printf("%-100s %s", $trimmed, "New address detected.\n");
        continue;
    }

    if (strncmp($line, 'Attn:', 5) === 0) {
        // Name line.
        $name = trim(substr($line, 5));
        // Get rid of a spare period at the end of the name.
        if (substr($name, -1) === '.') {
            $name = substr($name, 0, -1);
        }
        $buffer[0] = $name;
        $buffer[1] = '';
        printf("%-100s %s", $trimmed, "Name found. {$buffer[0]} \n");
        continue;
    }

    $matches = [];
    // State/zip, with or without a single space between the two parts.
    if (preg_match('/^([A-Z]{2}) ?(\d{5})/', $line, $matches)) {
        $buffer[4] = $matches[1];
        $buffer[5] = $matches[2];
        printf("%-100s %s", $trimmed, "State/zip found. {$buffer[4]} ; {$buffer[5]} \n");
        continue;
    }

    if (preg_match('/\(\d{3}\)/', $line) || preg_match('/\d{3}-\d{4}/', $line)) {
        // Phone number, ignore.
        printf("%-100s %s", $trimmed, "Phone number; skipping. \n");
        continue;
    }

    // Address group: this line plus the next two. fgets() returns false once
    // the input is exhausted; the cast turns that into an empty string.
    $firstLine = $trimmed;
    $secondLine = trim((string) fgets($file));
    $city = trim((string) fgets($file));
    // Get rid of a spare comma at the end of the street line.
    if (substr($secondLine, -1) === ',') {
        $secondLine = substr($secondLine, 0, -1);
    }
    $buffer[2] = $firstLine . ' ' . $secondLine;
    $buffer[3] = $city;
    printf("%-100s %s", $firstLine, "Company name in address line found.\n");
    printf("%-100s %s", $secondLine, "Main address line found. Full: {$buffer[2]} \n");
    printf("%-100s %s", $city, "City line found. Full: {$buffer[3]} \n");
}

// The last entry has no following "#" line to trigger its save.
if (!empty($buffer)) {
    $saveEntry($buffer);
    $buffer = [];
}

fclose($file);
fclose($out);

echo "Done, $counter entries.\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment