Created
November 23, 2016 10:18
-
-
Save alancoleman/11fd303275d4a70666bba6e9734c77dc to your computer and use it in GitHub Desktop.
PHP script to scrub and save a CSV file. This little script is designed to be run from the terminal, or from a terminal emulator like Konsole. The path of the CSV file to be processed is passed as the first argument, something like this: php csv_scrub.php /home/[you]/files/test.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * CSV Scrub
 *
 * Command-line script: takes the path of a CSV file as its first argument,
 * scrubs it line by line into a temporary file, echoes both the input and the
 * scrubbed output to the console, and saves the scrubbed data next to the
 * input as "<name>_scrubbed.csv".
 *
 * Scrubbing rules:
 *   - first line (header): every field is wrapped in double quotes; the
 *     closing quote is added after the literal column name ATTRIBUTE_30
 *     (presumably the last header column - confirm against the real data);
 *   - all other lines: runs of six and three double quotes are collapsed,
 *     and NULL is replaced by an empty quoted field.
 *
 * Every failure prints a message and exits with a non-zero status so that
 * calling shells/scripts can detect it.
 */
echo "\n*** CSV Scrub ***\n"; // Message console

// Define $csvFile from the console argument. isset() avoids an
// "undefined offset" notice when the script is run without arguments.
$csvFile = isset($argv[1]) ? $argv[1] : '';

/* File */
if ($csvFile === '') { // Throw error if $csvFile not defined
    echo "\n*** ERROR: Filename not specified ***\n"; // Message console
    exit(1);
}
echo "\n*** File: (" . $csvFile . ") Defined ***\n"; // Message console

// is_file() rather than file_exists(): a directory is not a usable CSV.
if (!is_file($csvFile)) { // Throw error if $csvFile does not exist
    echo "\n*** ERROR: File not found ***\n"; // Message console
    exit(1);
}
echo "\n*** File: (" . $csvFile . ") Found ***\n"; // Message console

// The input is only ever read, so open it read-only; this also lets the
// script process files the user has no write permission on.
$handle = fopen($csvFile, 'r');
if ($handle === false) {
    echo "\n*** File: Unable to open file ***\n\n"; // Message console
    exit(1);
}
echo "\n*** File: (" . $csvFile . ") Opened ***\n"; // Message console

/* Temp File */
$temp_filename = tempnam(sys_get_temp_dir(), 'abcd'); // Create temp file
if ($temp_filename === false) {
    fclose($handle);
    echo "\n*** File: Unable to create tempfile ***\n\n"; // Message console
    exit(1);
}
echo "\n*** Temp File: (" . $temp_filename . ") Created ***\n"; // Message console

$file_handle = fopen($temp_filename, 'r+'); // Open temp file for write + read-back
if ($file_handle === false) {
    fclose($handle);
    unlink($temp_filename);
    echo "\n*** File: Unable to open tempfile ***\n\n"; // Message console
    exit(1);
}
echo "\n*** Temp File: (" . $temp_filename . ") Opened ***\n"; // Message console

/* Pull and edit data from File */
$counter = 0; // Number of lines processed so far
echo "\n*** File: Input ***\n\n"; // Message console

// Loop on the result of fgets() instead of feof(): feof() only turns true
// after a read has already failed, which made the old loop process one extra
// "false" line (wrong line count, and a stray '"' for an empty input file).
while (($line = fgets($handle)) !== false) {
    echo $line; // Message console

    if ($counter === 0) { // Scrub the first line of the csv file, which will be the header
        $line = '"' . str_replace(',', '","', $line);
        $line = str_replace('ATTRIBUTE_30', 'ATTRIBUTE_30"', $line);
    } else { // Scrub all other lines
        // Order matters: collapse the six-quote run before the three-quote run.
        $line = str_replace('""""""', '""', $line);
        $line = str_replace('"""', '"', $line);
        // NOTE(review): this replaces every occurrence of NULL, including
        // inside longer values - confirm that is acceptable for the data.
        $line = str_replace('NULL', '""', $line);
    }

    $counter++; // Increment counter

    if (fputs($file_handle, $line) === false) { // Write scrubbed line to temp file
        fclose($handle);
        fclose($file_handle);
        unlink($temp_filename);
        echo "\n*** ERROR: File doesn't seem to have been written ***\n\n";
        exit(1);
    }
}
echo "\n*** File: (" . $counter . ") Lines edited ***\n"; // Message console

/* Read back what was written */
echo "\n*** Temp File: Output ***\n\n"; // Message console
rewind($file_handle); // Rewind pointer to start
$scrubbed = (string) stream_get_contents($file_handle); // Read once, reuse for echo and save
echo $scrubbed; // Message console

/* Save scrubbed data */
// Only rewrite a trailing ".csv" extension (any case). A blanket str_replace
// would also touch ".csv" inside directory names, and would return the input
// path unchanged - overwriting the source file - when there is no match.
if (strtolower(substr($csvFile, -4)) === '.csv') {
    $file_put_location = substr($csvFile, 0, -4) . '_scrubbed' . substr($csvFile, -4);
} else {
    $file_put_location = $csvFile . '_scrubbed.csv';
}
echo "\n\n*** Scrubbed file location: (" . $file_put_location . ") ***\n"; // Message console

// Write scrubbed data to a new file in the new location
$bytes_written = file_put_contents($file_put_location, $scrubbed);

// Release resources before reporting, so the error path cleans up as well.
fclose($handle); // Close file
fclose($file_handle); // Close file
unlink($temp_filename); // Remove temporary file

// Check the write result itself: a file_exists() test would also pass on a
// stale file left over from an earlier run.
if ($bytes_written === false) {
    echo "\n*** ERROR: File doesn't seem to have been written ***\n\n";
    exit(1);
}
echo "\n*** File: (" . $file_put_location . ") Written ***\n\n\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment