Skip to content

Instantly share code, notes, and snippets.

@RandomArray
Last active June 22, 2021 23:23
Show Gist options
  • Save RandomArray/20ee2a86680a9940ca62 to your computer and use it in GitHub Desktop.
Save RandomArray/20ee2a86680a9940ca62 to your computer and use it in GitHub Desktop.
Imports a CSV file into a MySQL database line-by-line. Displays output timer, a count, and ok/fail status. This was created to import a 200mb CSV file with over 200,000 rows. After many failed attempts to import (Out of Memory errors), I wrote this script.
<?php
ini_set('max_execution_time', 0);
header( 'Content-type: text/html; charset=utf-8' ); // Stream output
echo '<div id="o"></div>
<script> function u(m){var e = document.getElementById("o");e.innerHTML=m;} </script>';
$startTime = time();
define('DB_HOST','localhost');
define('DB_NAME','mytestdb');
define('DB_USER','username');
define('DB_PASS','password');
$db = new PDO('mysql:host='.DB_HOST.';dbname='.DB_NAME, DB_USER, DB_PASS);
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$rcount = 0;
$count = 0;
if(($handle = fopen('LargeCSVFile.csv', 'r')) !== false)
{
$count++;
// loop through the file line-by-line
while(($data = fgetcsv($handle)) !== false)
{
// The CSV had 35 columns. Change the code below to match number of columns in your CSV.
$sql= "INSERT INTO `mytable` (`col0`, `col1`, `col2`, `col3`, `col4`, `col5`, `col6`, `col7`, `col8`, `col9`, `col10`, `col11`, `col12`, `col13`, `col14`, `col15`, `col16`, `col17`, `col18`, `col19`, `col20`, `col21`, `col22`, `col23`, `col24`, `col25`, `col26`, `col27`, `col28`, `col29`, `col30`, `col31`, `col32`, `col33`, `col34`) VALUES ( :data0, :data1, :data2, :data3, :data4, :data5, :data6, :data7, :data8, :data9, :data10, :data11, :data12, :data13, :data14, :data15, :data16, :data17, :data18, :data19, :data20, :data21, :data22, :data23, :data24, :data25, :data26, :data27, :data28, :data29, :data30, :data31, :data32, :data33, :data34 );";
$stmt = $db->prepare($sql);
$stmt->bindParam(':data0', $data[0], PDO::PARAM_STR);
$stmt->bindParam(':data1', $data[1], PDO::PARAM_STR);
$stmt->bindParam(':data2', $data[2], PDO::PARAM_STR);
$stmt->bindParam(':data3', $data[3], PDO::PARAM_STR);
$stmt->bindParam(':data4', $data[4], PDO::PARAM_STR);
$stmt->bindParam(':data5', $data[5], PDO::PARAM_STR);
$stmt->bindParam(':data6', $data[6], PDO::PARAM_STR);
$stmt->bindParam(':data7', $data[7], PDO::PARAM_STR);
$stmt->bindParam(':data8', $data[8], PDO::PARAM_STR);
$stmt->bindParam(':data9', $data[9], PDO::PARAM_STR);
$stmt->bindParam(':data10', $data[10], PDO::PARAM_STR);
$stmt->bindParam(':data11', $data[11], PDO::PARAM_STR);
$stmt->bindParam(':data12', $data[12], PDO::PARAM_STR);
$stmt->bindParam(':data13', $data[13], PDO::PARAM_STR);
$stmt->bindParam(':data14', $data[14], PDO::PARAM_STR);
$stmt->bindParam(':data15', $data[15], PDO::PARAM_STR);
$stmt->bindParam(':data16', $data[16], PDO::PARAM_STR);
$stmt->bindParam(':data17', $data[17], PDO::PARAM_STR);
$stmt->bindParam(':data18', $data[18], PDO::PARAM_STR);
$stmt->bindParam(':data19', $data[19], PDO::PARAM_STR);
$stmt->bindParam(':data20', $data[20], PDO::PARAM_STR);
$stmt->bindParam(':data21', $data[21], PDO::PARAM_STR);
$stmt->bindParam(':data22', $data[22], PDO::PARAM_STR);
$stmt->bindParam(':data23', $data[23], PDO::PARAM_STR);
$stmt->bindParam(':data24', $data[24], PDO::PARAM_STR);
$stmt->bindParam(':data25', $data[25], PDO::PARAM_STR);
$stmt->bindParam(':data26', $data[26], PDO::PARAM_STR);
$stmt->bindParam(':data27', $data[27], PDO::PARAM_STR);
$stmt->bindParam(':data28', $data[28], PDO::PARAM_STR);
$stmt->bindParam(':data29', $data[29], PDO::PARAM_STR);
$stmt->bindParam(':data30', $data[30], PDO::PARAM_STR);
$stmt->bindParam(':data31', $data[31], PDO::PARAM_STR);
$stmt->bindParam(':data32', $data[32], PDO::PARAM_STR);
$stmt->bindParam(':data33', $data[33], PDO::PARAM_STR);
$stmt->bindParam(':data34', $data[34], PDO::PARAM_STR);
$r = $stmt->execute(); // Change to just $stmt->execute(); and remove all comments to speed up. Who needs output?
// Only output one line every 250 of lines to save resources
// For some reason the output lags behind the actual row count in mysql.
// The lag gets worse as the program runs.. Comment out from here to line 81 to stop output.
$rcount++;
if($rcount>=250){
if($r){
$o = '<script>u("'.timedif($startTime).' - '.$count.' - OK")</script>';
}else{
$o = '<script>u("'.timedif($startTime).' - '.$count.' - FAIL")</script>';
}
echo $o;
flush();
ob_flush();
$rcount=0;
}
unset($data);
$count++;
}
fclose($handle);
}
function timedif($start){
return gmdate("H:i:s", (time()-$start));
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment