Last active
July 23, 2018 07:54
-
-
Save dejurin/6de3043ab1ac0ba6157c4dd7a2bb176e to your computer and use it in GitHub Desktop.
csv to sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
class FileCache { | |
/** | |
* The root cache directory. | |
* | |
* @var string | |
*/ | |
private $cache_dir = '/tmp/cache'; | |
/** | |
* Creates a FileCache object. | |
* | |
* @param array $options | |
*/ | |
public function __construct(array $options = array()) { | |
$available_options = array('cache_dir'); | |
foreach ($available_options as $name) { | |
if (isset($options[$name])) { | |
$this->$name = $options[$name]; | |
} | |
} | |
} | |
/** | |
* Fetches an entry from the cache. | |
* | |
* @param string $id | |
*/ | |
public function get($id) { | |
$file_name = $this->getFileName($id); | |
if (!is_file($file_name) || !is_readable($file_name)) { | |
return false; | |
} | |
$lines = file($file_name); | |
$lifetime = array_shift($lines); | |
$lifetime = (int)trim($lifetime); | |
if ($lifetime !== 0 && $lifetime < time()) { | |
@unlink($file_name); | |
return false; | |
} | |
$serialized = join('', $lines); | |
$data = unserialize($serialized); | |
return $data; | |
} | |
/** | |
* Deletes a cache entry. | |
* | |
* @param string $id | |
* | |
* @return bool | |
*/ | |
public function delete($id) { | |
$file_name = $this->getFileName($id); | |
return unlink($file_name); | |
} | |
/** | |
* Puts data into the cache. | |
* | |
* @param string $id | |
* @param mixed $data | |
* @param int $lifetime | |
* | |
* @return bool | |
*/ | |
public function save($id, $data, $lifetime = 3600) { | |
$dir = $this->getDirectory($id); | |
if (!is_dir($dir)) { | |
if (!mkdir($dir, 0755, true)) { | |
return false; | |
} | |
} | |
$file_name = $this->getFileName($id); | |
$lifetime = time() + $lifetime; | |
$serialized = serialize($data); | |
$result = file_put_contents($file_name, $lifetime . PHP_EOL . $serialized); | |
if ($result === false) { | |
return false; | |
} | |
return true; | |
} | |
//------------------------------------------------ | |
// PRIVATE METHODS | |
//------------------------------------------------ | |
/** | |
* Fetches a directory to store the cache data. | |
* | |
* @param string $id | |
* | |
* @return string | |
*/ | |
protected function getDirectory($id) { | |
$hash = sha1($id, false); | |
$dirs = array($this->getCacheDirectory(), substr($hash, 0, 2), substr($hash, 2, 2),); | |
return join(DIRECTORY_SEPARATOR, $dirs); | |
} | |
/** | |
* Fetches a base directory to store the cache data. | |
* | |
* @return string | |
*/ | |
protected function getCacheDirectory() { | |
return $this->cache_dir; | |
} | |
/** | |
* Fetches a file path of the cache data. | |
* | |
* @param string $id | |
* | |
* @return string | |
*/ | |
protected function getFileName($id) { | |
$directory = $this->getDirectory($id); | |
$hash = sha1($id, false); | |
$file = $directory . DIRECTORY_SEPARATOR . $hash . '.cache'; | |
return $file; | |
} | |
} | |
// functions | |
function rrmdir($dir) { | |
if (is_dir($dir)) { | |
$objects = scandir($dir); | |
foreach ($objects as $object) { | |
if ($object != '.' && $object != '..') { | |
if (filetype($dir . '/' . $object) == 'dir') { | |
rrmdir($dir . '/' . $object); | |
} else { | |
unlink($dir . '/' . $object); | |
} | |
} | |
} | |
reset($objects); | |
rmdir($dir); | |
} | |
} | |
function mb_ucwords($str) { | |
$str = mb_convert_case(strtolower($str), MB_CASE_TITLE, 'UTF-8'); | |
return addslashes($str); | |
} | |
function get_count_line($file) { | |
$linecount = 0; | |
$handle = fopen($file, 'r'); | |
while (!feof($handle)) { | |
$line = fgets($handle); | |
++$linecount; | |
} | |
fclose($handle); | |
return $linecount; | |
} | |
function get_csv_files($dir) { | |
$arr = scandir($dir); | |
$_arr = []; | |
foreach ($arr as $value) { | |
if (pathinfo($value, PATHINFO_EXTENSION) == 'csv') { | |
$_arr[] = $value; | |
} | |
} | |
return $_arr; | |
} | |
function url_title($str, $separator = '-', $lowercase = false) { | |
if ($separator === 'dash') { | |
$separator = '-'; | |
} elseif ($separator === 'underscore') { | |
$separator = '_'; | |
} | |
$q_separator = preg_quote($separator, '#'); | |
$trans = array('&.+?;' => '', '[^\w\d _-]' => '', '\s+' => $separator, '(' . $q_separator . ')+' => $separator,); | |
$str = strip_tags($str); | |
foreach ($trans as $key => $val) { | |
$str = preg_replace('#' . $key . '#i' . (UTF8_ENABLED ? 'u' : ''), $val, $str); | |
} | |
if ($lowercase === true) { | |
$str = strtolower($str); | |
} | |
return trim(trim($str, $separator)); | |
} | |
$curr_path = dirname(__FILE__); | |
$sql_head = 'INSERT INTO `addresses` (`id`, `lot`, `lat`, `number`, `street`, `unit`, `city`, `district`, `region`, `postcode`, `state`) VALUES' . PHP_EOL; | |
$dir = basename($curr_path); | |
$sql = ''; | |
if (is_dir($curr_path . DIRECTORY_SEPARATOR . 'cache')) { | |
echo 'Start delete all cache' . PHP_EOL; | |
exec('rm -rf ' . $curr_path . DIRECTORY_SEPARATOR . 'cache'); | |
echo 'End delete all cache' . PHP_EOL; | |
} | |
$cache = new FileCache(['cache_dir' => $curr_path . DIRECTORY_SEPARATOR . 'cache']); | |
$files = get_csv_files($curr_path); | |
foreach ($files as $file) { | |
echo $file . PHP_EOL; | |
$z = 0; | |
if (file_exists($curr_path . DIRECTORY_SEPARATOR . $file . '.sql')) { | |
unlink($curr_path . DIRECTORY_SEPARATOR . $file . '.sql'); | |
} | |
if (($handle = fopen($curr_path . DIRECTORY_SEPARATOR . $file, 'r')) !== false) { | |
while (($data = fgetcsv($handle, 1000, ',')) !== false) { | |
++$z; | |
if ($z > 1 && (strlen($data[0]) > 3 && strlen($data[1]) > 3) && strlen($data[3]) > 3) { | |
$_city = mb_ucwords(empty($data[5]) ? str_replace(['statewide', 'city_of_', 'town_of_', 'statewide', 'providence', '_'], ['', '', '', '', '', ' '], pathinfo($curr_path . DIRECTORY_SEPARATOR . $file, PATHINFO_FILENAME)) : $data[5]); | |
// tmp | |
$id = 'map/' . strtolower(url_title($curr_path . DIRECTORY_SEPARATOR . $file . DIRECTORY_SEPARATOR . $data[2] . $data[3] . $_city, 'underscore')); | |
if (!$cache->get($id)) { | |
$cache->save($id, 1, 600); | |
$sql.= "(NULL, '" . $data[0] . "', '" . $data[1] . "', '" . mb_ucwords($data[2]) . "', '" . mb_ucwords($data[3]) . "', '" . mb_ucwords($data[4]) . "', '" . $_city . "', '" . mb_ucwords($data[6]) . "', '" . mb_ucwords($data[7]) . "', '" . mb_ucwords($data[8]) . "', '" . $dir . "')," . PHP_EOL; | |
} | |
} | |
if ($z % 500 == 0 && !empty($sql)) { | |
echo $z . PHP_EOL; | |
$sql = $sql_head . substr($sql, 0, -2) . ';' . PHP_EOL; | |
file_put_contents($curr_path . DIRECTORY_SEPARATOR . $file . '.sql', $sql, FILE_APPEND | LOCK_EX); | |
$sql = ''; | |
} | |
} | |
fclose($handle); | |
} | |
} | |
if (is_dir($curr_path . DIRECTORY_SEPARATOR . 'cache')) { | |
echo 'Start delete all cache' . PHP_EOL; | |
exec('rm -rf ' . $curr_path . DIRECTORY_SEPARATOR . 'cache'); | |
echo 'End delete all cache' . PHP_EOL; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment