Skip to content

Instantly share code, notes, and snippets.

@dejurin
Last active July 23, 2018 07:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dejurin/6de3043ab1ac0ba6157c4dd7a2bb176e to your computer and use it in GitHub Desktop.
Save dejurin/6de3043ab1ac0ba6157c4dd7a2bb176e to your computer and use it in GitHub Desktop.
csv to sql
<?php
class FileCache {
/**
* The root cache directory.
*
* @var string
*/
private $cache_dir = '/tmp/cache';
/**
* Creates a FileCache object.
*
* @param array $options
*/
public function __construct(array $options = array()) {
$available_options = array('cache_dir');
foreach ($available_options as $name) {
if (isset($options[$name])) {
$this->$name = $options[$name];
}
}
}
/**
* Fetches an entry from the cache.
*
* @param string $id
*/
public function get($id) {
$file_name = $this->getFileName($id);
if (!is_file($file_name) || !is_readable($file_name)) {
return false;
}
$lines = file($file_name);
$lifetime = array_shift($lines);
$lifetime = (int)trim($lifetime);
if ($lifetime !== 0 && $lifetime < time()) {
@unlink($file_name);
return false;
}
$serialized = join('', $lines);
$data = unserialize($serialized);
return $data;
}
/**
* Deletes a cache entry.
*
* @param string $id
*
* @return bool
*/
public function delete($id) {
$file_name = $this->getFileName($id);
return unlink($file_name);
}
/**
* Puts data into the cache.
*
* @param string $id
* @param mixed $data
* @param int $lifetime
*
* @return bool
*/
public function save($id, $data, $lifetime = 3600) {
$dir = $this->getDirectory($id);
if (!is_dir($dir)) {
if (!mkdir($dir, 0755, true)) {
return false;
}
}
$file_name = $this->getFileName($id);
$lifetime = time() + $lifetime;
$serialized = serialize($data);
$result = file_put_contents($file_name, $lifetime . PHP_EOL . $serialized);
if ($result === false) {
return false;
}
return true;
}
//------------------------------------------------
// PRIVATE METHODS
//------------------------------------------------
/**
* Fetches a directory to store the cache data.
*
* @param string $id
*
* @return string
*/
protected function getDirectory($id) {
$hash = sha1($id, false);
$dirs = array($this->getCacheDirectory(), substr($hash, 0, 2), substr($hash, 2, 2),);
return join(DIRECTORY_SEPARATOR, $dirs);
}
/**
* Fetches a base directory to store the cache data.
*
* @return string
*/
protected function getCacheDirectory() {
return $this->cache_dir;
}
/**
* Fetches a file path of the cache data.
*
* @param string $id
*
* @return string
*/
protected function getFileName($id) {
$directory = $this->getDirectory($id);
$hash = sha1($id, false);
$file = $directory . DIRECTORY_SEPARATOR . $hash . '.cache';
return $file;
}
}
// functions
function rrmdir($dir) {
if (is_dir($dir)) {
$objects = scandir($dir);
foreach ($objects as $object) {
if ($object != '.' && $object != '..') {
if (filetype($dir . '/' . $object) == 'dir') {
rrmdir($dir . '/' . $object);
} else {
unlink($dir . '/' . $object);
}
}
}
reset($objects);
rmdir($dir);
}
}
function mb_ucwords($str) {
$str = mb_convert_case(strtolower($str), MB_CASE_TITLE, 'UTF-8');
return addslashes($str);
}
function get_count_line($file) {
$linecount = 0;
$handle = fopen($file, 'r');
while (!feof($handle)) {
$line = fgets($handle);
++$linecount;
}
fclose($handle);
return $linecount;
}
function get_csv_files($dir) {
$arr = scandir($dir);
$_arr = [];
foreach ($arr as $value) {
if (pathinfo($value, PATHINFO_EXTENSION) == 'csv') {
$_arr[] = $value;
}
}
return $_arr;
}
function url_title($str, $separator = '-', $lowercase = false) {
if ($separator === 'dash') {
$separator = '-';
} elseif ($separator === 'underscore') {
$separator = '_';
}
$q_separator = preg_quote($separator, '#');
$trans = array('&.+?;' => '', '[^\w\d _-]' => '', '\s+' => $separator, '(' . $q_separator . ')+' => $separator,);
$str = strip_tags($str);
foreach ($trans as $key => $val) {
$str = preg_replace('#' . $key . '#i' . (UTF8_ENABLED ? 'u' : ''), $val, $str);
}
if ($lowercase === true) {
$str = strtolower($str);
}
return trim(trim($str, $separator));
}
$curr_path = dirname(__FILE__);
$sql_head = 'INSERT INTO `addresses` (`id`, `lot`, `lat`, `number`, `street`, `unit`, `city`, `district`, `region`, `postcode`, `state`) VALUES' . PHP_EOL;
$dir = basename($curr_path);
$sql = '';
if (is_dir($curr_path . DIRECTORY_SEPARATOR . 'cache')) {
echo 'Start delete all cache' . PHP_EOL;
exec('rm -rf ' . $curr_path . DIRECTORY_SEPARATOR . 'cache');
echo 'End delete all cache' . PHP_EOL;
}
$cache = new FileCache(['cache_dir' => $curr_path . DIRECTORY_SEPARATOR . 'cache']);
$files = get_csv_files($curr_path);
foreach ($files as $file) {
echo $file . PHP_EOL;
$z = 0;
if (file_exists($curr_path . DIRECTORY_SEPARATOR . $file . '.sql')) {
unlink($curr_path . DIRECTORY_SEPARATOR . $file . '.sql');
}
if (($handle = fopen($curr_path . DIRECTORY_SEPARATOR . $file, 'r')) !== false) {
while (($data = fgetcsv($handle, 1000, ',')) !== false) {
++$z;
if ($z > 1 && (strlen($data[0]) > 3 && strlen($data[1]) > 3) && strlen($data[3]) > 3) {
$_city = mb_ucwords(empty($data[5]) ? str_replace(['statewide', 'city_of_', 'town_of_', 'statewide', 'providence', '_'], ['', '', '', '', '', ' '], pathinfo($curr_path . DIRECTORY_SEPARATOR . $file, PATHINFO_FILENAME)) : $data[5]);
// tmp
$id = 'map/' . strtolower(url_title($curr_path . DIRECTORY_SEPARATOR . $file . DIRECTORY_SEPARATOR . $data[2] . $data[3] . $_city, 'underscore'));
if (!$cache->get($id)) {
$cache->save($id, 1, 600);
$sql.= "(NULL, '" . $data[0] . "', '" . $data[1] . "', '" . mb_ucwords($data[2]) . "', '" . mb_ucwords($data[3]) . "', '" . mb_ucwords($data[4]) . "', '" . $_city . "', '" . mb_ucwords($data[6]) . "', '" . mb_ucwords($data[7]) . "', '" . mb_ucwords($data[8]) . "', '" . $dir . "')," . PHP_EOL;
}
}
if ($z % 500 == 0 && !empty($sql)) {
echo $z . PHP_EOL;
$sql = $sql_head . substr($sql, 0, -2) . ';' . PHP_EOL;
file_put_contents($curr_path . DIRECTORY_SEPARATOR . $file . '.sql', $sql, FILE_APPEND | LOCK_EX);
$sql = '';
}
}
fclose($handle);
}
}
if (is_dir($curr_path . DIRECTORY_SEPARATOR . 'cache')) {
echo 'Start delete all cache' . PHP_EOL;
exec('rm -rf ' . $curr_path . DIRECTORY_SEPARATOR . 'cache');
echo 'End delete all cache' . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment