Last active
December 15, 2015 11:19
-
-
Save sharapeco/5252509 to your computer and use it in GitHub Desktop.
Excel から出力した UTF-16LE のタブ区切りテキストを UTF-8 の CSV ファイルにするスクリプト、と、それをさらに JSON にするスクリプトと Makefile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Entry point: the first command-line argument is the CSV file to convert.
// Without this check a missing argument raises an undefined-index warning
// and hands null to main().
if (!isset($argv[1])) {
    fwrite(STDERR, "Usage: php {$argv[0]} <input.csv>\n");
    exit(1);
}
main($argv[1]);
/**
 * Convert a CSV file with a header row into a JSON array of objects and
 * print it to standard output.
 *
 * The first row supplies the property names. Every later row becomes one
 * object; cells holding an empty string, and cells that have no matching
 * header column, are left out of that object.
 *
 * @param string $src Path of the CSV file to read.
 * @return void
 */
function main($src) {
    $fh = fopen($src, 'r');
    if ($fh === false) {
        // Exit non-zero so the caller can tell that no JSON was produced.
        fwrite(STDERR, "Cannot open input file: {$src}\n");
        exit(1);
    }

    $rows = array();
    // Delimiter, enclosure and escape are passed explicitly with their
    // default values: the parsing is unchanged, and PHP 8.4 no longer warns
    // about relying on the implicit escape default.
    while (($row = fgetcsv($fh, 0, ',', '"', '\\')) !== false) {
        // fgetcsv() reports a blank line as array(null); such a line is not
        // a data row and would otherwise surface as {"<first header>":null}.
        if ($row === array(null)) {
            continue;
        }
        $rows[] = $row;
    }
    fclose($fh);

    $header = array_shift($rows);
    $assoc = array();
    foreach ($rows as $cols) {
        $arow = array();
        foreach ($cols as $i => $val) {
            if (isset($header[$i]) && $val !== '') {
                $arow[$header[$i]] = $val;
            }
        }
        // Cast to an object so json_encode() always emits {...} for a row.
        // A plain PHP array that is empty, or whose keys happen to be
        // 0, 1, 2, ... (numeric header names), would be emitted as [...].
        $assoc[] = (object) $arow;
    }
    echo json_encode($assoc);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two-step pipeline driven by pattern rules:
#   $(SRC_DIR)/*.csv  --tsv_utf16le_to_csv_utf8.php-->  $(TMP_DIR)/*.csv
#   $(TMP_DIR)/*.csv  --csv_to_json.php------------->  $(DEST_DIR)/*.json
SRC_DIR = src
TMP_DIR = utf8
DEST_DIR = json

# One JSON target per source file found in $(SRC_DIR).
CSVS := $(wildcard $(SRC_DIR)/*.csv)
JSONS := $(patsubst $(SRC_DIR)/%.csv,$(DEST_DIR)/%.json,$(CSVS))

# The recipes write through a shell redirection, which creates $@ before the
# script has succeeded. Delete such a half-written target on failure so the
# next run does not treat it as up to date.
.DELETE_ON_ERROR:

.PHONY: all
all: $(JSONS)

$(TMP_DIR)/%.csv: $(SRC_DIR)/%.csv
	@mkdir -p $(@D)
	php tsv_utf16le_to_csv_utf8.php $< > $@

$(DEST_DIR)/%.json: $(TMP_DIR)/%.csv
	@mkdir -p $(@D)
	php csv_to_json.php $< > $@
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Entry point: the first command-line argument is the tab-separated text
// file to convert. Without this check a missing argument raises an
// undefined-index warning and hands null to main().
if (!isset($argv[1])) {
    fwrite(STDERR, "Usage: php {$argv[0]} <input.txt>\n");
    exit(1);
}
main($argv[1]);
/**
 * Quote one value for CSV output.
 *
 * The value is returned unchanged unless it contains a comma, a double
 * quote or a line break; in that case it is wrapped in double quotes and
 * embedded double quotes are doubled.
 *
 * @param string $value Raw cell text.
 * @return string Text that can be placed between commas in a CSV row.
 */
function csv_escape_field($value) {
    if (strpbrk($value, ",\"\r\n") === false) {
        return $value;
    }
    return '"' . str_replace('"', '""', $value) . '"';
}

/**
 * Convert a UTF-16 tab-separated text file to UTF-8 CSV and print the
 * result to standard output.
 *
 * Inside each cell, a run of ten or more spaces is replaced by a line
 * break. Rows are joined with "\n"; blank input lines are kept as blank
 * output lines.
 *
 * @param string $src Path of the tab-separated input file.
 * @return void
 */
function main($src) {
    $doc = file_get_contents($src);
    if ($doc === false) {
        // Exit non-zero so the caller can tell that no CSV was produced.
        fwrite(STDERR, "Cannot read input file: {$src}\n");
        exit(1);
    }

    // Strip the BOM and let it choose the byte order. Input without a BOM
    // is decoded as little-endian.
    $encoding = 'UTF-16LE';
    $bom = substr($doc, 0, 2);
    if ($bom === "\xff\xfe" || $bom === "\xfe\xff") {
        if ($bom === "\xfe\xff") {
            $encoding = 'UTF-16BE';
        }
        $doc = (string) substr($doc, 2);
    }
    $doc = mb_convert_encoding($doc, 'UTF-8', $encoding);

    // Split on any line-ending style rather than PHP_EOL, so the result
    // does not depend on the platform the script runs on.
    $rows = preg_split('/\r\n|\r|\n/', $doc);
    foreach ($rows as &$row) {
        if ($row === '') {
            continue;
        }
        // Parse the row as tab-delimited fields so cells that are already
        // quoted are unwrapped before being re-quoted for CSV. The empty
        // escape argument disables backslash escaping where PHP supports it.
        $cols = str_getcsv($row, "\t", '"', '');
        foreach ($cols as &$col) {
            $col = preg_replace('/[ ]{10,}/u', "\n", (string) $col);
            // Quoting keeps the inserted line break (and any comma or
            // quote) inside its cell instead of splitting the CSV row.
            $col = csv_escape_field($col);
        }
        unset($col);
        $row = implode(',', $cols);
    }
    unset($row);
    echo implode("\n", $rows);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
13行目くらいでスペースの連続を改行に直しているのは、セル内で改行する方法が分からなくてスペースをやたらと入れている人がいたため。