Created
May 6, 2017 11:19
-
-
Save ragusa87/a4f246840b9a27d18a12f64e75eff2f4 to your computer and use it in GitHub Desktop.
Data transform "page_history" column of a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * @author Laurent Constantin
 * Replaces the page codes stored in the 'page_history' column of a CSV file with emotion names.
 * A codebook CSV file that maps each emotion name to its page code is required.
 */
class WhatDoYouDo4Love
{
    /**
     * Reads a CSV file and returns its rows.
     *
     * Blank lines are kept: fgetcsv() reports each of them as a row holding a single null field.
     *
     * @param string $filename  Path of the CSV file to read
     * @param string $separator Field delimiter
     *
     * @return mixed[][] One array of fields per line of the file
     *
     * @throws \RuntimeException When the file cannot be opened
     */
    protected static function getCsvContent($filename, $separator = ",")
    {
        $handle = fopen($filename, "r");
        if (false === $handle) {
            throw new \RuntimeException("Cannot open file $filename");
        }

        $result = [];
        // Length 0 means "no line length limit". Enclosure and escape are the
        // historical defaults, spelled out because relying on the implicit
        // escape character is deprecated as of PHP 8.4.
        while (($data = fgetcsv($handle, 0, $separator, '"', "\\")) !== false) {
            $result[] = $data;
        }
        fclose($handle);

        return $result;
    }

    /**
     * Removes empty entries from an array.
     *
     * A scalar entry is empty when it is null or the empty string; the string
     * "0" is a legitimate value and is kept. A nested array is empty when
     * nothing is left in it after the same rule is applied recursively.
     * Nested arrays that survive are returned untouched, and the keys of the
     * surviving entries are preserved.
     *
     * @param array $data
     *
     * @return array The non-empty entries of $data
     */
    protected static function clearEmptyLines(array $data)
    {
        return array_filter(
            $data,
            function ($el) {
                if (is_array($el)) {
                    return [] !== self::clearEmptyLines($el);
                }

                return null !== $el && '' !== $el;
            }
        );
    }

    /**
     * Reads the codebook that is used to replace values.
     * Ex:
     * > Happy1,5367060
     * > Neutral1,5367075
     * > Tendress1,5367064
     * Will be an array [5367060 => Happy1, 5367075 => Neutral1, etc..]
     *
     * Empty lines and lines that lack a name or a code are skipped.
     * Whitespace around a code is ignored.
     *
     * @param string $codeBookFile Filename of the codebook
     *
     * @return array Emotion indexed by code
     *
     * @throws \RuntimeException When the codebook cannot be opened
     */
    public static function createCodeBookMap($codeBookFile)
    {
        $map = [];
        foreach (self::clearEmptyLines(self::getCsvContent($codeBookFile)) as $line) {
            // A line with a single column cannot be turned into a mapping.
            if (!isset($line[0], $line[1])) {
                continue;
            }

            $code = trim($line[1]);
            if ('' === $code) {
                continue;
            }

            $map[$code] = $line[0];
        }

        return $map;
    }

    /**
     * Transforms the page history column: every known code is replaced with
     * its emotion name, every unknown code is dropped.
     *
     * Rows that do not reach the page history column (blank lines, truncated
     * rows) are returned unchanged.
     *
     * @param string $dataFile         Filename for data
     * @param string $codeBookFile     Filename for codeBook
     * @param string $columnName       Column name to replace for the page history
     * @param string $historySeparator Separator to use inside the page history
     *
     * @return mixed[][] Header row followed by the transformed data rows
     *
     * @throws \InvalidArgumentException When $historySeparator is empty
     * @throws \RuntimeException         When a file cannot be opened, the data file
     *                                   is empty or the column is not in the header
     */
    public static function replacePageHistory(
        $dataFile,
        $codeBookFile,
        $columnName = "page_history",
        $historySeparator = ","
    ) {
        // explode() cannot split on an empty separator.
        if ('' === (string) $historySeparator) {
            throw new \InvalidArgumentException("History separator cannot be empty");
        }

        $data = self::getCsvContent($dataFile);
        // Avoid empty file
        if (empty($data)) {
            throw new \RuntimeException("$dataFile is empty");
        }

        // Search the column in the headers; strict comparison avoids loose
        // numeric-string matches.
        $headers = array_shift($data);
        $index = array_search((string) $columnName, $headers, true);
        if (false === $index) {
            throw new \RuntimeException("Header $columnName not found");
        }

        $map = self::createCodeBookMap($codeBookFile);
        foreach ($data as $key => $line) {
            // Leave rows without that column alone instead of reading an
            // undefined offset and writing a sparse row back.
            if (!isset($line[$index])) {
                continue;
            }

            $data[$key][$index] = self::translateHistory($line[$index], $map, $historySeparator);
        }

        // Re-add header
        array_unshift($data, $headers);

        return $data;
    }

    /**
     * Translates one page history value.
     *
     * @param string $pageHistory      Codes joined with $historySeparator
     * @param array  $map              Emotion indexed by code
     * @param string $historySeparator Non-empty separator of the codes
     *
     * @return string Emotion names of the known codes, in their original order,
     *                joined with $historySeparator
     */
    protected static function translateHistory($pageHistory, array $map, $historySeparator)
    {
        $emotions = [];
        foreach (explode($historySeparator, $pageHistory) as $code) {
            // Tolerate "123, 456" style lists.
            $code = trim($code);
            if (isset($map[$code])) {
                $emotions[] = $map[$code];
            }
        }

        return implode($historySeparator, $emotions);
    }

    /**
     * Creates a new CSV file, overwriting an existing one.
     *
     * @param string $fileName Path of the file to write
     * @param array  $data     Rows to write, one array of fields per row
     *
     * @throws \RuntimeException When the file cannot be opened or written
     */
    public static function buildCsvFile($fileName, array $data)
    {
        $output = fopen($fileName, 'w');
        if (false === $output) {
            throw new \RuntimeException("Cannot open $fileName for writing");
        }

        foreach ($data as $line) {
            // Default separator, enclosure and escape, passed explicitly for
            // the same reason as in getCsvContent().
            if (false === fputcsv($output, $line, ',', '"', "\\")) {
                fclose($output);
                throw new \RuntimeException("Cannot write to $fileName");
            }
        }

        fclose($output);
    }
}
// Driver: converts one hard-coded export and reports where the result was written.
// Every notice and warning is reported while it runs.
error_reporting(E_ALL);

$dataFile = "data_project_797147_2017_05_06.csv";
$codeBookFile = "codebook_project_797147_2017_05_05.csv";

// Replace the page codes of the export with emotion names.
$data = WhatDoYouDo4Love::replacePageHistory($dataFile, $codeBookFile);

// The result name is the current Unix timestamp, a dash, the input name and
// ".csv" (so it ends in ".csv.csv", the input name already has the extension).
$output = sprintf("%s-%s.csv", time(), $dataFile);
WhatDoYouDo4Love::buildCsvFile($output, $data);

echo sprintf("File created as '%s'\n", $output);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment