Skip to content

Instantly share code, notes, and snippets.

@ragusa87
Created May 6, 2017 11:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ragusa87/a4f246840b9a27d18a12f64e75eff2f4 to your computer and use it in GitHub Desktop.
Save ragusa87/a4f246840b9a27d18a12f64e75eff2f4 to your computer and use it in GitHub Desktop.
Transform the "page_history" column of a CSV file
<?php
/**
* @author Laurent Constantin
* Replaces the page codes in the 'page_history' column of a CSV file with emotion names.
* A code book in CSV format that maps each emotion name to its page code is required.
*/
class WhatDoYouDo4Love
{
    /**
     * Reads a CSV file and returns its rows.
     *
     * Every line of the file becomes one entry of the result. fgetcsv()
     * reports a blank line as an array holding a single null field, so such
     * lines are present in the result as [null].
     *
     * @param string $filename  Path of the CSV file to read
     * @param string $separator Field delimiter
     *
     * @return mixed[][] One array of fields per line, in file order
     *
     * @throws \RuntimeException When the file cannot be opened
     */
    protected static function getCsvContent($filename, $separator = ",")
    {
        $handle = fopen($filename, "r");
        if (false === $handle) {
            throw new \RuntimeException("Cannot open file $filename");
        }
        $rows = [];
        // Length 0 means "no line length limit". The enclosure and escape
        // characters are the long-standing defaults, passed explicitly because
        // relying on the implicit escape default is deprecated since PHP 8.4.
        while (($fields = fgetcsv($handle, 0, $separator, '"', '\\')) !== false) {
            $rows[] = $fields;
        }
        fclose($handle);
        return $rows;
    }

    /**
     * Removes all empty entries from an array.
     *
     * An entry is empty when it is null, an empty string, or an array that
     * contains only empty entries. Kept entries are returned unchanged (nested
     * arrays keep their own empty cells) and original keys are preserved.
     *
     * @param array $data
     *
     * @return array The non-empty entries of $data
     */
    protected static function clearEmptyLines(array $data)
    {
        return array_filter(
            $data,
            function ($el) {
                if (is_array($el)) {
                    // A nested array is empty when nothing survives cleaning it.
                    return [] !== self::clearEmptyLines($el);
                }
                // Compare explicitly instead of using empty(), which would
                // also discard the legitimate value "0".
                return null !== $el && '' !== $el;
            }
        );
    }

    /**
     * Reads the code book that is used to replace values.
     * Ex:
     * > Happy1,5367060
     * > Neutral1,5367075
     * > Tendress1,5367064
     * Will be an array [5367060 => Happy1, 5367075 => Neutral1, etc..]
     *
     * Lines that do not provide both an emotion name and a code are skipped.
     * Without this, a line such as "Happy1," would register the emotion under
     * the empty code, and every empty page history would then be rewritten to
     * that emotion.
     *
     * @param string $codeBookFile Filename of the code book
     *
     * @return array Emotion indexed by code
     *
     * @throws \RuntimeException When the code book cannot be opened
     */
    public static function createCodeBookMap($codeBookFile)
    {
        $lines = self::clearEmptyLines(self::getCsvContent($codeBookFile));
        $map = [];
        foreach ($lines as $line) {
            // isset() is false for missing and for null fields alike.
            if (!isset($line[0], $line[1]) || '' === $line[0] || '' === $line[1]) {
                continue;
            }
            $map[$line[1]] = $line[0];
        }
        return $map;
    }

    /**
     * Transforms the page history column: every code listed in the column is
     * replaced by its emotion from the code book, and codes that are not in
     * the code book are dropped.
     *
     * Rows that have no value in that column (blank lines, rows shorter than
     * the header) are returned untouched.
     *
     * @param string $dataFile         Filename for data
     * @param string $codeBookFile     Filename for codeBook
     * @param string $columnName       Column name to replace for the page history
     * @param string $historySeparator Separator to use inside the page history
     *
     * @return mixed[][] The header row followed by the transformed data rows
     *
     * @throws \RuntimeException When a file cannot be opened, the data file is
     *                           empty, or the column is not in the header
     */
    public static function replacePageHistory(
        $dataFile,
        $codeBookFile,
        $columnName = "page_history",
        $historySeparator = ","
    ) {
        $data = self::getCsvContent($dataFile);
        // Avoid empty file
        if (empty($data)) {
            throw new \RuntimeException("$dataFile is empty");
        }
        // Search the column in the headers; strict comparison avoids loose
        // matches between numeric-looking header names.
        $headers = array_shift($data);
        $index = array_search($columnName, $headers, true);
        if (false === $index) {
            throw new \RuntimeException("Header $columnName not found");
        }
        $map = self::createCodeBookMap($codeBookFile);
        foreach ($data as $key => $line) {
            // Nothing to translate when the row has no such cell; writing to
            // it would only turn a blank line into a sparse row.
            if (!isset($line[$index])) {
                continue;
            }
            // Keep known codes only, translated to their emotion.
            $emotions = [];
            foreach (explode($historySeparator, $line[$index]) as $code) {
                if (isset($map[$code])) {
                    $emotions[] = $map[$code];
                }
            }
            // Put it back into data
            $data[$key][$index] = implode($historySeparator, $emotions);
        }
        // Re-add header
        array_unshift($data, $headers);
        return $data;
    }

    /**
     * Creates a new CSV file, replacing any existing file of that name.
     *
     * @param string $fileName Path of the file to write
     * @param array  $data     Rows to write, one array of fields per row
     *
     * @throws \RuntimeException When the file cannot be opened or written
     */
    public static function buildCsvFile($fileName, array $data)
    {
        $output = fopen($fileName, 'w');
        if (false === $output) {
            throw new \RuntimeException("Cannot open $fileName for writing");
        }
        foreach ($data as $line) {
            // Delimiter, enclosure and escape are the long-standing defaults,
            // passed explicitly (implicit escape is deprecated since PHP 8.4).
            if (false === fputcsv($output, $line, ',', '"', '\\')) {
                // Do not leave a silently truncated file behind unnoticed.
                fclose($output);
                throw new \RuntimeException("Cannot write to $fileName");
            }
        }
        fclose($output);
    }
}
// Driver script: translate the page history of the data file with the code
// book and write the result to a new, timestamp-prefixed CSV file.
// Report every notice and warning while the conversion runs.
error_reporting(E_ALL);
$dataFile = "data_project_797147_2017_05_06.csv";
$data = WhatDoYouDo4Love::replacePageHistory($dataFile, "codebook_project_797147_2017_05_05.csv");
// Strip the input's ".csv" suffix before appending it again, so the output
// name does not end in ".csv.csv".
$output = sprintf("%s-%s.csv", time(), basename($dataFile, ".csv"));
WhatDoYouDo4Love::buildCsvFile($output, $data);
printf("File created as '%s'\n", $output);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment