Skip to content

Instantly share code, notes, and snippets.

@juampi92
Created May 17, 2023 15:03
Show Gist options
  • Save juampi92/863c42eaf17f6f0e1d23c0c90d4b976d to your computer and use it in GitHub Desktop.
Save juampi92/863c42eaf17f6f0e1d23c0c90d4b976d to your computer and use it in GitHub Desktop.

Chunk-read CSV from S3

I couldn't manage to do this using League\Csv\Reader for some reason. The CSV starts from an arbitrary point.

This is why I made this.

Reading from S3

// Build the S3 client; `[...]` stands for the client configuration, which is elided here.
$s3 = new S3Client([...]);
// Register the client's stream wrapper so that the `s3://` path below can be opened with fopen().
$s3->registerStreamWrapper();

// Open the object read-only as a stream; $bucket and $path must be defined by the caller.
$stream = fopen("s3://{$bucket}/{$path}", 'r');

// Fail loudly when no usable stream handle came back.
if (!is_resource($stream)) {
    throw new RuntimeException('Could not stream the csv');
}

Using the generator

$csv = new ChunkCsvReader($stream, chunkSize: 1_000);

// Every value produced by the generator is one chunk: a list of rows keyed by header name.
foreach ($csv->iterator() as $rows) {
  // Report the memory currently allocated to PHP, converted from bytes to kilobytes.
  $memoryKb = memory_get_usage(true) / 1024;
  dump('Memory usage in kb: ' . $memoryKb);

  foreach ($rows as $row) {
    dump($row['id'] - $row['type']);
  }
}
<?php
namespace App\Support;
use Generator;
/**
 * Reads a CSV stream lazily and hands its rows out in fixed-size chunks.
 *
 * The first non-blank line of the stream is consumed by the constructor and
 * used as the header; every following row is returned as an associative
 * array keyed by those header names. Blank lines are skipped.
 */
class ChunkCsvReader
{
    /**
     * Column names read from the first non-blank line, or false when the
     * stream contained no such line.
     */
    protected array|false $header;

    /**
     * @param resource $stream    Readable stream positioned at the header line.
     * @param string   $separator Field delimiter passed to fgetcsv().
     * @param string   $enclosure Field enclosure character passed to fgetcsv().
     * @param string   $escape    Escape character passed to fgetcsv().
     * @param int      $chunkSize Maximum number of rows per yielded chunk; must be >= 1.
     *
     * @throws \InvalidArgumentException When $chunkSize is lower than 1.
     */
    public function __construct(
        private $stream,
        private readonly string $separator = ',',
        private readonly string $enclosure = '"',
        private readonly string $escape = '\\',
        private readonly int $chunkSize = 1000,
    ) {
        // A non-positive chunk size would make the iterator silently yield nothing.
        if ($this->chunkSize < 1) {
            throw new \InvalidArgumentException(
                "Chunk size must be at least 1, got {$this->chunkSize}."
            );
        }

        $this->header = $this->readRow();
    }

    /**
     * Yields successive chunks of rows until the stream is exhausted.
     *
     * @return Generator<int, list<array<string, string|null>>>
     *
     * @throws \ValueError When a row has a different number of fields than the header
     *                     (raised by array_combine()).
     */
    public function iterator(): Generator
    {
        // chunk() only returns an empty list once no rows are left.
        while (($chunk = $this->chunk()) !== []) {
            yield $chunk;
        }
    }

    /**
     * Collects up to $chunkSize rows, each keyed by the header names.
     *
     * @return list<array<string, string|null>>
     */
    private function chunk(): array
    {
        // Without a header there is nothing to key the rows by (empty stream).
        if ($this->header === false) {
            return [];
        }

        $chunk = [];

        while (count($chunk) < $this->chunkSize) {
            $row = $this->readRow();

            if ($row === false) {
                break;
            }

            $chunk[] = array_combine($this->header, $row);
        }

        return $chunk;
    }

    /**
     * Reads the next non-blank CSV record from the stream.
     *
     * fgetcsv() reports a blank line as an array holding a single null field;
     * such lines carry no data and would not match the header, so they are skipped.
     *
     * @return array|false The parsed fields, or false when fgetcsv() reports end of stream or failure.
     */
    private function readRow(): array|false
    {
        do {
            $row = fgetcsv($this->stream, 0, $this->separator, $this->enclosure, $this->escape);
        } while ($row === [null]);

        return $row;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment