Skip to content

Instantly share code, notes, and snippets.

@mRoca
Last active February 9, 2022 17:32
Show Gist options
  • Save mRoca/2193c84a03924fb062f95e6b0623a5eb to your computer and use it in GitHub Desktop.
Save mRoca/2193c84a03924fb062f95e6b0623a5eb to your computer and use it in GitHub Desktop.
Export all MySQL related data - One row and all its relationships
<?php
namespace App\Service;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Schema\AbstractSchemaManager;
use Doctrine\DBAL\Schema\ForeignKeyConstraint;
use Doctrine\DBAL\Schema\Table;
use Doctrine\ORM\EntityManagerInterface;
use Psr\Log\LoggerInterface;
class EntityExporter
{
private Connection $conn;
private AbstractSchemaManager $schemaManager;
private LoggerInterface $logger;
private array $cachedTables = [];
private array $cachedAlreadyIncludedIDs = [];
private int $cachedAlreadyIncludedRowsCount = 0; // We could count $cachedAlreadyIncludedIDs, but it's less efficient
private ?\Closure $currentExportCallback = null;
public function __construct(Connection $conn, LoggerInterface $logger)
{
$this->conn = $conn;
$this->schemaManager = $this->conn->getSchemaManager();
$this->logger = $logger;
}
/**
* Get a Doctrine ORM entity table name and ID.
* This method should remain static, as we don't want Doctrine ORM to be a requirement.
*
* Usage:
* $exporter->exportRowAndRelationships(...EntityExporter::getEntityTableAndId($myEntity))
*/
public static function getEntityTableAndId(EntityManagerInterface $em, object $entity): array
{
try {
$metadata = $em->getClassMetadata(get_class($entity));
} catch (\Exception $e) {
throw new \InvalidArgumentException('Unable to get the entity class metadata.');
}
return [$metadata->getTableName(), $metadata->getIdentifierValues($entity)];
}
/**
* Export all data related to a row identied by a table and a primary key.
* This method only uses Doctrine DBAL.
*
* @param array<string, mixed>|int|string $id
* @param null|\Closure $progressCallback A callback called after each exported row
*/
public function exportRowAndRelationships(string $tableName, $id, ?\Closure $progressCallback = null): array
{
// Flushing the cache, as this class is stateful. It would be better to use a Factory instead, but it's okay for now.
$this->flushPreviousCache();
$this->currentExportCallback = $progressCallback;
if (!$this->schemaManager->tablesExist($tableName)) {
throw new \InvalidArgumentException("The `{$tableName}` table does not exist");
}
$table = $this->schemaManager->listTableDetails($tableName);
if (!is_array($id)) {
// Checking if the table has a primary key with at least one column
if (null === ($primaryKey = $table->getPrimaryKey()) || empty($primaryKey->getColumns())) {
throw new \InvalidArgumentException('Unable to get the table primary key');
}
if (1 !== count($primaryKey->getColumns())) {
throw new \InvalidArgumentException('You must provide an array as ID for a table with multiple primary key columns');
}
// If the $id value is a string or an int, tranforming it into an array
$id = [$primaryKey->getColumns()[0] => $id];
}
set_time_limit(0);
return $this->exportTableRowAndRelationships($table, $id);
}
/**
* @param array<string, mixed> $id The row to export primary key, as an array in order to deal with one or many columns
*
* @return array<string, array<string, mixed>> The list of all rows to export, indexed per table
*/
private function exportTableRowAndRelationships(Table $table, array $id): array
{
$this->logger->debug('Exporting row', ['table' => $table->getName(), 'id' => $id, 'count' => $this->cachedAlreadyIncludedRowsCount]);
// Infinite loop protection
if (in_array($id, $this->cachedAlreadyIncludedIDs[$table->getName()] ?? [], true)) {
$this->logger->debug('Row already exported, ignoring', ['table' => $table->getName(), 'id' => $id]);
return [];
}
$this->cachedAlreadyIncludedIDs[$table->getName()][] = $id;
// Checking if the table has a primary key with at least one column
if (null === ($primaryKey = $table->getPrimaryKey()) || empty($primaryKey->getColumns())) {
throw new \RuntimeException(sprintf("Unable to get the '%s' table primary key", $table->getName()));
}
// Checking if the given ID contains all required primary key columns
if (array_keys($id) != $primaryKey->getUnquotedColumns()) { // Caution: using "==" and not "===" here, as we don't care about the order
$this->logger->error('The given ID does not contain all table primary key columns', ['table' => $table->getName(), 'id' => $id]);
return [];
}
// Avoiding a SELECT if the table only contains the primary key columns, e.g. for a n-n table.
if (count($table->getColumns()) === count($primaryKey->getColumns())) {
$this->logger->debug('The table only contains primary key columns, skiping select', ['table' => $table->getName()]);
$rowData = $id;
} else {
// Get the current row
try {
$rowData = $this->conn->fetchAssociative(...$this->getFetchWhereArguments('*', $table, $id));
} catch (\Exception $e) {
$this->logger->error('Exception when selecting the row', ['table' => $table->getName(), 'id' => $id, 'exception' => $e]);
return [];
}
}
if (empty($rowData)) {
$this->logger->error('Empty row', ['table' => $table->getName(), 'id' => $id]);
return [];
}
// Building an array containing all exported rows
/** @var array<int, array<string, array<string, mixed>>> $rowsArrays an array of data indexed per table */
$rowsArrays = [
[$table->getName() => [$rowData]],
];
$this->callRowCallback();
// Get all related rows
$foreignKeys = $this->listReferencingForeignKeys($table);
foreach ($foreignKeys as $foreignKey) {
$localTable = $foreignKey->getLocalTable();
if (null === $localPrimaryKey = $localTable->getPrimaryKey()) {
$this->logger->error('The linked table does not have any primary key', ['table' => $localTable->getName()]);
continue;
}
// Building the "WHERE ? = ? AND ? = ?" part
$localColumns = $foreignKey->getLocalColumns();
$foreignColumns = $foreignKey->getForeignColumns();
$foreignWhere = [];
foreach ($localColumns as $i => $localColumn) {
$foreignWhere[$localColumn] = $rowData[$foreignColumns[$i]];
}
$selectedCols = implode(', ', $localPrimaryKey->getQuotedColumns($this->conn->getDatabasePlatform()));
$linkedRowsIds = $this->conn->fetchAllAssociative(...$this->getFetchWhereArguments($selectedCols, $localTable, $foreignWhere));
foreach ($linkedRowsIds as $linkedRowId) {
$rowsArrays[] = $this->exportTableRowAndRelationships($localTable, $linkedRowId);
}
}
return array_merge_recursive(...$rowsArrays);
}
/**
* Returns an array containing 3 values: the WHERE sql, the params and the params types.
*/
private function getFetchWhereArguments(string $select, Table $table, array $where): array
{
if (empty($where)) {
throw new \InvalidArgumentException('The "where" argument cannot be empty');
}
$selectSql = sprintf('SELECT %s FROM %s WHERE 1 = 1', $select, $table->getQuotedName($this->conn->getDatabasePlatform()));
$selectParams = $selectTypes = [];
foreach ($where as $columnName => $value) {
$column = $table->getColumn($columnName);
$selectSql .= sprintf(' AND %s = ?', $column->getQuotedName($this->conn->getDatabasePlatform()));
$selectParams[] = $value;
$selectTypes[] = $column->getType();
}
return [$selectSql, $selectParams, $selectTypes];
}
/**
* @return ForeignKeyConstraint[] returns a list of all tables having a foreign key targetting the source one
*/
private function listReferencingForeignKeys(Table $sourceTable): array
{
if (empty($this->cachedTables)) {
$this->cachedTables = $this->schemaManager->listTables();
}
$matchingForeignKeys = [];
foreach ($this->cachedTables as $table) {
$foreignKeys = $table->getForeignKeys();
foreach ($foreignKeys as $foreignKey) {
if ($foreignKey->getForeignTableName() !== $sourceTable->getName()) {
continue;
}
// We don't want to deal with custom foreign keys for now
if ($foreignKey->getForeignColumns() !== $sourceTable->getPrimaryKeyColumns()) {
throw new \LogicException(sprintf('The [%s] foreign key columns must match the foreign table primary keys', $foreignKey->getName()));
}
$matchingForeignKeys[] = $foreignKey;
}
}
return $matchingForeignKeys;
}
private function callRowCallback(): void
{
++$this->cachedAlreadyIncludedRowsCount;
if (null !== $this->currentExportCallback) {
($this->currentExportCallback)($this->cachedAlreadyIncludedRowsCount);
}
}
private function flushPreviousCache(): void
{
$this->cachedTables = [];
$this->cachedAlreadyIncludedIDs = [];
$this->cachedAlreadyIncludedRowsCount = 0;
$this->currentExportCallback = null;
}
}
<?php
namespace App\Command;
use App\Service\EntityExporter;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Component\Yaml\Yaml;
/**
* This command dumps all the SQL data related to a main row (e.g. a User or a Parish) in a YAML file.
*/
class ExportEntityCommand extends Command
{
protected static $defaultName = 'app:export-entity';
protected static $defaultDescription = 'Export an entity and all its relationships';
private EntityManagerInterface $em;
private EntityExporter $exporter;
private string $cacheDir;
public function __construct(EntityManagerInterface $em, EntityExporter $exporter, string $cacheDir)
{
$this->em = $em;
$this->exporter = $exporter;
$this->cacheDir = $cacheDir;
parent::__construct();
}
protected function configure(): void
{
$this
->addArgument('entityOrTable', InputArgument::REQUIRED, 'The entity FQDN or its table name, example: "App/Entity/Entite", or "entite"')
->addArgument('id', InputArgument::REQUIRED, 'The entity primary key, example: "42"')
;
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);
$entityOrTable = $input->getArgument('entityOrTable');
$scalarId = $input->getArgument('id');
if (empty($entityOrTable) || empty($scalarId)) {
throw new \InvalidArgumentException('You must provide an entity or a table, and an ID');
}
if (class_exists($entityOrTable)) {
$io->comment('Getting the entity');
$entity = $this->em->getRepository($entityOrTable)->find($scalarId);
if (null === $entity) {
throw new \InvalidArgumentException('Unable to find the entity');
}
[$tableName, $id] = EntityExporter::getEntityTableAndId($this->em, $entity);
$io->comment("Entity table found: {$tableName}");
} else {
$tableName = $entityOrTable;
$id = $scalarId;
}
$io->comment('Exporting all the SQL data. Caution: you\'ll need 500 MB of free memory per 100.000 exported rows.');
$progressBar = new ProgressBar($io, 0, 1);
$progressBar->setFormat('[%bar%] Rows: %current% - Time: %elapsed:6s% - Memory: %memory:6s%');
$progressBar->start();
$progressCallback = function ($rowsCount) use ($progressBar): void {
$progressBar->setProgress($rowsCount);
};
$data = $this->exporter->exportRowAndRelationships($tableName, $id, $progressCallback);
$progressBar->finish();
$exportDir = implode(DIRECTORY_SEPARATOR, [$this->cacheDir, 'export', date('Y-m-d_H-i-s')."_{$tableName}_{$scalarId}"]);
$io->comment("Creating the export directory: {$exportDir}");
$fs = new Filesystem();
if ($fs->exists($exportDir)) {
throw new \RuntimeException("The {$exportDir} directory already exists, please try again after 1 second");
}
$fs->mkdir($exportDir);
$io->comment('Dumping data into the export directory');
$fs->dumpFile($exportDir.DIRECTORY_SEPARATOR.'data.yaml', Yaml::dump($data));
$io->success("Export finished with success into {$exportDir}");
return Command::SUCCESS;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment