Skip to content

Instantly share code, notes, and snippets.

@dhaupin
Last active October 3, 2018 04:47
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dhaupin/abc6a4f937d4f64cc409dbe81086a49e to your computer and use it in GitHub Desktop.
Save dhaupin/abc6a4f937d4f64cc409dbe81086a49e to your computer and use it in GitHub Desktop.
Opencart - Crawls over database looking for images/pdfs used and outputs them as a simple text list. CP/Rclone/PHP can use this list to truncate out unused files.
<?php
# Idea from: https://github.com/zenseo/opencart-needless-image
# PHP is not well suited to moving the files itself on a large store with many images
# Bash cp can't take enough arguments from the file output unless it's batched
# Rclone (or rsync, as in the example below) is the best bet:
# rsync -asv --dry-run --recursive --remove-source-files --checksum --files-from=some-file.txt . destination/
/**
 * Prints every image path under "data/" that the database references, one
 * path per line, then terminates the script.
 *
 * Two groups of columns are scanned:
 *  - free-text columns ($tables_desc_to_check), from which paths are
 *    extracted with a regular expression;
 *  - path columns ($tables_to_check), whose whole value is taken as the path
 *    when it starts with "data/".
 *
 * The collected paths are de-duplicated before being echoed. The function
 * calls exit() after printing, so it never returns to its caller.
 *
 * @return void
 */
function findImages() {
    // table => column pairs where the column value itself is the file path.
    $tables_to_check = array(
        'banner_image' => 'image',
        'category' => 'image',
        'download' => 'filename',
        'manufacturer' => 'image',
        'option_value' => 'image',
        'order_download' => 'filename',
        'product' => 'image',
        'product_image' => 'image',
        'setting' => 'value',
        'voucher_theme' => 'image',
    );

    // table => column pairs holding free text in which paths may be embedded.
    $tables_desc_to_check = array(
        'bf_attribute_value' => 'value',
        'category_description' => 'description',
        //'category_description' => 'description_two',
        'information_description' => 'description',
        'product_attribute' => 'text',
        'product_description' => 'description',
        'product_tab' => 'text'
    );

    // When true, spaces in the emitted paths are backslash-escaped.
    $escape_spaces = false;

    // Lazily matches from "data/" up to the first image extension (case-insensitive).
    $img_regex = "/data\/.*?\.(jp(e)?g|png|gif|bmp)/i";

    // Raw, not yet normalised paths in discovery order.
    $raw_paths = array();

    foreach ($tables_desc_to_check as $name => $column) {
        $sql = 'SELECT `' . $column . '` image FROM `' . DB_PREFIX . $name . '` WHERE `' . $column . "` LIKE '%data/%'";
        $result = $this->db->query($sql);

        foreach ($result->rows as $row) {
            $img_matches = array();

            // preg_match_all() returns 0 or false when nothing usable was found.
            if (preg_match_all($img_regex, $row['image'], $img_matches)) {
                foreach ($img_matches[0] as $img_path) {
                    $raw_paths[] = $img_path;
                }
            }
        }
    }

    foreach ($tables_to_check as $name => $column) {
        $sql = 'SELECT `' . $column . '` image FROM `' . DB_PREFIX . $name . '` WHERE `' . $column . "` LIKE 'data/%'";
        $result = $this->db->query($sql);

        foreach ($result->rows as $row) {
            $raw_paths[] = $row['image'];
        }
    }

    // Always start from an array so an empty result set does not hand an
    // undefined variable to array_unique().
    $parts = array();

    // Normalise every path the same way, whichever loop found it.
    foreach ($raw_paths as $img_path) {
        // Turn URL-encoded spaces into literal spaces instead of dropping
        // them, so the listed name keeps the space the reference contained.
        $img_path = str_replace('%20', ' ', $img_path);

        if ($escape_spaces) {
            // Escape the already-decoded path (not the raw column value).
            $img_path = str_replace(' ', '\\ ', $img_path);
        }

        $parts[] = $img_path;
    }

    $parts = array_values(array_unique($parts));

    foreach ($parts as $path) {
        echo $path . PHP_EOL;
    }

    exit();
}
/**
 * Prints every file path under "PDFFILES/" that is embedded in the scanned
 * free-text columns, one path per line, then terminates the script.
 *
 * Only rows whose column contains ".pdf" are fetched; within those rows the
 * regular expression also picks up jpg/jpeg/png/gif/bmp files located under
 * "PDFFILES/". The collected paths are de-duplicated before being echoed.
 * The function calls exit() after printing, so it never returns to its caller.
 *
 * @return void
 */
public function findPdfs() {
    // table => column pairs holding free text in which paths may be embedded.
    $tables_desc_to_check = array(
        'category_description' => 'description',
        //'category_description' => 'description_two',
        'information_description' => 'description',
        'product_attribute' => 'text',
        'product_description' => 'description',
        'product_tab' => 'text'
    );

    // When true, spaces in the emitted paths are backslash-escaped.
    $escape_spaces = false;

    // Lazily matches from "PDFFILES/" up to the first listed extension (case-insensitive).
    $pdf_regex = "/PDFFILES\/.*?\.(jp(e)?g|png|gif|bmp|pdf)/i";

    // Always start from an array so an empty result set does not hand an
    // undefined variable to array_unique().
    $parts = array();

    foreach ($tables_desc_to_check as $name => $column) {
        $sql = 'SELECT `' . $column . '` pdf FROM `' . DB_PREFIX . $name . '` WHERE `' . $column . "` LIKE '%.pdf%'";
        $result = $this->db->query($sql);

        foreach ($result->rows as $row) {
            $pdf_matches = array();

            // preg_match_all() returns 0 or false when nothing usable was found.
            if (!preg_match_all($pdf_regex, $row['pdf'], $pdf_matches)) {
                continue;
            }

            foreach ($pdf_matches[0] as $pdf_path) {
                // Turn URL-encoded spaces into literal spaces instead of
                // dropping them, so the listed name keeps the space the
                // reference contained.
                $pdf_path = str_replace('%20', ' ', $pdf_path);

                if ($escape_spaces) {
                    $pdf_path = str_replace(' ', '\\ ', $pdf_path);
                }

                $parts[] = $pdf_path;
            }
        }
    }

    $parts = array_values(array_unique($parts));

    foreach ($parts as $path) {
        echo $path . PHP_EOL;
    }

    exit();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment