Skip to content

Instantly share code, notes, and snippets.

@t-book
Created July 12, 2024 09:08
Show Gist options
  • Save t-book/85239e8edb962777d9c829ec6b4e3256 to your computer and use it in GitHub Desktop.
Save t-book/85239e8edb962777d9c829ec6b4e3256 to your computer and use it in GitHub Desktop.
import os
import argparse
import xml.etree.ElementTree as ET
def get_published_layers(workspace_dir):
    """Collect the layer and coverage names declared in XML configuration files.

    Walks ``workspace_dir`` recursively, parses every file whose name ends in
    ``.xml`` and gathers the text of each ``<name>`` element that is a direct
    child of a ``<layer>`` or ``<coverage>`` element. The document root itself
    is considered as well as any nested element.

    Args:
        workspace_dir: Directory tree containing the XML configuration files.

    Returns:
        A set of non-empty, whitespace-stripped names. Files that cannot be
        read or parsed are reported on stdout and skipped.
    """
    published_layers = set()
    for root, _dirs, files in os.walk(workspace_dir):
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            try:
                root_elem = ET.parse(xml_path).getroot()
            except (ET.ParseError, OSError) as e:
                print(f"Error parsing {xml_path}: {e}")
                continue
            for tag in ('layer', 'coverage'):
                name_elems = root_elem.findall(f".//{tag}/name")
                # ".//tag" only matches descendants of the root, never the
                # root element itself, so a file whose root *is* <layer> or
                # <coverage> has to be handled explicitly.
                if root_elem.tag == tag:
                    name_elems = root_elem.findall("name") + name_elems
                for name_elem in name_elems:
                    # An empty <name/> has text None; never record that.
                    name = (name_elem.text or "").strip()
                    if name:
                        published_layers.add(name)
    return published_layers
_RASTER_EXTENSIONS = ('.tif', '.tiff', '.img', '.ecw', '.jp2', '.nc', '.hdf', '.geotiff', '.geotif')
_VECTOR_EXTENSIONS = ('.shp', '.geojson')
_SHAPEFILE_SIDECAR_EXTENSIONS = ('.shx', '.dbf', '.prj', '.cpg')


def _shapefile_sidecars(directory, stem, files):
    """Return paths of the files in *directory* that accompany shapefile *stem*."""
    sidecars = []
    for candidate in files:
        candidate_stem, candidate_ext = os.path.splitext(candidate)
        # The extension is compared case-insensitively so that e.g. FOO.DBF
        # is found next to FOO.SHP on case-sensitive filesystems.
        if candidate_stem == stem and candidate_ext.lower() in _SHAPEFILE_SIDECAR_EXTENSIONS:
            sidecars.append(os.path.join(directory, candidate))
    return sidecars


def scan_data_directory(data_dir, published_layers, delete=False):
    """Find (and optionally delete) data files that match no published layer.

    Raster files are matched by their file name without extension. Vector
    files are matched by ``"<containing directory name>:<file name without
    extension>"``. An unpublished ``.shp`` file also pulls in its ``.shx``,
    ``.dbf``, ``.prj`` and ``.cpg`` sidecar files.

    Args:
        data_dir: Directory tree to scan recursively.
        published_layers: Container of published layer names to keep.
        delete: When true, every obsolete file is removed; otherwise the
            scan is a dry run.

    Returns:
        A tuple ``(obsolete_files, total_size)``: the list of obsolete file
        paths and their combined size in bytes. Files whose size cannot be
        read or that cannot be deleted are reported on stdout.
    """
    obsolete_files = []
    total_size = 0

    for root, _dirs, files in os.walk(data_dir):
        for file in files:
            lowered = file.lower()
            full_path = os.path.join(root, file)
            stem, ext = os.path.splitext(file)

            if lowered.endswith(_RASTER_EXTENSIONS):
                if stem in published_layers:
                    continue
                candidates = [full_path]
            elif lowered.endswith(_VECTOR_EXTENSIONS):
                if f"{os.path.basename(root)}:{stem}" in published_layers:
                    continue
                # Always include the file that actually matched, so that
                # .geojson files and upper-case extensions are not missed.
                candidates = [full_path]
                if ext.lower() == '.shp':
                    candidates.extend(_shapefile_sidecars(root, stem, files))
            else:
                continue

            for path in candidates:
                try:
                    size = os.path.getsize(path)
                except OSError as e:
                    # The file vanished or is unreadable; skip it instead of
                    # aborting the whole scan.
                    print(f"Error reading {path}: {e}")
                    continue
                obsolete_files.append(path)
                total_size += size
                if delete:
                    try:
                        os.remove(path)
                        print(f"Deleted: {path}")
                    except OSError as e:
                        print(f"Error deleting {path}: {e}")

    return obsolete_files, total_size
def main():
    """Command-line entry point: report or delete obsolete data files.

    Parses ``data_dir``, ``workspace_dir`` and the optional ``--delete`` flag,
    collects the published layer names from the workspace directory, scans
    the data directory and prints the obsolete files with their total size.

    Exits with status 2 when either directory does not exist, and with
    status 1 when ``--delete`` is requested but no published layers were
    found, because in that situation every data file would be treated as
    obsolete.
    """
    parser = argparse.ArgumentParser(description="Scan GeoServer data directory for obsolete files")
    parser.add_argument("data_dir", help="Path to the GeoServer data directory")
    parser.add_argument("workspace_dir", help="Path to the GeoServer workspace directory containing XML configuration files")
    parser.add_argument("--delete", action="store_true", help="Delete obsolete files")
    args = parser.parse_args()

    # os.walk silently yields nothing for a missing directory, which would
    # make a mistyped path look like "no layers are published".
    for label, path in (("data_dir", args.data_dir), ("workspace_dir", args.workspace_dir)):
        if not os.path.isdir(path):
            parser.error(f"{label} is not an existing directory: {path}")

    try:
        published_layers = get_published_layers(args.workspace_dir)
    except Exception as e:
        print(f"Error: {e}")
        return

    if args.delete and not published_layers:
        parser.exit(
            1,
            f"Error: no published layers found in {args.workspace_dir}; "
            "refusing to delete anything.\n",
        )

    obsolete_files, total_size = scan_data_directory(args.data_dir, published_layers, args.delete)

    if args.delete:
        print("Obsolete files deleted:")
    else:
        print("Obsolete files found (dry run):")
    for file in obsolete_files:
        print(file)

    print(f"\nTotal obsolete files: {len(obsolete_files)}")
    print(f"Total space used by obsolete files: {total_size / (1024 * 1024):.2f} MB")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
@t-book
Copy link
Author

t-book commented Jul 12, 2024

Run with the command below (omit --delete for a dry run that only lists the obsolete files):

python script.py /path/to/data/directory /path/to/workspace/directory --delete

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment