Skip to content

Instantly share code, notes, and snippets.

@brandonsturgeon
Created November 24, 2015 21:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brandonsturgeon/766fda0f62bbef12838c to your computer and use it in GitHub Desktop.
Save brandonsturgeon/766fda0f62bbef12838c to your computer and use it in GitHub Desktop.
import os
import logging
# Emit every message from DEBUG upward, prefixed with a timestamp and the
# severity, so the progress of the scan can be followed on the console.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
class Main(object):
    """Report uploaded files that no ``.dat`` file in the library mentions.

    Instantiating the class runs the whole job: it indexes the uploaded
    files by id, scans every ``.dat`` file for those ids, and prints the
    paths of the uploads whose id was never found.

    Args:
        lib_dir: Directory whose ``.dat`` files are searched for file ids.
        uploads_dir: Directory holding the uploaded files to check.

    Attributes:
        files: Maps each parsed file id to the path of its uploaded file.
    """

    # Defaults are the locations the script has always used.
    LIB_DIR = "lib"
    UPLOADS_DIR = "lib/csfiles/home_dir"

    def __init__(self, lib_dir=LIB_DIR, uploads_dir=UPLOADS_DIR):
        self.lib_dir = lib_dir
        self.uploads_dir = uploads_dir
        self.files = {}
        self.get_files()
        unreferenced = self.find_unreferenced()
        print("-- Unreferenced Files --")
        # Sorted so that repeated runs produce the same report; iterating the
        # set directly gives an arbitrary order.
        for file_path in sorted(self.files[file_id] for file_id in unreferenced):
            print(file_path)

    @staticmethod
    def _parse_file_id(filename):
        """Return the id embedded in ``<name>__<prefix>-<id>[.ext]``.

        The id is the second ``-``-separated piece of the text between the
        ``__`` separator and the first ``.`` that follows it.

        Returns:
            The id string, or ``None`` when ``filename`` does not contain
            exactly one ``__`` separator, has no ``-`` after it, or carries
            an empty id.
        """
        parts = filename.split("__")
        if len(parts) != 2:
            return None
        pieces = parts[1].split(".")[0].split("-")
        if len(pieces) < 2:
            return None
        # An empty id would be "found" in every .dat file, so reject it.
        return pieces[1] or None

    def get_files(self):
        """Fill ``self.files`` with ``{file_id: path}`` for every upload.

        Only regular files directly inside ``self.uploads_dir`` are
        considered, and files ending in ``.xml`` are ignored. Files whose
        name does not follow the ``<name>__<prefix>-<id>`` pattern are
        skipped with a warning instead of aborting the run. When two files
        share an id, the one listed later replaces the earlier entry and a
        warning is logged.
        """
        logging.debug("Getting list of all uploaded files")
        for entry in os.listdir(self.uploads_dir):
            full_path = os.path.join(self.uploads_dir, entry)
            if not os.path.isfile(full_path) or entry.endswith(".xml"):
                continue
            file_id = self._parse_file_id(entry)
            if file_id is None:
                logging.warning(
                    "Skipping %s: name does not match <name>__<prefix>-<id>",
                    entry)
                continue
            if file_id in self.files:
                logging.warning(
                    "File id %s is already in use by %s; replacing it with %s",
                    file_id, self.files[file_id], full_path)
            self.files[file_id] = full_path
        logging.debug("Done creating dictionary of uploaded files")

    def get_dat_files(self):
        """Yield the name of each ``.dat`` file directly inside ``self.lib_dir``.

        Names are yielded without the directory part; sub-directories are
        not descended into.
        """
        for dat in os.listdir(self.lib_dir):
            if dat.endswith(".dat") and os.path.isfile(
                    os.path.join(self.lib_dir, dat)):
                yield dat

    def find_unreferenced(self):
        """Return the set of file ids that appear in no ``.dat`` file.

        Each ``.dat`` file is read exactly once and its text is checked
        against every id that is still unaccounted for. An id counts as
        referenced when it occurs anywhere in the text as a substring.
        """
        pending = set(self.files)
        for dat in self.get_dat_files():
            if not pending:
                # Every id has been found; no need to read further files.
                break
            with open(os.path.join(self.lib_dir, dat), "r") as handle:
                contents = handle.read()
            pending -= set(
                file_id for file_id in pending if file_id in contents)
        return pending
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    Main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment