Created
November 24, 2015 21:43
-
-
Save brandonsturgeon/766fda0f62bbef12838c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import os

# Show everything from DEBUG upwards, prefixed with a timestamp and the
# severity name, so the progress messages emitted during the scan are visible.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
class Main():
    """Report uploaded files that no ``.dat`` file mentions.

    Uploaded files live in ``upload_dir`` and are expected to be named
    ``<name>__<prefix>-<id>[.<ext>]``.  The ``<id>`` part is extracted from
    every non-XML file and then searched for, as plain text, in each
    ``*.dat`` file found directly inside ``dat_dir``.  Ids that never show
    up are reported as unreferenced.

    Constructing the object runs the whole scan and prints the report.

    Args:
        upload_dir: Directory holding the uploaded files.
        dat_dir: Directory holding the ``.dat`` files to search.
    """

    def __init__(self, upload_dir="lib/csfiles/home_dir", dat_dir="lib"):
        self.upload_dir = upload_dir
        self.dat_dir = dat_dir
        # Maps file id -> full path of the uploaded file carrying that id.
        self.files = {}
        self.get_files()
        unreferenced = self.find_unreferenced()
        print("-- Unreferenced Files --")
        # Sorted so that two runs over the same data print the same report.
        for file_id in sorted(unreferenced):
            print(self.files[file_id])

    @staticmethod
    def _parse_file_id(filename):
        """Return the id embedded in ``filename``, or ``None`` if malformed.

        A well-formed name has exactly one ``"__"`` separator; the text after
        it, up to the first ``"."``, must contain a ``"-"``, and the id is the
        piece right after the first ``"-"`` (``report__x-123.pdf`` -> ``123``).
        """
        parts = filename.split("__")
        if len(parts) != 2:
            return None
        pieces = parts[1].split(".")[0].split("-")
        if len(pieces) < 2:
            return None
        # An empty id would match every .dat file, so treat it as malformed.
        return pieces[1] or None

    def get_files(self):
        """Fill ``self.files`` with ``{file_id: full_path}`` for each upload.

        Only regular files directly inside ``self.upload_dir`` are considered
        and ``.xml`` files are ignored.  Files whose name does not follow the
        expected pattern are skipped with a warning instead of aborting the
        scan.

        Raises:
            OSError: If ``self.upload_dir`` cannot be listed.
        """
        logging.debug("Getting list of all uploaded files")
        for entry in os.listdir(self.upload_dir):
            full_path = os.path.join(self.upload_dir, entry)
            if not os.path.isfile(full_path) or entry.endswith(".xml"):
                continue
            file_id = self._parse_file_id(entry)
            if file_id is None:
                logging.warning(
                    "Skipping %s: name does not match <name>__<prefix>-<id>",
                    full_path,
                )
                continue
            if file_id in self.files:
                # The dictionary is keyed by id, so a repeated id replaces the
                # earlier entry; say so instead of losing it silently.
                print("{} is already in values!".format(file_id))
            self.files[file_id] = full_path
        logging.debug("Done creating dictionary of uploaded files")

    def get_dat_files(self):
        """Yield the base name of every ``*.dat`` file in ``self.dat_dir``.

        Raises:
            OSError: If ``self.dat_dir`` cannot be listed.
        """
        for dat in os.listdir(self.dat_dir):
            if not dat.endswith(".dat"):
                continue
            if os.path.isfile(os.path.join(self.dat_dir, dat)):
                yield dat

    def find_unreferenced(self):
        """Return the set of uploaded file ids found in no ``.dat`` file.

        Each ``.dat`` file is read exactly once and every still-unmatched id
        is looked up in its contents, rather than rereading the file for each
        id.  An id counts as referenced when it occurs anywhere in the file
        as a substring.
        """
        pending = set(self.files)
        for dat in self.get_dat_files():
            if not pending:
                # Everything has been matched; no need to read more files.
                break
            path = os.path.join(self.dat_dir, dat)
            with open(path, "r") as handle:
                contents = handle.read()
            pending -= {file_id for file_id in pending if file_id in contents}
        return pending
if __name__ == "__main__":
    # Instantiating Main performs the scan and prints the report.
    Main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment