Skip to content

Instantly share code, notes, and snippets.

@twerp
Created December 11, 2017 18:14
Show Gist options
  • Save twerp/95f087e2e11ca9badbc96160004e680e to your computer and use it in GitHub Desktop.
Save twerp/95f087e2e11ca9badbc96160004e680e to your computer and use it in GitHub Desktop.
Work-in-progress tool to "analyze" DokuWiki contents (currently prints the folders, pages, and page revisions it finds).
import os
import datetime
# Folder that the scan starts from, given relative to the current working
# directory. Built with os.path.join so the separators match the platform
# the script runs on (on Windows the result is the same string as before).
TOPFOLDER = os.path.join('..', 'wiki', 'data', 'attic')
class Folder:
    """A node in the folder tree: a name, a parent, subfolders and pages.

    Subfolders are kept in the public ``subfolders`` list and are appended
    by the caller; pages are stored through ``addpage``.
    """

    # Page names that addpage() silently refuses to store.
    _page_blacklist = ['sidebar']
    # Folder names that the directory scan leaves out of the tree.
    blacklist = ['tag', 'testi', 'wiki']

    def __init__(self, name, parent=None):
        """Create a folder; an empty *name* is shown as ``'<root>'``."""
        self.name = name if name else '<root>'
        self.parent = parent
        self.subfolders = []
        self._pages = []

    def __str__(self):
        return self.name

    # Folders inside printed lists should read the same as str(folder).
    __repr__ = __str__

    def getpage(self, pagename):
        """Return the stored page named *pagename*, or None if there is none."""
        return next((p for p in self._pages if p.name == pagename), None)

    def addpage(self, page):
        """Store *page* unless it is already stored or its name is blacklisted."""
        if page not in self._pages and page.name not in Folder._page_blacklist:
            self._pages.append(page)

    def printpages(self):
        """Print the list of stored pages."""
        print(self._pages)

    def getpages(self):
        """Return an iterator over the stored pages, in insertion order."""
        return iter(self._pages)

    def printfiles(self, pagename):
        """Print the files of the page named *pagename*.

        An unknown page prints an empty list instead of raising
        AttributeError on the None returned by getpage().
        """
        page = self.getpage(pagename)
        print(page.files if page is not None else [])

    def getfolder(self, foldername):
        """Return the direct subfolder named *foldername*.

        Falls back to this folder itself when no subfolder matches.
        """
        return next(
            (sub for sub in self.subfolders if sub.name == foldername),
            self,
        )

    def printall(self):
        """Print this folder, its pages, then every subfolder recursively."""
        if self.parent:
            # Only the immediate parent is shown, not the full path.
            print(':'.join((self.parent.name, self.name)))
        else:
            print(self)
        self.printpages()
        for sub in self.subfolders:
            sub.printall()

    def findfolder(self, names):
        """Follow *names* downwards and return the folder reached.

        *names* is a sequence of folder names, outermost first, e.g.
        ``['web-ohjelmointi', 'javascript']``. An empty sequence yields this
        folder itself; a path that does not exist yields None.
        """
        if not names:
            return self
        head, rest = names[0], names[1:]
        for sub in self.subfolders:
            if sub.name == head:
                return sub.findfolder(rest) if rest else sub
        return None
class Page:
    """A named page together with the files collected for it."""

    def __init__(self, name, folder=None):
        """Create a page called *name*, optionally tied to *folder*."""
        self.name = name
        self.folder = folder
        # Filled in by the caller, one entry per file belonging to the page.
        self.files = []

    def __str__(self):
        """Return ``'<name> (<number of files>)'``."""
        return '{} ({})'.format(self.name, len(self.files))

    # Pages are printed inside lists, which use repr(); keep both identical.
    __repr__ = __str__
class File:
    """A single file belonging to a page."""

    def __init__(self, name, date=None, page=None):
        """Create a file record.

        *name* is the file name; *date* and *page* are optional and are
        stored as given (None when not supplied).
        """
        self.name = name
        self.date = date
        self.page = page

    def __str__(self):
        return self.name

    # Files are printed inside lists, which use repr(); keep both identical.
    __repr__ = __str__
def parse_revision_filename(filename):
    """Split a revision file name into ``(pagename, date)``.

    DokuWiki names attic files ``<page>.<unix timestamp>.txt.gz``. The
    timestamp is taken from the last dot-separated field so that page names
    which themselves contain dots are kept whole. *date* is a UTC-aware
    ``datetime``, or None when the name carries no usable timestamp (the
    page name is then everything before ``.txt.gz``).
    """
    suffix = '.txt.gz'
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    head, dot, tail = stem.rpartition('.')
    if not (dot and head and tail.isdecimal()):
        return stem, None
    try:
        date = datetime.datetime.fromtimestamp(int(tail), datetime.timezone.utc)
    except (OverflowError, OSError, ValueError):
        # Digits that do not form a representable time: keep the page name.
        date = None
    return head, date


def main(topfolder=None):
    """Walk *topfolder* (default: TOPFOLDER) and print the tree found.

    Builds a Folder tree mirroring the directories, attaches a Page for every
    distinct page name and a File for every ``.txt.gz`` revision, then prints
    everything with Folder.printall().
    """
    if topfolder is None:
        topfolder = TOPFOLDER

    tree = None
    for root, dirs, files in os.walk(topfolder):
        # Prune in place: os.walk then never descends into blacklisted
        # folders, so their children cannot show up without a parent node.
        dirs[:] = [d for d in dirs if d not in Folder.blacklist]

        if tree is None:
            # os.walk yields the top folder first; it becomes the root node.
            current = tree = Folder('')
        else:
            relative = os.path.relpath(root, topfolder)
            current = tree.findfolder(relative.split(os.sep))
            if current is None:
                # Defensive: a directory whose parent was never registered.
                continue

        for dirname in dirs:
            current.subfolders.append(Folder(dirname, parent=current))

        for filename in files:
            if not filename.endswith('.txt.gz'):
                continue
            pagename, date = parse_revision_filename(filename)
            page = current.getpage(pagename)
            if page is None:
                page = Page(pagename, folder=current.getfolder(pagename))
                current.addpage(page)
            # Record every revision, including the one that created the page.
            page.files.append(File(filename, date=date, page=page))

    if tree is None:
        # os.walk produced nothing: the folder is missing or unreadable.
        raise SystemExit('Nothing to scan at: {}'.format(topfolder))
    tree.printall()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment