Skip to content

Instantly share code, notes, and snippets.

@tmeissner
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tmeissner/e6b47776d562b22ab629 to your computer and use it in GitHub Desktop.
Save tmeissner/e6b47776d562b22ab629 to your computer and use it in GitHub Desktop.
Extract menu data from an HTML file
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import xml.dom.minidom as DOM
class SpeisePlan:
    '''Extract the meal data for each day of a week from an HTML file.

    The file is loaded with xml.dom.minidom, so it has to be well-formed
    XML. After genPlan() has run, the wochenplan dictionary maps every
    name in days to a list of cell texts, one entry per collected row.
    '''

    # Positions (counted among the 'zeile_tage' divs only, starting at 0)
    # of the first and last row collected by findDayNodes().
    # NOTE(review): presumably the rows before these are header rows;
    # confirm against the layout of the page.
    _FIRST_DAY_ROW = 3
    _LAST_DAY_ROW = 6

    def __init__(self, fileName):
        '''Initialise the wochenplan dictionary and load the HTML file.

        fileName is handed unchanged to parse().
        '''
        self.days = ("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag")
        self.wochenplan = {day: [] for day in self.days}
        self.parse(fileName)

    def parse(self, fileName):
        '''Load the file given by fileName into the domTree attribute.

        The work is done by xml.dom.minidom.parse(); any error it raises
        propagates to the caller.
        '''
        self.domTree = DOM.parse(fileName)

    def findDayNodes(self):
        '''Collect the row nodes of the plan into the dayNodes list.

        Every div whose class attribute is exactly 'zeile_tage' counts as
        a row; the rows at positions _FIRST_DAY_ROW.._LAST_DAY_ROW
        (inclusive) are kept, in document order. Fewer rows than expected
        simply yield a shorter list.
        '''
        rows = [div for div in self.domTree.getElementsByTagName('div')
                if div.getAttribute('class') == 'zeile_tage']
        self.dayNodes = rows[self._FIRST_DAY_ROW:self._LAST_DAY_ROW + 1]

    def genPlan(self):
        '''Fill wochenplan from the rows found by findDayNodes().

        For each row, the direct child divs with class 'spalte_tag' are
        matched to the names in days from left to right, and the text of
        each cell (double quotes removed) is appended to that day's list.
        '''
        self.findDayNodes()
        # Start from empty lists so that calling genPlan() again does not
        # append the same entries a second time.
        for day in self.days:
            self.wochenplan[day] = []
        for row in self.dayNodes:
            # nodeName is tested first: text nodes between the cells have
            # no getAttribute() method.
            cells = [node for node in row.childNodes
                     if node.nodeName == 'div'
                     and node.getAttribute('class') == 'spalte_tag']
            # zip() stops after the last known day, so surplus columns are
            # ignored instead of raising IndexError.
            for day, cell in zip(self.days, cells):
                # An empty cell has no first child and an element child has
                # no data attribute; both are recorded as '' so the
                # remaining columns stay aligned with their days.
                text = getattr(cell.firstChild, 'data', '')
                self.wochenplan[day].append(text.replace('"', ''))

    def printPlan(self):
        '''Print the wochenplan dictionary to standard output.'''
        print(self.wochenplan)
def main(fileName='speiseplan.html'):
    '''Load the plan from fileName, extract the week data and print it.

    fileName defaults to 'speiseplan.html', resolved relative to the
    current working directory.
    '''
    plan = SpeisePlan(fileName)
    plan.genPlan()
    plan.printPlan()
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment