# Extract menu data from an HTML file
# -*- coding: utf-8 -*-
import xml.dom.minidom as DOM
class SpeisePlan:
    '''Class for parsing and extracting the meal data for each day of a week
    lying in a given HTML file.

    Typical use: construct with a file name (this parses the file), then call
    findDayNodes(), genPlan() and finally printPlan().

    Attributes set by the methods below:
      days       -- tuple of the weekday names used as keys of wochenplan
      wochenplan -- dict mapping each weekday name to a list of meal strings
      domTree    -- DOM document returned by xml.dom.minidom.parse()
      dayNodes   -- list of the selected 'zeile_tage' div nodes
    '''

    # Zero-based positions, among the divs of class 'zeile_tage', of the rows
    # that are collected by findDayNodes().
    # NOTE(review): presumably the earlier/later rows are headers or other
    # non-meal rows of the page -- confirm against the real HTML file.
    FIRST_ROW = 3
    LAST_ROW = 6

    def __init__(self, fileName):
        '''Initialise the wochenplan{} dictionary and call the parse() method
        to load the html file given by the fileName parameter.'''
        self.days = ("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag")
        self.wochenplan = dict()
        for day in self.days:
            self.wochenplan[day] = []
        self.parse(fileName)

    def parse(self, fileName):
        '''Load (and parse) the HTML file given by fileName parameter using the
        parse() method of the xml.dom.minidom package.

        fileName is handed to xml.dom.minidom.parse() unchanged, so it may be
        a file name or an open file object. The file has to be well-formed
        XML; minidom raises xml.parsers.expat.ExpatError otherwise.'''
        self.domTree = DOM.parse(fileName)

    def findDayNodes(self):
        '''Find DOM nodes for the days of the week and push them into the
        dayNodes[] array.

        Only divs whose class attribute is 'zeile_tage' are considered, and of
        those only the ones at positions FIRST_ROW..LAST_ROW (inclusive).'''
        self.dayNodes = []
        # NOTE(review): the indentation of this file was lost, so it is not
        # certain whether the counter ran over every div or only over the
        # 'zeile_tage' divs; counting the matching rows is assumed here.
        rowIndex = 0
        for element in self.domTree.getElementsByTagName('div'):
            if element.getAttribute('class') != 'zeile_tage':
                continue
            if self.FIRST_ROW <= rowIndex <= self.LAST_ROW:
                self.dayNodes.append(element)
            rowIndex = rowIndex + 1

    def genPlan(self):
        '''Generate meal data plan for the whole week by going through the
        DOM nodes in the dayNodes[] array and extracting the data of all divs
        with the suitable 'spalte_tag' class attribute.

        Within each node the n-th matching div is stored under the n-th entry
        of self.days; matching divs beyond the last weekday are ignored.
        findDayNodes() has to be called first. The plan is rebuilt from
        scratch on every call, so calling genPlan() twice does not duplicate
        entries.'''
        for day in self.days:
            self.wochenplan[day] = []
        for element in self.dayNodes:
            columns = [child for child in element.childNodes
                       if child.nodeName == 'div'
                       and child.getAttribute('class') == 'spalte_tag']
            # zip() stops at the shorter sequence, so a surplus column cannot
            # index past the end of self.days.
            for day, column in zip(self.days, columns):
                # NOTE(review): the original extraction expression was
                # truncated in this copy; only the removal of double quotes
                # is certain. The div's text content is used as the source.
                content = self._nodeText(column).replace('"', '')
                self.wochenplan[day].append(content)

    def printPlan(self):
        '''Print out the wochenplan dictionary, one weekday per line in the
        order of self.days.'''
        for day in self.days:
            print('%s: %s' % (day, ', '.join(self.wochenplan[day])))

    @staticmethod
    def _nodeText(node):
        '''Return the concatenated text of all text nodes below node.'''
        parts = []
        for child in node.childNodes:
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
                parts.append(child.data)
            elif child.nodeType == child.ELEMENT_NODE:
                parts.append(SpeisePlan._nodeText(child))
        return ''.join(parts)
def main():
    '''Load 'speiseplan.html' from the current directory and print the meal
    plan extracted from it.'''
    plan = SpeisePlan('speiseplan.html')
    # NOTE(review): the remainder of this function was missing in this copy;
    # the calls below run the class's methods in the order their docstrings
    # imply (collect the day nodes, build the plan, print it).
    plan.findDayNodes()
    plan.genPlan()
    plan.printPlan()
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()