Skip to content

Instantly share code, notes, and snippets.

@utgwkk
Created August 24, 2015 16:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save utgwkk/125c9f31f78fcfa986b1 to your computer and use it in GitHub Desktop.
Save utgwkk/125c9f31f78fcfa986b1 to your computer and use it in GitHub Desktop.
Convert <TABLE> to TSV.
# coding: utf-8
from html.parser import HTMLParser
import sys
# TSV text accumulated by TableToTSV each time a </table> tag is reached.
output = ''
# Slurp the complete HTML document from standard input.
text = sys.stdin.read()
class TableToTSV(HTMLParser):
    """Collect the ``<td>`` cells of HTML tables and emit them as TSV.

    Every ``<td>`` element becomes exactly one field and every ``</tr>``
    ends one line.  When ``</table>`` is reached, the rows gathered so far
    are appended to the module-level ``output`` string, which must already
    exist when parsing starts.

    Attributes:
        cols: fields of the row currently being parsed.
        rows: completed rows of the table currently being parsed.
        nowTd: True while the parser is inside a ``<td>`` element.
    """

    # Maps the whitespace characters that would corrupt a TSV field (tab,
    # CR, LF, FF) to plain spaces; non-breaking spaces are left alone.
    _SPACE_TABLE = str.maketrans('\t\r\n\f', '    ')

    def __init__(self, *args, **kwargs):
        """Create the parser with its table state already initialized."""
        super().__init__(*args, **kwargs)
        self.initialize()

    def initialize(self):
        """Reset the table-parsing state.

        Kept as a public method so existing callers that invoke it
        explicitly after construction keep working.
        """
        self.cols = []
        self.rows = []
        self.nowTd = False
        # Text fragments of the cell currently being parsed.
        self._cell = []

    def _close_cell(self):
        """Finish the open cell, if any, and store it as one field."""
        if not self.nowTd:
            return
        raw = ''.join(self._cell).translate(self._SPACE_TABLE)
        # Collapse runs of spaces and trim both ends so the field can never
        # contain a tab or newline that would break the TSV layout.
        self.cols.append(' '.join(part for part in raw.split(' ') if part))
        self._cell = []
        self.nowTd = False

    def handle_starttag(self, tag, attrs):
        """Open a new cell on ``<td>``; other tags are ignored."""
        if tag == 'td':
            # HTML allows </td> to be omitted, so a new <td> implicitly
            # ends the previous cell.
            self._close_cell()
            self.nowTd = True

    def handle_endtag(self, tag):
        """Close cells and rows, and flush the table on ``</table>``."""
        if tag == 'table':
            global output
            self._close_cell()
            if self.cols:
                # Row whose </tr> was omitted: keep its cells instead of
                # letting them leak into the next table.
                self.rows.append(self.cols)
                self.cols = []
            output += ''.join('\t'.join(cols) + '\n' for cols in self.rows)
            # Start afresh so a later table does not re-emit these rows.
            self.rows = []
        elif tag == 'tr':
            self._close_cell()
            self.rows.append(self.cols)
            self.cols = []
        elif tag == 'td':
            self._close_cell()

    def handle_data(self, data):
        """Accumulate text that appears inside a ``<td>`` element."""
        if self.nowTd:
            # A cell with inline markup arrives as several chunks; they
            # are joined into a single field when the cell is closed.
            self._cell.append(data)
# Parse the HTML read from stdin; the parser's end-of-table handler appends
# each table's rows to the module-level `output` string.
parser = TableToTSV()
parser.initialize()
parser.feed(text)
# Tell HTMLParser the input is complete so any buffered data is processed.
parser.close()
# Every row in `output` already ends with '\n'; write it verbatim rather
# than print() it, which would add a spurious blank line at the end.
sys.stdout.write(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment