Skip to content

Instantly share code, notes, and snippets.

@JenkinsDev
Last active August 29, 2015 14:15
Show Gist options
  • Save JenkinsDev/24f650074d3ab86d243f to your computer and use it in GitHub Desktop.
Save JenkinsDev/24f650074d3ab86d243f to your computer and use it in GitHub Desktop.
HTML table to CSV
import re
import sys
def _csv_field(cell):
    """Return *cell* as one CSV field, quoted when it needs to be.

    A field containing a comma or a double quote is wrapped in double quotes
    and its embedded quotes are doubled (RFC 4180); anything else is returned
    unchanged.
    """
    if "," in cell or '"' in cell:
        return '"{0}"'.format(cell.replace('"', '""'))
    return cell


def _csv_line(row, tag):
    """Convert the ``<tag>...</tag>`` cells of one table row to a CSV line."""
    pieces = row.replace("<{0}>".format(tag), "").split("</{0}>".format(tag))
    # Each closing tag ends a cell, so whatever follows the last one is not a
    # cell.  When that remainder is blank, drop it: keeping it would put a
    # trailing comma (a spurious empty column) on the line.
    if len(pieces) > 1 and not pieces[-1].strip():
        pieces.pop()
    return ",".join(_csv_field(piece) for piece in pieces)


def generate_csv_data_from_html_table(html):
    """Convert the markup of an HTML table into CSV text.

    The markup is flattened with ``remove_tab_and_newline`` and split into
    rows with ``create_rows_from_trs``.  A row containing ``<th>`` becomes the
    header line (if several rows do, the last one wins); a row containing
    ``<td>`` becomes a data line; rows with neither are skipped.  Only bare
    ``<th>`` / ``<td>`` tags, without attributes, are recognised.

    Args:
        html: The table markup as a string.

    Returns:
        A ``(csv_header, csv_rows)`` tuple: the header line as a string
        (``""`` when no header row was found) and a list of data lines.
        Lines carry no trailing comma and no line terminator.
    """
    csv_header = ""
    csv_rows = []
    for row in create_rows_from_trs(remove_tab_and_newline(html)):
        if "<th>" in row:
            csv_header = _csv_line(row, "th")
        elif "<td>" in row:
            csv_rows.append(_csv_line(row, "td"))
    return (csv_header, csv_rows)
def remove_tab_and_newline(string):
    """Return *string* with all tabs and line-break characters removed.

    Both ``"\\n"`` and ``"\\r"`` are stripped so that input with Windows
    (CRLF) line endings is flattened as completely as input with Unix line
    endings.  The characters are deleted, not replaced by a space.

    Args:
        string: The text to flatten.

    Returns:
        A copy of *string* without ``"\\t"``, ``"\\n"`` or ``"\\r"``.
    """
    return string.replace("\r", "").replace("\n", "").replace("\t", "")
def create_rows_from_trs(string):
    """Split flattened table markup into the contents of its ``<tr>`` rows.

    The opening ``<table ...>`` tag (attributes included), ``</table>``, the
    ``<thead>`` / ``<tbody>`` / ``<tfoot>`` wrappers and every ``</tr>`` are
    removed; the remainder is split on opening ``<tr ...>`` tags, with or
    without attributes.  Matching is case-sensitive.

    Args:
        string: Table markup, expected to be free of tabs and newlines.

    Returns:
        A list of strings.  The first element is whatever preceded the first
        ``<tr>`` (normally ``""``); each later element is the inner markup of
        one row.
    """
    # Tags that wrap rows but are never part of a row's content.
    wrappers = (
        r"<table(?:\s[^>]*)?>|</table>"
        r"|</?(?:thead|tbody|tfoot)(?:\s[^>]*)?>"
        r"|</tr>"
    )
    without_wrappers = re.sub(wrappers, "", string)
    # Requiring whitespace before any attributes keeps tags that merely start
    # with "tr" (e.g. <track>) from being treated as row openers.
    return re.split(r"<tr(?:\s[^>]*)?>", without_wrappers)
if __name__ == '__main__':
    # Usage: script INPUT_HTML OUTPUT_CSV
    if len(sys.argv) < 3:
        sys.exit("usage: {0} INPUT_HTML OUTPUT_CSV".format(sys.argv[0]))
    in_path = sys.argv[1]
    out_path = sys.argv[2]
    with open(in_path) as in_f:
        (header, rows) = generate_csv_data_from_html_table(in_f.read())
    # The output file must start out empty, so beware: any existing content
    # is discarded.  Mode 'w' truncates on open.  Append mode followed by
    # truncate() does not, because truncate() cuts at the current position,
    # which in append mode is the end of the file.
    with open(out_path, 'w') as out_f:
        out_f.write("{0}\n".format(header))
        out_f.writelines("{0}\n".format(row) for row in rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment