Skip to content

Instantly share code, notes, and snippets.

@MehdiFal

MehdiFal/test.py Secret

Created October 31, 2019 00:09
Show Gist options
  • Save MehdiFal/7c8e8c555ead68f6b244a5552069e1f0 to your computer and use it in GitHub Desktop.
Save MehdiFal/7c8e8c555ead68f6b244a5552069e1f0 to your computer and use it in GitHub Desktop.
import csv
from itertools import zip_longest
def grouper(iterable, n, fillvalue=None):
    """Collect items from ``iterable`` into consecutive tuples of length ``n``.

    The final tuple is padded with ``fillvalue`` when the input runs out
    before the tuple is full.

    Returns:
        A lazy ``zip_longest`` iterator yielding ``n``-tuples.
    """
    # Every slot of each output tuple pulls from the *same* iterator, so
    # successive items land in successive slots.
    shared = iter(iterable)
    slots = (shared,) * n
    return zip_longest(*slots, fillvalue=fillvalue)
def remove_title(s, title):
    """Return ``s`` with a leading ``title`` removed, if present.

    The title is matched case-insensitively and the remaining text keeps its
    original casing.  Text that does not start with the title is returned
    unchanged.

    Args:
        s: Text to examine, or ``None``.
        title: Heading expected at the very start of ``s``.

    Returns:
        ``""`` when ``s`` is ``None``; otherwise ``s`` without the leading
        title, or ``s`` itself when the title is absent.
    """
    if s is None:
        return ""
    prefix_len = len(title)
    # Upper-case only the candidate prefix for the comparison, so the text
    # handed back to the caller is never re-cased as a side effect.
    if s[:prefix_len].upper() == title.upper():
        return s[prefix_len:]
    return s
def sanitize(s):
    """
    Escape every newline in ``s`` as the two characters backslash + "n".

    This keeps each csv row on exactly one physical line.  Skip the call
    if multi-line cells are acceptable in the output.
    """
    pieces = s.split("\n")
    return "\\n".join(pieces)
# Read the whole input file and split it into blank-line-separated paragraphs.
with open("contentfile.txt", encoding="utf-8") as file:
    paragraphs = [
        paragraph.strip() for paragraph in file.read().split("\n\n")
    ]

# Extra blank lines between paragraphs, or at the end of the file, leave empty
# chunks behind.  Drop them so they can neither shift the three-paragraph
# grouping below nor produce a spurious empty row.
paragraphs = [paragraph for paragraph in paragraphs if paragraph]

# The input is decoded as UTF-8, so write UTF-8 explicitly as well; relying on
# the platform default encoding can fail on characters it cannot represent.
with open("output.csv", "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["title", "desc", "shortdesc", "tech", "spec"]
    writer = csv.DictWriter(csvfile, fieldnames)
    writer.writeheader()
    # Each record spans three consecutive paragraphs; an incomplete final
    # record is padded with None, which remove_title() turns into "".
    for description_paragraph, tech_paragraph, spec_paragraph in grouper(paragraphs, 3):
        # First line is the name; the remainder is split at its last newline
        # into everything before it and the final line.
        name, _, description_and_link = description_paragraph.partition("\n")
        short_description, _, link = description_and_link.rpartition("\n")
        tech = remove_title(tech_paragraph, "TECHNOLOGY")
        specs = remove_title(spec_paragraph, "SPECIFICATIONS")
        # NOTE(review): the "desc" column receives the paragraph's last line
        # (`link`) -- confirm this mapping is intended.
        writer.writerow({
            "title": name,
            "desc": link,
            "shortdesc": sanitize(short_description),
            "tech": sanitize(tech),
            "spec": sanitize(specs),
        })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment