Skip to content

Instantly share code, notes, and snippets.

@nt92
Last active May 24, 2019 17:32
Show Gist options
  • Select an option

  • Save nt92/3886577b104b5c41e5edbf9de86cf421 to your computer and use it in GitHub Desktop.

Select an option

Save nt92/3886577b104b5c41e5edbf9de86cf421 to your computer and use it in GitHub Desktop.
# pattern matcher for my date formats
def date_match(strg, search=re.compile(r'[^0-9./\- ]').search):
    """Return True when *strg* looks like a date heading.

    A date heading is made up only of ASCII digits, '.', '/', '-' and
    spaces, and contains at least one digit.

    Args:
        strg: Paragraph text to classify.
        search: Callable returning a truthy value when *strg* contains a
            character that is not allowed in a date. Defaults to the
            bound ``search`` of a pattern compiled once at definition time.

    Returns:
        bool: True if *strg* qualifies as a date, False otherwise.
    """
    # Without a digit the text cannot be a date. This rejects empty and
    # whitespace-only paragraphs of any length, as well as separator lines
    # such as '---' or '...', which would otherwise pass the whitelist.
    if re.search(r'[0-9]', strg) is None:
        return False
    # Any character outside the whitelist means this is ordinary entry text.
    return not search(strg)
# Parse a folder of .docx files into (date, entry) rows.
#
# The first paragraph of each document is taken as the date of the first
# entry. Every later paragraph that date_match() accepts closes the current
# entry and starts a new one; all other paragraphs are appended to the
# current entry, each prefixed with a newline.
entry_dir = './entry_files'
for file_name in os.listdir(entry_dir):
    # Only real Word documents can be opened by docx.Document; skip other
    # files and the "~$..." lock files Word leaves next to open documents.
    if not file_name.lower().endswith('.docx') or file_name.startswith('~$'):
        continue
    document = docx.Document(os.path.join(entry_dir, file_name))
    paragraphs = document.paragraphs
    # An empty document has no date paragraph and therefore no entries.
    if not paragraphs:
        continue
    current_date = paragraphs[0].text
    entry_parts = []
    for paragraph in paragraphs[1:]:
        if date_match(paragraph.text):
            # A new date heading: store the entry collected so far.
            cursor.execute("INSERT INTO entries VALUES (?, ?)", (current_date, ''.join(entry_parts)))
            current_date = paragraph.text
            entry_parts = []
        else:
            # Keep the text as unicode end to end; an encode/decode round
            # trip is unnecessary and breaks on Python 3 (str + bytes).
            entry_parts.append('\n' + paragraph.text)
    # insert final entry
    cursor.execute("INSERT INTO entries VALUES (?, ?)", (current_date, ''.join(entry_parts)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment