Last active
May 24, 2019 17:32
-
-
Save nt92/3886577b104b5c41e5edbf9de86cf421 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pattern matcher for my date formats
def date_match(strg, search=re.compile(r'[^0-9./\- ]').search):
    """Return True when *strg* looks like a date heading.

    A date heading consists solely of ASCII digits, '.', '/', '-' and
    spaces, and contains at least one digit.

    Args:
        strg: The paragraph text to classify.
        search: Bound ``search`` method of a compiled pattern that finds the
            first character NOT allowed in a date. Compiled once at
            definition time; callers normally leave the default.

    Returns:
        bool: True for date-like text, False otherwise.
    """
    # Blank or whitespace-only paragraphs of any length are never dates
    # (previously only '' and a single ' ' were rejected).
    if not strg.strip():
        return False
    # Any character outside the allowed set means this is ordinary text.
    if search(strg):
        return False
    # Require a digit so separator-only lines such as '...' or '-' are not
    # mistaken for dates.
    return any(ch.isdigit() for ch in strg)
# parse folder of .docx files
#
# Each document is read as: first paragraph = date of the first entry, then
# body paragraphs; any later paragraph accepted by date_match() closes the
# current entry and starts a new one. Every (date, text) pair is inserted
# into the `entries` table through `cursor`.
# NOTE(review): nothing is committed here; presumably the owning connection
# is committed elsewhere in the file -- confirm.
for filename in os.listdir('./entry_files'):
    # os.listdir returns every directory entry; skip anything that is not a
    # .docx file, plus "~$" names (presumably Word's temporary lock files).
    if not filename.lower().endswith('.docx') or filename.startswith('~$'):
        continue

    document = docx.Document(os.path.join('./entry_files', filename))
    paragraphs = document.paragraphs
    if not paragraphs:
        # An empty document has no date heading and nothing to store.
        continue

    current_date = paragraphs[0].text
    # Collect the pieces of the current entry and join once when flushing.
    # Each piece keeps the leading '\n' so the stored text is identical to
    # what the previous string concatenation produced.
    entry_parts = []
    for paragraph in paragraphs[1:]:
        if date_match(paragraph.text):
            # A new date heading closes the entry gathered so far.
            cursor.execute("INSERT INTO entries VALUES (?, ?)", (current_date, ''.join(entry_parts)))
            current_date = paragraph.text
            entry_parts = []
        else:
            # Keep everything as text: concatenating encoded bytes onto a
            # str and decoding afterwards fails on Python 3 and is a no-op
            # round-trip on Python 2.
            entry_parts.append('\n' + paragraph.text)
    # insert final entry (once per file, so no document loses its last entry)
    cursor.execute("INSERT INTO entries VALUES (?, ?)", (current_date, ''.join(entry_parts)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment