Skip to content

Instantly share code, notes, and snippets.

@wolfmanstout
wolfmanstout / extract_sentences.py
Created Jul 30, 2017
Extracts all sentences from posts and pages in exported WordPress XML.
View extract_sentences.py
#!/usr/bin/env python
import sys
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
from nltk import tokenize
from xml.etree import ElementTree
NAMESPACES = {
'content': 'http://purl.org/rss/1.0/modules/content/',
@wolfmanstout
wolfmanstout / test_winrt_ocr.py
Created Jul 29, 2020
Demonstrates using the Python winrt package to run OCR on Windows. Requires Python 3.7+.
View test_winrt_ocr.py
import asyncio
import os
import winrt
from PIL import Image
from winrt.windows.graphics.imaging import BitmapDecoder, BitmapPixelFormat, SoftwareBitmap
from winrt.windows.media.ocr import OcrEngine
from winrt.windows.storage import StorageFile, FileAccessMode
import winrt.windows.storage.streams as streams