Skip to content

Instantly share code, notes, and snippets.

wolfmanstout /
Created July 30, 2017 22:46
Extracts all sentences from posts and pages in exported WordPress XML.
#!/usr/bin/env python
import sys
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
from nltk import tokenize
from xml.etree import ElementTree
'content': '',
wolfmanstout /
Created July 29, 2020 06:40
Demonstrates using Python winrt to run OCR. Requires Python 3.7+.
import asyncio
import os
import winrt
from PIL import Image
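from winrt.windows.graphics.imaging import BitmapDecoder, BitmapPixelFormat, SoftwareBitmap
from winrt.windows.media.ocr import OcrEngine
from winrt.windows.storage import StorageFile, FileAccessMode
import winrt.windows.storage.streams as streams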