Skip to content

Instantly share code, notes, and snippets.

View ns-mkusper's full-sized avatar
👋

Mark Kusper ns-mkusper

👋
  • Chicago
View GitHub Profile
@ns-mkusper
ns-mkusper / betterocr-pdf-to-txt.py
Created September 12, 2025 11:21
Convert PDF to TXT via OCR with BetterOCR
#!/usr/bin/env python3
import os
import sys
from pathlib import Path
import betterocr
from pdf2image import convert_from_path
def run_ocr_on_pdf(pdf_path: str, langs=("en",), context=""):
@ns-mkusper
ns-mkusper / ocr-openai.py
Created June 6, 2025 12:35
PDF OCR with OpenAI
import os
import sys
import base64
import openai
from pdf2image import convert_from_path
import tempfile
from PIL import Image
def update_progress(progress):
"""
#!/usr/bin/env python
import boto3
# Configure the source and target DynamoDB clients
# Session bound to the named profile and region of the source side;
# the DynamoDB client below is created from it.
source_session = boto3.Session(profile_name="SOURCE_PROFILE", region_name="us-east-1")
source_client = source_session.client("dynamodb")
target_session = boto3.Session(
@ns-mkusper
ns-mkusper / make-vs2017-env.bat
Created June 23, 2024 00:31 — forked from vvuk/make-vs2017-env.bat
Generate "vcvarsall" equivalent for msys2 bash from vcvarsall
@ECHO OFF
REM Remember PATH as it was before vcvarsall.bat changes the environment.
set OLDPATH=%PATH%
REM Load the x64 Visual Studio 2017 build environment; the script's own stdout is discarded.
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsall.bat" x64 > NUL:
REM Print the resulting variables as single-quoted "export" lines for a bash shell to consume.
echo export INCLUDE='%INCLUDE%'
echo export LIB='%LIB%'
echo export LIBPATH='%LIBPATH%'
@ns-mkusper
ns-mkusper / gist:9e65e781033c5a50d2549e13296f0372
Created June 22, 2024 23:08 — forked from RangelReale/gist:3e6392289d8ba1a52b6e70cdd7e10282
How to compile ffmpeg + x264 using Visual Studio 2015
##### How to compile ffmpeg + x264 using Visual Studio 2015 #####
##### Building this way will make the DLLs compatible with SEH, so there will be no need to use /SAFESEH:NO when compiling your code #####
##### SOURCES:
### https://pracucci.com/compile-ffmpeg-on-windows-with-visual-studio-compiler.html
### https://gist.github.com/sailfish009/8d6761474f87c074703e187a2bc90bbc
### http://roxlu.com/2016/057/compiling-x264-on-windows-with-msvc
* Download "MSYS2 x86_64" from "http://msys2.github.io" and install into "C:\workspace\windows\msys64"
@ns-mkusper
ns-mkusper / .lsp-docker
Created December 13, 2023 23:45 — forked from yosisa/.lsp-docker
Enable lsp-docker per project basis.
docker/image:name
--
-- PostgreSQL database dump
--
-- Dumped from database version 15.2
-- Dumped by pg_dump version 15.3 (Homebrew)
-- Set the three session-level timeouts to 0 for this script's session
-- (per the PostgreSQL documentation, a value of 0 disables the timeout).
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
@ns-mkusper
ns-mkusper / kafka_python_sasl_scram.py
Created January 19, 2023 17:19 — forked from alexlopes/kafka_python_sasl_scram.py
Kafka Python with SASL/SCRAM Authentication Example
import os
from kafka import KafkaProducer, KafkaConsumer
# Kafka connection settings, read from the environment so that credentials
# never live in source control.
#
# KAFKA_BOOTSTRAP_SERVERS is a comma-separated list of "host:port" entries.
# Surrounding whitespace and empty entries are dropped; if the variable is
# unset the list is empty and the failure surfaces when a client connects.
BOOTSTRAP_SERVERS = [
    server.strip()
    for server in os.getenv("KAFKA_BOOTSTRAP_SERVERS", "").split(",")
    if server.strip()
]
TOPIC_NAME = "the-topic"
# SASL/SCRAM credentials; each is None when its variable is not set.
SASL_USERNAME = os.getenv("KAFKA_SASL_USERNAME")
SASL_PASSWORD = os.getenv("KAFKA_SASL_PASSWORD")
def consume():
consumer = KafkaConsumer(TOPIC_NAME, security_protocol="SASL_SSL", sasl_mechanism="SCRAM-SHA-512", sasl_plain_username=SASL_USERNAME, sasl_plain_password=SASL_PASSWORD, bootstrap_servers=BOOTSTRAP_SERVERS)
@ns-mkusper
ns-mkusper / txt2mp3
Last active November 25, 2022 19:47
#!/bin/bash
# requires flite, sox and ffmpeg
# First positional argument: path of the text file to convert.
INPUT_TXT=$1
# Derive the output names by stripping a trailing ".txt" (if present) and
# appending the new suffix. A plain ${VAR/txt/wav} substitution would replace
# the first "txt" found anywhere in the path (e.g. in a directory name) and
# would leave an extension-less input unchanged, pointing the output at the
# input file itself.
OUTPUT_WAV="${INPUT_TXT%.txt}.wav"
OUTPUT_MP3="${INPUT_TXT%.txt}.mp3"
OUTPUT_WAV_FAST="${INPUT_TXT%.txt}_fast.wav"
OUTPUT_MP3_FAST="${INPUT_TXT%.txt}_fast.mp3"
# Run flite on the input file, writing its output to $OUTPUT_WAV.
# Redirections apply left to right: stderr is pointed at the script's original
# stdout first, then stdout is discarded, so error messages stay visible.
# NOTE(review): if the intent was to silence everything, use "> /dev/null 2>&1".
flite -f "${INPUT_TXT}" -o "$OUTPUT_WAV" 2>&1 > /dev/null
@ns-mkusper
ns-mkusper / remove_older_hdfs_files.sh
Last active June 6, 2022 21:24
Script for removing older files in an HDFS directory
#!/bin/bash
usage="Usage: ./remove_older_hdfs_files.sh [path] [days]"
# use if working with incredibly large directories
# export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xmx5g"
if [ ! "$1" ]
then
echo $usage;
exit 1;