This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
from pathlib import Path | |
import betterocr | |
from pdf2image import convert_from_path | |
def run_ocr_on_pdf(pdf_path: str, langs=("en",), context=""): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import base64 | |
import openai | |
from pdf2image import convert_from_path | |
import tempfile | |
from PIL import Image | |
def update_progress(progress): | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import boto3 | |
# Configure the source and target DynamoDB clients | |
source_session = boto3.Session( | |
profile_name="SOURCE_PROFILE", | |
region_name="us-east-1", | |
) | |
source_client = source_session.client("dynamodb") | |
target_session = boto3.Session( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ECHO OFF | |
set OLDPATH=%PATH% | |
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Auxiliary\Build\vcvarsall.bat" x64 > NUL: | |
echo export INCLUDE='%INCLUDE%' | |
echo export LIB='%LIB%' | |
echo export LIBPATH='%LIBPATH%' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### How to compile ffmpeg + x264 using Visual Studio 2015 ##### | |
##### Building this way will make the DLLs compatible with SEH, so there will be no need to use /SAFESEH:NO when compiling your code ##### | |
##### SOURCES: | |
### https://pracucci.com/compile-ffmpeg-on-windows-with-visual-studio-compiler.html | |
### https://gist.github.com/sailfish009/8d6761474f87c074703e187a2bc90bbc | |
### http://roxlu.com/2016/057/compiling-x264-on-windows-with-msvc | |
* Download "MSYS2 x86_64" from "http://msys2.github.io" and install into "C:\workspace\windows\msys64" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
docker/image:name |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | |
-- PostgreSQL database dump | |
-- | |
-- Dumped from database version 15.2 | |
-- Dumped by pg_dump version 15.3 (Homebrew) | |
SET statement_timeout = 0; | |
SET lock_timeout = 0; | |
SET idle_in_transaction_session_timeout = 0; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from kafka import KafkaProducer, KafkaConsumer | |
# Kafka connection settings, read from the environment once at import time.
#
# The stdlib accessor is `os.getenv`; the previous spelling `os.gentenv`
# does not exist and raised AttributeError as soon as the module was imported.

# Comma-separated broker list. Defaulting to "" and dropping blank entries
# means an unset variable yields [] instead of failing on `None.split(",")`.
BOOTSTRAP_SERVERS = [
    server.strip()
    for server in os.getenv("KAFKA_BOOTSTRAP_SERVERS", "").split(",")
    if server.strip()
]
TOPIC_NAME = "the-topic"
# SASL credentials; each is None when its variable is not set.
SASL_USERNAME = os.getenv("KAFKA_SASL_USERNAME")
SASL_PASSWORD = os.getenv("KAFKA_SASL_PASSWORD")
def consume(): | |
consumer = KafkaConsumer(TOPIC_NAME, security_protocol="SASL_SSL", sasl_mechanism="SCRAM-SHA-512", sasl_plain_username=SASL_USERNAME, sasl_plain_password=SASL_PASSWORD, bootstrap_servers=BOOTSTRAP_SERVERS) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# requires flite, sox and ffmpeg | |
# Path of the text file to synthesize; abort with a usage message when the
# argument is missing or empty instead of running flite on "".
INPUT_TXT=${1:?usage: $0 input.txt}

# Derive output names by stripping a trailing ".txt" and appending the new
# suffix. The earlier ${INPUT_TXT/txt/wav} form replaced the FIRST "txt"
# anywhere in the path (e.g. "txt_notes/a.txt" -> "wav_notes/a.txt") and left
# extension-less inputs unchanged, so the output could overwrite the input.
BASENAME=${INPUT_TXT%.txt}
OUTPUT_WAV="${BASENAME}.wav"
OUTPUT_MP3="${BASENAME}.mp3"
OUTPUT_WAV_FAST="${BASENAME}_fast.wav"
OUTPUT_MP3_FAST="${BASENAME}_fast.mp3"

# Silence flite entirely. Redirections apply left to right, so "2>&1" must
# come AFTER "> /dev/null"; in the old order stderr was still attached to the
# terminal.
flite -f "${INPUT_TXT}" -o "$OUTPUT_WAV" > /dev/null 2>&1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
usage="Usage: ./remove_older_hdfs_files.sh [path] [days]" | |
# use if working with incredibly large directories | |
# export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xmx5g" | |
if [ ! "$1" ] | |
then | |
echo $usage; | |
exit 1; |
NewerOlder