Skip to content

Instantly share code, notes, and snippets.

@sueszli
sueszli / ssd.py
Last active August 8, 2023 23:46
scrape all pdf files containing old exams from the semi-structured-data course at TU Wien
import requests
from bs4 import BeautifulSoup
import os
import PyPDF2
EXAM_DIRECTORY_PATH = "./exams"
SOLUTION_DIRECTORY_PATH = "./solutions"
# parse page
url = "https://dbai.tuwien.ac.at/education/ssd/pruefung/"
@sueszli
sueszli / ep1.js
Last active August 8, 2023 23:47
track your unused working hours in ep1
// open up "https://git.logic.at/.../stundenlisten/your_name.csv"
// then run this in your javascript console

// Add up every <td> that is the third child of its parent, in document order.
// Text that does not parse as a number (and a parsed 0) contributes 0.
const sum = [...document.querySelectorAll('td:nth-child(3)')].reduce((total, cell) => {
    const parsed = parseFloat(cell.textContent.trim())
    return total + (parsed || 0)
}, 0)
// The log line reports sum / 25 as the SWS equivalent of the summed hours.
const sws = sum / 25
console.log(`total: ${sum}h ⌛️ -- equivalent to ${sws} SWS (semester wochenstunden)`)
@sueszli
sueszli / whatsapp.user.js
Created August 8, 2023 23:48
track all chats from your whatsapp browser client
// ==UserScript==
// @name WhatsApp Chats
// @namespace sueszli
// @match *://*web.whatsapp.com/*
// @grant none
// @version 1.0
// @author sueszli
// @description Simple utility functions to read chat elements from active and archived chats.
// ==/UserScript==
@sueszli
sueszli / merge.py
Created August 8, 2023 23:52
merge pdf files together
from PyPDF2 import PdfFileMerger
import os
inputPath = "./extractedPages"
total = len(os.listdir(inputPath))
count = 1
merger = PdfFileMerger()
for pdf in os.listdir(inputPath):
merger.append(open(inputPath + "/" + pdf, "rb"))
@sueszli
sueszli / png2pdf.py
Created August 8, 2023 23:52
insert a png image into a pdf file
from reportlab.pdfgen import canvas
from PyPDF2 import PdfFileWriter, PdfFileReader
import webbrowser
inputPath = "input.pdf"
outputPath = "output.pdf"
imgPath = "logo.png"
tempPath = "temp.pdf"
imgSize = 70
x = 500
@sueszli
sueszli / extractPages.py
Created August 8, 2023 23:53
limit pdf files to specific pages
from PyPDF2 import PdfFileReader, PdfFileWriter
import os
# Input and output directories for the page extraction (presumably the PDFs
# are read from inputPath and the trimmed copies written to outputPath; the
# code that uses them is outside this excerpt).
inputPath = "./solutions"
outputPath = "./extractedPages"
# Pages to keep from each PDF; how the numbers are indexed is decided by the
# code that consumes this list.
chosenPages = [1]
# exist_ok=True creates the directory only when it is missing, which avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(outputPath, exist_ok=True)
@sueszli
sueszli / kijiji.js
Last active January 12, 2024 14:22
kijiji.com scraper
import axios from 'axios'
import * as cheerio from 'cheerio'
import { assert } from 'console'
import open from 'open'
const main = async () => {
let url = process.argv[2]
assert(process.argv.length !== 2, 'illegal number of arguments')
assert(url, 'missing url as argument')
@sueszli
sueszli / puppeteer.config.cjs
Last active January 12, 2024 14:23
rentals.com scraper
const { join } = require("path");
// Puppeteer configuration: point Puppeteer's cache at ".cache/puppeteer"
// inside the directory that contains this config file.
module.exports = {
// __dirname is this file's directory, so the resulting path does not depend
// on the current working directory of the process.
cacheDirectory: join(__dirname, ".cache", "puppeteer"),
};
import { assert, log } from 'console'
import fs from 'fs'
import playwright from 'playwright'
const DOWNLOAD_PATH = 'downloads'
const main = async () => {
console.clear()
// init download dir
import webbrowser
import time
from playwright.sync_api import sync_playwright
URL = "https://www.amazon.jobs/en/search?___________INSERT YOUR URL HERE___________"
page = sync_playwright().start().chromium.launch().new_page()
page.goto(URL)