GitHub gists by Dan Saattrup Smart (saattrupdan)
@saattrupdan
saattrupdan / install_nvcc_with_cuda128.sh
Last active September 6, 2025 12:51
Install NVCC with CUDA 12.8
#!/bin/bash
# Add NVIDIA's CUDA apt repository via the signed keyring package.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-8
# Put the CUDA binaries and libraries on the PATH for future shells.
echo 'CUDA_VERSION="12.8"' >> ~/.bashrc
echo 'export PATH=/usr/local/cuda-${CUDA_VERSION}/bin${PATH:+:${PATH}}' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_VERSION}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}' >> ~/.bashrc
source ~/.bashrc
echo "Done! Here's the NVCC version:"
nvcc --version
@saattrupdan
saattrupdan / openai_generation.py
Last active October 29, 2024 12:44
OpenAI usage
"""Example use of generating with OpenAI models.
Installation:
$ pip install openai
"""
from openai import OpenAI
from openai.types.chat import (
ChatCompletionMessageParam,
ChatCompletionSystemMessageParam,
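The gist preview stops at the imports above. The following is a minimal sketch of how such a generation script typically continues, assuming an OPENAI_API_KEY environment variable; the model name ("gpt-4o-mini"), the prompt texts and the extra ChatCompletionUserMessageParam import are illustrative assumptions rather than the gist's actual contents.

import os

from openai import OpenAI
from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)

# The client picks up the API key from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Build a typed message list using the imported TypedDict parameter classes.
messages: list[ChatCompletionMessageParam] = [
    ChatCompletionSystemMessageParam(role="system", content="You are a helpful assistant."),
    ChatCompletionUserMessageParam(role="user", content="Say hello in Danish."),
]

# Request a chat completion and print the generated reply.
completion = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
print(completion.choices[0].message.content)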
import pandas as pd
import requests as rq
from PyPDF2 import PdfReader
import io
import re
from tqdm.auto import tqdm
from datasets import Dataset


def new_record(test_type: str, year: int, version: str) -> dict:
    # The preview is cut off here; a minimal completion returns the fields in scope.
    return dict(test_type=test_type, year=year, version=version)
@saattrupdan
saattrupdan / parse_dacoref.py
'''Convert a CONLLU file with coreference data to a JSONL file with clusters.

Usage:
    python parse_dacoref.py <input_file>

Author:
    Dan Saattrup Nielsen (dan.nielsen@alexandra.dk)
'''
import conllu
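Only the docstring and the first import survive in the preview. As an illustration of the conversion the docstring describes, here is a minimal sketch rather than the gist's actual logic: it reads sentences with the conllu package and writes one JSON line per coreference cluster. The MISC key holding the cluster id ("coref" below) and the output filename are assumptions; DaCoref's real annotation layout may differ.

import json
import sys

import conllu


def main(input_file: str) -> None:
    # Collect the surface form of every token that carries a coreference cluster id.
    clusters: dict[str, list[str]] = {}
    with open(input_file, encoding="utf-8") as f:
        for sentence in conllu.parse_incr(f):
            for token in sentence:
                misc = token.get("misc") or {}
                cluster_id = misc.get("coref")  # assumed name of the MISC key
                if cluster_id is not None:
                    clusters.setdefault(str(cluster_id), []).append(token["form"])

    # Write one JSON object per cluster, one object per line.
    with open("clusters.jsonl", "w", encoding="utf-8") as f_out:
        for cluster_id, mentions in clusters.items():
            f_out.write(json.dumps({"cluster": cluster_id, "mentions": mentions}) + "\n")


if __name__ == "__main__":
    main(sys.argv[1])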
@saattrupdan
saattrupdan / download_ftspeech.py
Last active December 12, 2022 16:04
This downloads the FTSpeech corpus on a server where only a terminal is available.
"""Downloads the FTSpeech corpus."""
from selenium import webdriver
from selenium.webdriver.common.by import By
from pathlib import Path
import time
from getpass import getpass
def main() -> None:
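The preview ends at the empty main() definition. Below is a minimal, hedged sketch of the kind of headless-browser flow the description implies, not the gist's actual implementation: the FTSpeech URL, the element ids and the ten-second wait are placeholder assumptions.

import time
from getpass import getpass
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.common.by import By


def main() -> None:
    # Run Chrome headless so the script works on a server without a display.
    options = webdriver.ChromeOptions()
    options.add_argument("--headless=new")

    # Route downloads to a local directory instead of the default ~/Downloads.
    download_dir = Path("data").absolute()
    download_dir.mkdir(exist_ok=True)
    options.add_experimental_option("prefs", {"download.default_directory": str(download_dir)})

    driver = webdriver.Chrome(options=options)
    driver.get("https://ftspeech.github.io/")  # assumed landing page

    # Log in with credentials typed at the terminal; the element ids are assumptions.
    driver.find_element(By.ID, "username").send_keys(input("Username: "))
    driver.find_element(By.ID, "password").send_keys(getpass("Password: "))
    driver.find_element(By.ID, "login-button").click()

    # Give the download time to start before the driver is closed.
    time.sleep(10)
    driver.quit()


if __name__ == "__main__":
    main()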
@saattrupdan
saattrupdan / create_danfever_splits.py
Last active December 1, 2022 07:09
The DanFEVER dataset (https://huggingface.co/datasets/strombergnlp/danfever) only comes with a training split, making evaluations on it non-reproducible. This gist creates validation and test splits in a deterministic fashion.
from datasets import load_dataset
# Load the DanFEVER dataset
dataset = load_dataset("strombergnlp/danfever", split="train")
# Convert the dataset to a Pandas DataFrame
df = dataset.to_pandas()
# Get the unique `evidence_extract` values along with their counts
evidence_extract_counts = df.evidence_extract.value_counts()
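The preview stops after counting the evidence passages. A minimal sketch of one way to finish the job deterministically follows; the 80/10/10 ratio, the seed 4242 and the grouping-by-evidence strategy are assumptions for illustration rather than the gist's actual choices.

import random

from datasets import Dataset, DatasetDict, load_dataset

dataset = load_dataset("strombergnlp/danfever", split="train")
df = dataset.to_pandas()

# Shuffle the unique evidence passages with a fixed seed, so that every claim sharing
# an evidence passage ends up in the same split and the result is reproducible.
evidence_values = sorted(df.evidence_extract.unique())
random.Random(4242).shuffle(evidence_values)

# Reserve roughly 10% of the evidence passages for validation and 10% for testing.
num_val = len(evidence_values) // 10
val_evidence = set(evidence_values[:num_val])
test_evidence = set(evidence_values[num_val : 2 * num_val])

splits = DatasetDict(
    train=Dataset.from_pandas(
        df[~df.evidence_extract.isin(val_evidence | test_evidence)], preserve_index=False
    ),
    val=Dataset.from_pandas(df[df.evidence_extract.isin(val_evidence)], preserve_index=False),
    test=Dataset.from_pandas(df[df.evidence_extract.isin(test_evidence)], preserve_index=False),
)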
@saattrupdan
saattrupdan / create-danish-wit.py
Created November 14, 2022 10:50
Create Danish WIT
"""Unpack the WIT dataset and extract the Danish samples."""
from datasets.arrow_dataset import Example
from datasets.dataset_dict import DatasetDict
from datasets.load import load_dataset
from pathlib import Path
from tqdm.auto import tqdm
import re
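The preview again stops at the imports. A minimal sketch of the extraction step the title describes follows, using streaming so the full WIT download is never materialised; the Hub dataset id "google/wit" and the "language" column name are assumptions, and the original gist works from locally unpacked WIT files instead.

from datasets import load_dataset

# Stream the dataset so only the matching rows ever need to be downloaded.
wit = load_dataset("google/wit", split="train", streaming=True)

# Keep only the rows annotated as Danish; "language" is an assumed column name.
danish_wit = wit.filter(lambda example: example["language"] == "da")

# Inspect a handful of Danish samples.
for example in danish_wit.take(5):
    print(example)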