Skip to content

Instantly share code, notes, and snippets.

@xinzhel
xinzhel / openai_schema.py
Created December 11, 2023 04:49
openai_schema
from docstring_parser import epydoc, google, numpydoc, rest
from docstring_parser.attrdoc import add_attribute_docstrings
from docstring_parser.common import (
Docstring,
DocstringStyle,
ParseError,
RenderingStyle,
)
from pydantic import BaseModel, create_model, validate_arguments
import typing as T
from instructor import OpenAISchema
from pydantic import Field
from typing import List
import enum
import openai
class Source(enum.Enum):
VIDEO = "VIDEO"
TRANSCRIPT = "TRANSCRIPT"
@xinzhel
xinzhel / test_openaischema.json
Last active November 26, 2023 08:31
test_openaischema.json
{
"name": "MultiSearch",
"description": "correct segmentation of `Search` tasks",
"parameters": {
"properties": {
"tasks": {
"items": {
"$ref": "#/$defs/Search"
},
"type": "array"
@xinzhel
xinzhel / find_available_port.py
Created May 29, 2022 00:53
Finding an Available Port
import socket
# Ask the operating system for a currently unused TCP port: binding to port 0
# lets the OS choose the port, and getsockname() reports which one it picked.
# NOTE: the socket is closed when the `with` block exits, so the port is only
# known to have been free at that moment; another process may claim it before
# it is reused.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
    sock.bind(("", 0))  # "" = all interfaces, 0 = let the OS pick the port
    primary_port = sock.getsockname()[1]  # getsockname() -> (host, port)
@xinzhel
xinzhel / nlp_publications.md
Last active November 18, 2022 06:06
All you need for NLP Publications (Focusing on Conferences; Keep Updating...)

Arxiv and PubMed

https://github.com/armancohan/long-summarization

CNN Daily Mail

$ wget https://storage.googleapis.com/allennlp-public-data/cnndm-combined-data-2020.07.13.tar.gz
$ tar -xzf cnndm-combined-data-2020.07.13.tar.gz
$ mv cnndm-combined-data-2020.07.13 cnn_dm
#!/bin/bash
# Script to create Azure DSVM Spot instance with NVidia P100 GPU
read -p "Azure VM Name (default: dsvm): " vminput
vmname=${vminput:=dsvm}
while [ $password != $password2 ] ; do
read -s -p "Choose your Password: " password
echo
read -s -p "Re-enter Password: " password2
echo
# Refresh the package index, then upgrade installed packages.
# `upgrade` is an apt subcommand, not a standalone command, so the second
# half of the chain needs its own `sudo apt`.
sudo apt update && sudo apt upgrade
# install pip, ipython3
sudo apt install python3-pip
# 1. load pyenv
# NOTE(review): this pipes a remotely fetched script straight into bash;
# inspect the installer first if that is a concern.
curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash
@xinzhel
xinzhel / reuters-json.py
Last active November 14, 2021 23:13
convert Reuters dataset on kaggle to json file
import logging
import os
import sys
import json
from typing import Dict, Optional
from tqdm.auto import tqdm
import timeit
import numpy as np
# Module-level lists, both starting empty.
# NOTE(review): presumably these collect the train/test split records;
# confirm against the rest of the script, which is not visible here.
train = []
test = []
@xinzhel
xinzhel / learning-resource-for-ml-and-nlp-practitioner.md
Last active October 31, 2021 08:55
learning-resource-for-machine learning and natural language processing