Skip to content

Instantly share code, notes, and snippets.

from itertools import chain
import requests
from datasets import Dataset
class DataFetcher:
def __init__(
self,
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Sequence, Dict, Callable
import numpy as np
from rapidfuzz.distance import Levenshtein
from scipy.optimize import linear_sum_assignment
def aligned_edit_distance(
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Simple fuzzy grouping of a list of dictionaries by any string field, using string-similarity functions.
Dependencies:
- similarities
`pip install rapidfuzz jarowinkler -q`
- scipy for connected components (DisjointSet)
"""
from itertools import combinations, tee
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
class GoogleTranslate:
def __init__(self):
self.session = requests.Session()
self.session.headers.update(self.make_header())
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@imvladikon
imvladikon / datasets_sql.py
Last active January 29, 2023 01:56
duckdb + huggingface datasets
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import duckdb
import pyarrow as pa
from datasets import Dataset
try:
from ibis.backends.base.sql.alchemy import AlchemyTable
IBIS_AVAILABLE = True
except:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import dataclasses
import inspect
import json
import os
import sys
from argparse import ArgumentDefaultsHelpFormatter, ArgumentTypeError
from copy import copy
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import numpy as np
from typing import List, Set
def iter_ngrams(s, ngram=2):
    """Yield each contiguous window of length ``ngram`` from ``s``, left to right.

    ``s`` is anything that supports ``len()`` and slicing (the windows are
    produced with ``s[start: start + ngram]``). When ``s`` is shorter than a
    positive ``ngram``, nothing is yielded.
    """
    # Index of the final window's first element; negative when s is too short.
    last_start = len(s) - ngram
    start = 0
    while start <= last_start:
        yield s[start: start + ngram]
        start += 1
@imvladikon
imvladikon / GitConfigHttpProxy.md
Created July 23, 2022 13:24 — forked from evantoli/GitConfigHttpProxy.md
Configure Git to use a proxy

Configure Git to use a proxy

In Brief

You may need to configure a proxy server if you're having trouble cloning or fetching from a remote repository, or if you're getting an error like `unable to access '...' Couldn't resolve host '...'`.

Consider something like:

@imvladikon
imvladikon / run_glue.py
Last active February 5, 2023 16:59
run_glue + adapter layers
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#