malcolmgreaves /
Created Nov 18, 2019 — forked from stefan-it/
NER fine-tuning with PyTorch-Transformers (heavily based on
from __future__ import absolute_import, division, print_function
import argparse
import glob
import logging
import os
import random
import numpy as np
import torch
malcolmgreaves /
Last active Sep 5, 2019
Group-by-key function, plus a CSV reader and data types.
from typing import (
malcolmgreaves /
Created Sep 5, 2019
Class for registering transformation functions alongside the values of an enum.
from enum import Enum
from functools import lru_cache
from typing import Dict, Sequence, Any, Callable, Iterable, Tuple, Type, Generic, TypeVar, Union
# Type variable bounded by Enum: stands for any Enum subclass.
E = TypeVar("E", bound=Enum)
# Alias for a transformation function: accepts a single value of any type and returns a value of any type.
Transformer = Callable[[Any], Any]
def name_of(enum_or_name: Union[E, str]) -> str:
malcolmgreaves /
Created Sep 5, 2019
Splits a sequence according to a boolean function.
from typing import Callable, TypeVar, Sequence, Tuple
T = TypeVar('T')
def split_seq(items: Sequence[T], is_left: Callable[[T], bool]) -> Tuple[Sequence[T], Sequence[T]]:
l, r = [], []
for x in items:
a = l if is_left(x) else r
malcolmgreaves /
Last active Sep 4, 2019
Data type for wrapping an indexed value.
from typing import Generic, TypeVar, Union, Dict, Any, Type, cast
T = TypeVar("T")
class Indexed(Generic[T]):
__slots__ = ("_index", "_value")
def __init__(self, index: int, value: T) -> None:
self._index = index
malcolmgreaves /
Created Sep 3, 2019
Function to load JSON objects as a stream from a local file path. Works with either a JSON list or one JSON object per line.
import json
from pathlib import Path
from typing import Any, Dict, Iterable
def load_json(json_file_path: Path) -> Iterable[Dict[str, Any]]:
"""Loads a sequence of JSON objects: either a JSON list or a newline-separated list of objects.
:raises TypeError If the
malcolmgreaves /
Last active Aug 30, 2019
Parse a name-serialized enum from a string.
from enum import Enum
from typing import TypeVar, Type
E = TypeVar('E', bound=Enum)
def parse_enum(enum_type: Type[E], enum_name:str) -> E:
for known_e_name, enum_val in enum_type._member_map_.items():
if enum_name == known_e_name:
from typing import TypeVar, Collection, Callable, Any
T = TypeVar("T")
def not_in(collection: Collection[T]) -> Callable[[T], bool]:
"""Evaluates to a function that tests that an item is _not_ in a collection.
Inverse of `x in collection`.
from typing import Iterable, Any, Dict, Callable, Optional
def merge_dicts(
dictionaries: Iterable[Dict[str, Any]],
resolve_duplicate: Optional[Dict[str, Callable[[Any, Any], Any]]] = None,
) -> Dict[str, Any]:
"""Merge several dictionaries together, allowing for custom duplicate key-value pair resolution.
:raises ValueError Iff :param:`resolve_duplicate` function is not specified and duplicate
malcolmgreaves /
Created Aug 20, 2019
String name for a Python type at runtime. Handles Union and Optional, as well as classes with type parameters.
def type_name(t: type) -> str:
"""Complete name, module & specific type name, for the given type.
Does not supply the module in the returned complete name for built-in types.
When possible, also adds generic type arguments (w/ their at-runtime values)
in the returned full type name.
mod = t.__module__
if mod == "builtins":
return t.__name__
