Skip to content

Instantly share code, notes, and snippets.

Malcolm Greaves malcolmgreaves

Block or report user

Report or block malcolmgreaves

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
malcolmgreaves /
Created Nov 18, 2019 — forked from stefan-it/
NER fine-tuning with PyTorch-Transformers (heavily based on
from __future__ import absolute_import, division, print_function
import argparse
import glob
import logging
import os
import random
import numpy as np
import torch
malcolmgreaves /
Last active Sep 5, 2019
Group-by-key function, plus a CSV reader and data types.
from typing import (
malcolmgreaves /
Created Sep 5, 2019
Class for registering transformation functions alongside the values of an enum.
from enum import Enum
from functools import lru_cache
from typing import Dict, Sequence, Any, Callable, Iterable, Tuple, Type, Generic, TypeVar, Union
E = TypeVar("E", bound=Enum)
Transformer = Callable[[Any], Any]
def name_of(enum_or_name: Union[E, str]) -> str:
malcolmgreaves /
Created Sep 5, 2019
Splits a sequence according to a boolean function.
from typing import Callable, TypeVar, Sequence, Tuple
T = TypeVar('T')
def split_seq(items: Sequence[T], is_left: Callable[[T], bool]) -> Tuple[Sequence[T], Sequence[T]]:
l, r = [], []
for x in items:
a = l if is_left(x) else r
malcolmgreaves /
Last active Sep 4, 2019
Data type for wrapping an indexed value.
from typing import Generic, TypeVar, Union, Dict, Any, Type, cast
T = TypeVar("T")
class Indexed(Generic[T]):
__slots__ = ("_index", "_value")
def __init__(self, index: int, value: T) -> None:
self._index = index
malcolmgreaves /
Created Sep 3, 2019
Function to load JSON objects as a stream from a local filepath. Works with either a JSON list or newline-separated JSON objects (one object per line).
import json
from pathlib import Path
from typing import Any, Dict, Iterable
def load_json(json_file_path: Path) -> Iterable[Dict[str, Any]]:
"""Loads a sequence of JSON objects: either a JSON list or a newline-separated list of objects.
:raises TypeError If the
malcolmgreaves /
Last active Aug 30, 2019
Parse a name-serialized enum from a string.
from enum import Enum
from typing import TypeVar, Type
E = TypeVar('E', bound=Enum)
def parse_enum(enum_type: Type[E], enum_name:str) -> E:
for known_e_name, enum_val in enum_type._member_map_.items():
if enum_name == known_e_name:
from typing import TypeVar, Collection, Callable, Any
T = TypeVar("T")
def not_in(collection: Collection[T]) -> Callable[[T], bool]:
"""Evaluates to a function that tests that an item is _not_ in a collection.
Inverse of `x in collection`.
from typing import Iterable, Any, Dict, Callable, Optional
def merge_dicts(
dictionaries: Iterable[Dict[str, Any]],
resolve_duplicate: Optional[Dict[str, Callable[[Any, Any], Any]]] = None,
) -> Dict[str, Any]:
"""Merge several dictionaries together, allowing for custom duplicate key-value pair resolution.
:raises ValueError Iff :param:`resolve_duplicate` function is not specified and duplicate
malcolmgreaves /
Created Aug 20, 2019
String name for a Python type at runtime. Handles Union, Optional, and classes with type parameters well.
def type_name(t: type) -> str:
"""Complete name, module & specific type name, for the given type.
Does not supply the module in the returned complete name for built-in types.
When possible, also adds generic type arguments (w/ their at-runtime values)
in the returned full type name.
mod = t.__module__
if mod == "builtins":
return t.__name__
You can’t perform that action at this time.