Skip to content

Instantly share code, notes, and snippets.

Malcolm Greaves malcolmgreaves

Block or report user

Report or block malcolmgreaves

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@malcolmgreaves
malcolmgreaves / run_ner.py
Created Nov 18, 2019 — forked from stefan-it/run_ner.py
NER fine-tuning with PyTorch-Transformers (heavily based on https://github.com/kamalkraj/BERT-NER)
View run_ner.py
from __future__ import absolute_import, division, print_function
import argparse
import glob
import logging
import os
import random
import numpy as np
import torch
@malcolmgreaves
malcolmgreaves / group_by.py
Last active Sep 5, 2019
Group by key function, plus a CSV reader and data types.
View group_by.py
from typing import (
Dict,
TypeVar,
Sequence,
Iterable,
Tuple,
Callable,
Optional,
Set,
Any,
@malcolmgreaves
malcolmgreaves / enum_callable.py
Created Sep 5, 2019
Class for registering transformation functions alongside the values of an enum.
View enum_callable.py
from enum import Enum
from functools import lru_cache
from typing import Dict, Sequence, Any, Callable, Iterable, Tuple, Type, Generic, TypeVar, Union
E = TypeVar("E", bound=Enum)
Transformer = Callable[[Any], Any]
def name_of(enum_or_name: Union[E, str]) -> str:
@malcolmgreaves
malcolmgreaves / split_seq.py
Created Sep 5, 2019
Splits a sequence according to a boolean function.
View split_seq.py
from typing import Callable, TypeVar, Sequence, Tuple
T = TypeVar('T')
def split_seq(items: Sequence[T], is_left: Callable[[T], bool]) -> Tuple[Sequence[T], Sequence[T]]:
l, r = [], []
for x in items:
a = l if is_left(x) else r
a.append(x)
@malcolmgreaves
malcolmgreaves / indexed.py
Last active Sep 4, 2019
Data type for wrapping an indexed value.
View indexed.py
from typing import Generic, TypeVar, Union, Dict, Any, Type, cast
T = TypeVar("T")
class Indexed(Generic[T]):
__slots__ = ("_index", "_value")
def __init__(self, index: int, value: T) -> None:
self._index = index
@malcolmgreaves
malcolmgreaves / load_json_objects.py
Created Sep 3, 2019
Function to load JSON objects as a stream from a local file path. Works with either a JSON list or one JSON object per line.
View load_json_objects.py
import json
from pathlib import Path
from typing import Any, Dict, Iterable
def load_json(json_file_path: Path) -> Iterable[Dict[str, Any]]:
"""Loads a sequence of JSON objects: either a JSON list or a newline-separated list of objects.
:raises TypeError If the
"""
@malcolmgreaves
malcolmgreaves / parse_enum.py
Last active Aug 30, 2019
Parse a name-serialized enum from a string.
View parse_enum.py
from enum import Enum
from typing import TypeVar, Type
E = TypeVar('E', bound=Enum)
def parse_enum(enum_type: Type[E], enum_name:str) -> E:
try:
for known_e_name, enum_val in enum_type._member_map_.items():
if enum_name == known_e_name:
View misc_utils.py
from typing import TypeVar, Collection, Callable, Any
T = TypeVar("T")
def not_in(collection: Collection[T]) -> Callable[[T], bool]:
"""Evaluates to a function that tests that an item is _not_ in a collection.
Inverse of `x in collection`.
"""
View merge_dicts.py
from typing import Iterable, Any, Dict, Callable, Optional
def merge_dicts(
dictionaries: Iterable[Dict[str, Any]],
resolve_duplicate: Optional[Dict[str, Callable[[Any, Any], Any]]] = None,
) -> Dict[str, Any]:
"""Merge several dictionaries together, allowing for custom duplicate key-value pair resolution.
:raises ValueError Iff :param:`resolve_duplicate` function is not specified and duplicate
@malcolmgreaves
malcolmgreaves / type_name.py
Created Aug 20, 2019
String name for a Python type at runtime. Handles Union and Optional, as well as classes with type parameters.
View type_name.py
def type_name(t: type) -> str:
"""Complete name, module & specific type name, for the given type.
Does not supply the module in the returned complete name for built-in types.
When possible, also adds generic type arguments (w/ their at-runtime values)
in the returned full type name.
"""
mod = t.__module__
if mod == "builtins":
return t.__name__
You can’t perform that action at this time.