Skip to content

Instantly share code, notes, and snippets.

View afparsons's full-sized avatar
🏠
Working from home

Andrew Parsons afparsons

🏠
Working from home
  • Boston, Massachusetts
  • 07:40 (UTC -04:00)
View GitHub Profile
@afparsons
afparsons / bibliographer.py
Created January 23, 2023 22:01
spaCy 3.x RegularExpressionMatcher: Bibliographer
"""
A quick excerpt demonstrating usage of a custom `RegularExpressionMatcher` for spaCy 3.
This is from one of my personal projects (HaleyNLP/Irnerius). Module-level imports and other code blocks have been elided.
"""
class ComponentExtractionBibliographer(
AbstractComponentMatcher,
matcher=RegularExpressionMatcher,
):
"""
@afparsons
afparsons / regular_expression_matcher.py
Created January 23, 2023 21:56
spaCy 3.x RegularExpressionMatcher: Class
"""
A quick excerpt demonstrating usage of a custom `RegularExpressionMatcher` for spaCy 3.
This is from one of my personal projects (HaleyNLP/Irnerius). Module-level imports and other code blocks have been elided.
"""
class RegularExpressionMatcher:
"""
Akin to spaCy's Token Matcher, although this runs regular expressions
on the entire doc text.
@afparsons
afparsons / bibliographer_patterns.py
Last active January 23, 2023 22:04
spaCy 3.x RegularExpressionMatcher: Patterns and on_match pattern handlers
"""
A quick excerpt demonstrating usage of a custom `RegularExpressionMatcher` for spaCy 3.
This is from one of my personal projects (HaleyNLP/Irnerius). Module-level imports and other code blocks have been elided.
"""
@registry.misc('haleynlp.common.extraction.handler.on_match.bibliography._european_union_ecli')
def _european_union_ecli(
span: Span,
match: re.Match,
) -> None:
@afparsons
afparsons / query.sparql
Last active August 27, 2022 13:39
Wikidata currency information
SELECT ?cur ?curLabel ?code ?char ?symbol ?endTime ?countryLabel ?altLabel
WHERE
{
?sign wdt:P31 wd:Q308229.
?sign wdt:P487 ?char .
?cur wdt:P489 ?sign .
?cur wdt:P31 wd:Q8142 .
?cur wdt:P498 ?code .
OPTIONAL { ?cur wdt:P5061 ?symbol . }
?cur wdt:P17 ?country .
@afparsons
afparsons / bytes_to_human_readable.py
Created August 19, 2022 19:18
Convert size in bytes to human-readable amount
"""
Adapted from: https://stackoverflow.com/a/14822210/4189676
"""
from math import floor, log
def bytes_to_human_readable(number_of_bytes: int) -> str:
magnitude: int = int(floor(log(number_of_bytes, 1024)))
value: float = number_of_bytes / pow(1024, magnitude)
if magnitude > 3:
@afparsons
afparsons / django_taggit_tag_adder.py
Last active September 24, 2021 02:59
django-taggit TagAdder
from typing import List, Set, Tuple, Type
from taggit.models import Tag, TaggedItem
from django.db.models import Model, QuerySet
from django.contrib.contenttypes.models import ContentType
class TagAdder:
"""
An object which adds specific tags to given Django Model objects in bulk.
"""
@afparsons
afparsons / get_taggit_tag_counts.py
Created September 23, 2021 02:15
Get 'django-taggit' tag counts for each django model type
from typing import Dict
from pandas import DataFrame
from taggit.models import TaggedItem
from django.db.models import Q, Count
def get_tag_counts() -> DataFrame:
"""
Returns a DataFrame with the counts of django-taggit TaggedItems by
Tag name and ContentType model name.
@afparsons
afparsons / analyze_tokens.py
Last active August 2, 2022 13:19
spaCy: Tabular View of Token Attributes
# standard library imports
from operator import attrgetter
from typing import Union, Generator
# third-party library imports
from pandas import DataFrame
from spacy.tokens import Token, Span, Doc
def analyze_tokens(
@afparsons
afparsons / pprint_queryset.py
Last active August 24, 2023 08:06
Django QuerySet PrettyPrint
# Enhanced Django QuerySet printing using PrettyPrinter
# Example usage: dropped into and employed within an IPython notebook.
# --- PRETTYPRINT -------------------------------------------------------------
# A PrettyPrinter object contains a _dispatch dictionary.
# This lookup table contains (key, value) pairs wherein the key corresponds to
# an object's __repr__ method, and the value is a special _pprint_<OBJECT>
# method. The PrettyPrint method pprint() queries the dictionary to call the
# appropriate object printer.
ZCTA5CE10 count
49008 15
49036 1
49740 1
49506 5
48170 4
48302 5
49009 10
49017 1
48306 4