Skip to content

Instantly share code, notes, and snippets.

@itsthejoker
Last active December 18, 2023 14:47
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itsthejoker/9968cf3dc54086e3ef9198d980c44649 to your computer and use it in GitHub Desktop.
Save itsthejoker/9968cf3dc54086e3ef9198d980c44649 to your computer and use it in GitHub Desktop.
####################################
# ISO8601 time strings + UTC offsets
####################################
import string
import unicodedata
from datetime import datetime
from time import gmtime
from time import strftime
@staticmethod
def local_datetime(utc_offset=True):
"""
Returns an ISO8601 formatted string depicting the current local time.
Appending the UTC offset to the end is optional, and will have the
following formatting: '2016-11-11T14:56:05.004707-0500'
NOTE: DOES NOT WORK IF THE TARGET SYSTEM DOES NOT HAVE THE strftime
C EXTENSION INSTALLED
"""
# server timezone UTC offset, e.g. "-0500"
local_timezone = strftime("%z", gmtime())
if not utc_offset:
return datetime.now().isoformat()
return datetime.now().isoformat() + local_timezone
####################################
# Graceful Interrupt Handler
####################################
# https://gist.github.com/itsthejoker/6b497f2098916cefd8a8e2f9a1ff7b5d
####################################
# Auto Column Formatter
####################################
# https://github.com/Samrux/Python-Scripts/blob/master/columnize.py
####################################
# Recursively update values in dict
####################################
from typing import Any
def replace_item(
obj: dict | list, key_to_replace: str, replace_value: Any
) -> dict | list:
if isinstance(obj, list):
temp_list = []
for subitem in obj:
subitem = replace_item(subitem, key_to_replace, replace_value)
temp_list.append(subitem)
return temp_list
obj = {
key: replace_value if key == key_to_replace else value
for key, value in obj.items()
}
for key in obj.keys():
if type(obj[key]) in [dict, list]:
obj[key] = replace_item(obj[key], key_to_replace, replace_value)
return obj
if __name__ == "__main__":
    # Small demonstration of replace_item. The sample data is illustrative
    # only; it is defined here so the snippet runs on its own, and the guard
    # keeps the prints from firing when the module is imported.
    thing = {
        "IsThin": False,
        "children": [{"IsThin": False, "name": "a"}, {"name": "b"}],
    }
    thing2 = replace_item(thing, "IsThin", True)
    print(thing)
    print(thing2)
####################################
# Pure Python End of Month Datemath
####################################
# Number of days in each month of a non-leap year, indexed by month number
# (1 = January ... 12 = December). Index 0 is a placeholder so month numbers
# can be used as indexes directly; February's leap day is not included here.
M_DAYS = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
def isleap(year):
    """Tell whether ``year`` is a leap year under the Gregorian rules."""
    if year % 4:
        # Not divisible by four: never a leap year.
        return False
    if year % 100:
        # Divisible by four but not a century year.
        return True
    # Century years are leap years only when divisible by 400.
    return year % 400 == 0
def days_in_month(year, month):
    """Give the length in days of ``month`` (1-12) in ``year``, leap-aware."""
    length = M_DAYS[month]
    if month == 2 and isleap(year):
        # February gains one day in leap years.
        return length + 1
    return length
def is_monthend(ref_date):
    """Tell whether ``ref_date`` falls on the last day of its month."""
    day_of_month = ref_date.day
    return day_of_month == days_in_month(ref_date.year, ref_date.month)
####################
# Spaceship Operator
####################
# In Ruby, the spaceship operator `<=>` makes it easy to tell whether one number
# is greater or lesser than another. For example, `a <=> b` returns -1 if a is
# smaller, 1 if a is larger, and 0 if they are equal.
def spaceship(a: int, b: int) -> int:
    """Three-way compare: -1 when a < b, 1 when a > b, and 0 otherwise."""
    # Each comparison contributes 0 or 1; their difference is the sign.
    return int(a > b) - int(a < b)
####################
# Text Normalization
####################
def normalize_quotes(text: str) -> str:
    """Map typographic quote look-alikes onto the ASCII ' and " characters.

    Every character not listed below passes through unchanged.
    """
    apostrophe_like = (
        0x0027,  # APOSTROPHE
        0x0060,  # GRAVE ACCENT
        0x00B4,  # ACUTE ACCENT
        0x2018,  # LEFT SINGLE QUOTATION MARK
        0x2019,  # RIGHT SINGLE QUOTATION MARK
        0x201A,  # SINGLE LOW-9 QUOTATION MARK
        0x201B,  # SINGLE HIGH-REVERSED-9 QUOTATION MARK
        0x2032,  # PRIME
        0x2035,  # REVERSED PRIME
        0x2039,  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        0x203A,  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        0x300C,  # LEFT CORNER BRACKET
        0x300D,  # RIGHT CORNER BRACKET
        0x300E,  # LEFT WHITE CORNER BRACKET
        0x300F,  # RIGHT WHITE CORNER BRACKET
        0xFF07,  # FULLWIDTH APOSTROPHE
        0xFF62,  # HALFWIDTH LEFT CORNER BRACKET
        0xFF63,  # HALFWIDTH RIGHT CORNER BRACKET
    )
    # NOTE(review): U+2033 DOUBLE PRIME is absent here although the other
    # prime characters are listed -- confirm whether that is intentional.
    quotation_like = (
        0x0022,  # QUOTATION MARK
        0x201C,  # LEFT DOUBLE QUOTATION MARK
        0x201D,  # RIGHT DOUBLE QUOTATION MARK
        0x201E,  # DOUBLE LOW-9 QUOTATION MARK
        0x201F,  # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
        0x301D,  # REVERSED DOUBLE PRIME QUOTATION MARK
        0x301F,  # LOW DOUBLE PRIME QUOTATION MARK
        0x2034,  # TRIPLE PRIME
        0x2036,  # REVERSED DOUBLE PRIME
        0x2037,  # REVERSED TRIPLE PRIME
        0x301E,  # DOUBLE PRIME QUOTATION MARK
        0xFF02,  # FULLWIDTH QUOTATION MARK
    )
    # The two groups are disjoint and neither target character is remapped by
    # the other group, so a single combined table gives the same result as
    # translating in two passes.
    table = dict.fromkeys(apostrophe_like, "'")
    table.update(dict.fromkeys(quotation_like, '"'))
    return text.translate(table)
def normalize_hyphens(text: str) -> str:
    """Collapse hyphen, dash and dash look-alike characters to ASCII '-'.

    Every character not listed below passes through unchanged.
    """
    dash_like = (
        0x002D,  # HYPHEN-MINUS
        0x007E,  # TILDE
        0x00AD,  # SOFT HYPHEN
        0x058A,  # ARMENIAN HYPHEN
        0x05BE,  # HEBREW PUNCTUATION MAQAF
        0x1173,  # HANGUL JUNGSEONG EU
        0x1400,  # CANADIAN SYLLABICS HYPHEN
        0x1806,  # MONGOLIAN TODO SOFT HYPHEN
        0x2010,  # HYPHEN
        0x2011,  # NON-BREAKING HYPHEN
        0x2012,  # FIGURE DASH
        0x2013,  # EN DASH
        0x2014,  # EM DASH
        0x2015,  # HORIZONTAL BAR
        0x2043,  # HYPHEN BULLET
        0x2053,  # SWUNG DASH
        0x2E17,  # DOUBLE OBLIQUE HYPHEN
        0x2E1A,  # HYPHEN WITH DIAERESIS
        0x2E3A,  # TWO-EM DASH
        0x2E3B,  # THREE-EM DASH
        0x2E40,  # DOUBLE HYPHEN
        0x301C,  # WAVE DASH
        0x30FC,  # KATAKANA-HIRAGANA PROLONGED SOUND MARK
        0x3030,  # WAVY DASH
        0x30A0,  # KATAKANA-HIRAGANA DOUBLE HYPHEN
        0x3161,  # HANGUL LETTER EU
        0x4E00,  # CJK UNIFIED IDEOGRAPH-4E00
        0xA4FE,  # LISU PUNCTUATION COMMA
        0xFE31,  # PRESENTATION FORM FOR VERTICAL EM DASH
        0xFE32,  # PRESENTATION FORM FOR VERTICAL EN DASH
        0xFE58,  # SMALL EM DASH
        0xFE63,  # SMALL HYPHEN-MINUS
        0xFF0D,  # FULLWIDTH HYPHEN-MINUS
        0x10EAD,  # YEZIDI HYPHENATION MARK
        0x10F55,  # labelled "SOGDIAN HYPHENATION MARK" upstream; NOTE(review): verify name
        0x10110,  # AEGEAN NUMBER TEN
        0x10191,  # ROMAN UNCIA SIGN
        0x1104B,  # BRAHMI PUNCTUATION LINE
        0x11052,  # BRAHMI NUMBER ONE
        0x110BE,  # KAITHI SECTION MARK
    )
    return text.translate(dict.fromkeys(dash_like, "-"))
def text_to_ascii(
text: str,
punctuation_to_keep: Optional[str] = None,
keep_first_occurrence_of: str = None,
) -> str:
"""
Reduces text to bare ASCII with optional ability to keep some punctuation.
Example: "Eärendil's ship, Vingilótë" -> "Earendils ship Vingilote"
To remove international characters while keeping all punctuation, use like
this:
```
>>> import string, unicodedata
>>> my_text = "Eärendil's ship, Vingilótë"
>>> text_to_ascii(my_text, punctuation_to_keep=string.punctuation)
"Earendil's ship, Vingilote"
```
Alternatively, use the helper function `normalize_text` with the second arg
of "all".
Use the arg `keep_first_occurrence_of` to do what it says; use a string of
characters, like "$%" to keep the first occurrence of either of those
symbols in the string.
"""
punctuation = string.punctuation
if punctuation_to_keep:
punctuation = "".join([x for x in punctuation if x not in punctuation_to_keep])
if keep_first_occurrence_of:
punctuation = "".join(
[x for x in punctuation if x not in keep_first_occurrence_of]
)
for char in keep_first_occurrence_of:
# change them all to something identifiable, save the first, nuke the rest
text = text.replace(char, "|||").replace("|||", char, 1).replace("|||", "")
text = text.strip().translate(str.maketrans("", "", punctuation))
return unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode()
def normalize_text(
    text: str, punctuation_to_keep="-.&'_", keep_first_occurrence_of=None
) -> str:
    """Convert unicode text to ASCII text.

    Quote and hyphen look-alikes are first normalized to their ASCII forms,
    then the text is reduced to ASCII.

    Unless told otherwise, these punctuation characters survive:
        - . & ' _
    All other punctuation is dropped. Pass the string "all" as the second
    argument to keep every punctuation character, or None to remove all of
    them.
    """
    if punctuation_to_keep == "all":
        keep = string.punctuation
    else:
        keep = punctuation_to_keep
    cleaned = normalize_quotes(text)
    cleaned = normalize_hyphens(cleaned)
    ascii_text = text_to_ascii(
        cleaned,
        punctuation_to_keep=keep,
        keep_first_occurrence_of=keep_first_occurrence_of,
    )
    return ascii_text.strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment