Skip to content

Instantly share code, notes, and snippets.

@knu2xs
knu2xs / get_pyspark_schema.py
Last active April 17, 2024 22:07
Get PySpark Schema
import re
from pyspark.sql import DataFrame
def get_pyspark_dataframe_schema(df: DataFrame) -> str:
"""Output the DataFrame schema to easily be included when constructing a new PySpark DataFrame."""
# characters to include for tab, since python, using four spaces
tab = ' '
@knu2xs
knu2xs / copy_to_clipboard.py
Created January 29, 2024 14:08
Copy to Clipboard Python
def copy_to_clipboard(string: str) -> None:
"""
Copy a string to the system clipboard.
.. note::
This leans on Pandas, so it will not work unless Pandas is installed.
"""
# ensure Pandas is installed and available
if not importlib.util.find_spec('pandas'):
raise EnvironmentError('copy_to_clipboard requires Pandas. Please ensure Pandas is installed in the environment to use.')
@knu2xs
knu2xs / preprocessing.py
Created July 11, 2022 18:27
Integration of the Python API in scikit-learn Transformers
from functools import lru_cache
from typing import Union, List, Optional
from arcgis.geoenrichment import Country
from arcgis.geometry import Polygon
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
__all__ = ['EnrichBase', 'EnrichPolygon', 'EnrichStandardGeography', 'KeepOnlyEnrichColumns', 'ArrayToDataFrame']
@knu2xs
knu2xs / hashing.py
Last active November 3, 2023 13:30
Add an MD5 hash column to a Pandas data frame for change analysis.
from hashlib import md5
import pandas as pd
from typing import Optional, Iterable
def get_md5_from_series(input_iterable: Iterable) -> str:
"""
Create a MD5 hash from an Iterable, typically a row from a Pandas ``DataFrame``, but can be any
Iterable object instance such as a list, tuple or Pandas ``Series``.
Args:
@knu2xs
knu2xs / get_logger.py
Created September 29, 2021 18:26
Concisely get an easy-to-use logger
import logging
from pathlib import Path
from typing import Union
def get_logger(log_path: Union[str, Path] = 'logfile.log', log_name: str = 'logger', log_level: int = logging.ERROR):
"""
Standardized way to create a console and file logger in one.
Args:
import psutil
import platform
from datetime import datetime
def get_size(bytes, suffix="B"):
"""
Scale bytes to its proper format
e.g:
1253656 => '1.20MB'
1253656678 => '1.17GB'
@knu2xs
knu2xs / polsby_popper.py
Last active September 29, 2021 03:48
Calculate Polsby-Popper as measure of compactness
import math
from arcgis.geometry import Polygon
def get_simplified_polygon(geometry: Polygon, simplification_factor: float = 0.1) -> Polygon:
# ensure working with a polygon
assert isinstance(geometry, Polygon)
@knu2xs
knu2xs / environment.yml
Created September 28, 2021 21:58
Example conda environment file with PyODBC
name: arcgis
channels:
- esri
- conda-forge
dependencies:
- arcgis
- arcpy
- jupyterlab
def pro_at_least_version(version: str) -> bool:
"""
Test the current ArcGIS Pro version to be equal or greater than.
Args:
version: Version number eg ('2', '2.1', '2.8.1')
Returns:
Boolean indicating if version is at least or greater than specified version.
"""
# late import since may or may not already be loaded
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.