Skip to content

Instantly share code, notes, and snippets.

@eliorc
eliorc / fsspec_open_any_file.py
Created February 20, 2024 13:18
Open any file
# Protocol-name constants. Their use is not visible in this excerpt; presumably
# they are compared against the scheme part of a filepath -- confirm in the parser.
CLOUD_PROTOCOLS = ("s3", "s3n", "s3a", "gcs", "gs", "adl", "abfs", "abfss", "gdrive")
HTTP_PROTOCOLS = ("http", "https")
# Every entry here also appears in CLOUD_PROTOCOLS (the three S3 spellings).
S3_PROTOCOLS = ("s3", "s3a", "s3n")
# Presumably the separator between protocol and path, as in "s3://bucket/key".
PROTOCOL_DELIMITER = "://"
def _parse_filepath(filepath: str) -> dict[str, str]:
"""
Split filepath on protocol and path. Based on `fsspec.utils.infer_storage_options`.
@eliorc
eliorc / open_any_file.py
Created September 30, 2023 07:24
Global open file (from local, remote, http etc.)
# Based on Kedro's code, from this example https://docs.kedro.org/en/stable/data/how_to_create_a_custom_dataset.html#the-complete-example
import re
import typing as t
from contextlib import contextmanager
from pathlib import PurePath, PurePosixPath
from urllib.parse import urlsplit
import fsspec
# Protocol names grouped as "cloud"; how they are used is not visible in this excerpt.
CLOUD_PROTOCOLS = ("s3", "s3n", "s3a", "gcs", "gs", "adl", "abfs", "abfss", "gdrive")
@eliorc
eliorc / inspections.txt
Created May 24, 2022 11:13 — forked from pylover/inspections.txt
PyCharm inspections
# Extracted using: $ unzip -p lib/pycharm.jar com/jetbrains/python/PyBundle.properties | grep -B1 INSP.NAME | grep '^#' | sed 's|Inspection||g' | sed -e 's|#\s\{,1\}|# noinspection |'
# noinspection PyPep8
# noinspection PyPep8Naming
# noinspection PyTypeChecker
# noinspection PyAbstractClass
# noinspection PyArgumentEqualDefault
# noinspection PyArgumentList
# noinspection PyAssignmentToLoopOrWithParameter
# noinspection PyAttributeOutsideInit
@eliorc
eliorc / add_to_pythonpath_pwd.sh
Last active July 27, 2022 07:29
Adding to PYTHONPATH with .pth files
#!/usr/bin/env sh
# Add current folder to PYTHONPATH
#
# Writes a `project_dir.pth` file containing the current working directory
# into the site-packages directory of the active Poetry virtualenv.
# Exits with status 1 (and writes nothing) when the virtualenv or its
# site-packages directory cannot be found.
CURRENT_FOLDER=$(pwd)

# Ask Poetry for the virtualenv root; bail out instead of writing the .pth
# file to a bogus location when Poetry fails.
VENV_FOLDER=$(poetry env info -p) || {
    echo "Could not determine the Poetry virtualenv path" >&2
    exit 1
}

# Glob directly (no `ls` parsing) with the virtualenv path quoted so that
# paths containing spaces survive; take the first matching directory only.
SITE_PACKAGES_FOLDER=""
for candidate in "$VENV_FOLDER"/lib/python*/site-packages/; do
    if [ -d "$candidate" ]; then
        # The pattern ends with "/", so the candidate already has a trailing slash.
        SITE_PACKAGES_FOLDER="${candidate}project_dir.pth"
        break
    fi
done

if [ -z "$SITE_PACKAGES_FOLDER" ]; then
    echo "No site-packages directory found under $VENV_FOLDER" >&2
    exit 1
fi

# printf avoids echo's implementation-defined handling of backslashes.
printf '%s\n' "$CURRENT_FOLDER" > "$SITE_PACKAGES_FOLDER"
@eliorc
eliorc / should_i_comment.py
Last active February 25, 2021 11:08
DataMapCallback no comments
class DataMapCallback(tf.keras.callbacks.Callback):
"""
Gather training dynamics for data map generation. Assumes a binary or multi-class model, no support for multi label.
Arguments
---------
- `dataset` (``tf.data.Dataset``): Usually, as the paper suggests, this is the training dataset. It should be:
1. Non-shuffled, so each iteration over the dataset should yield samples in the same order
class ContextualDynamicMetaEmbedding(tf.keras.layers.Layer):
def __init__(self,
embedding_matrices: List[tf.keras.layers.Embedding],
output_dim: Optional[int] = None,
n_lstm_units: int = 2,
name: str = 'contextual_dynamic_meta_embedding',
**kwargs):
"""
:param embedding_matrices: List of embedding layers
:param n_lstm_units: Number of units in each LSTM, (notated as `m` in the original article)
class DynamicMetaEmbedding(tf.keras.layers.Layer):
def __init__(self,
embedding_matrices: List[tf.keras.layers.Embedding],
output_dim: Optional[int] = None,
name: str = 'dynamic_meta_embedding',
**kwargs):
"""
:param embedding_matrices: List of embedding layers
:param output_dim: Dimension of the output embedding
:param name: Layer name
@eliorc
eliorc / layer_normalization_test.py
Created July 6, 2019 08:57
LayerNormalization test
import tensorflow as tf
from tavolo.normalization import LayerNormalization
def test_shapes():
""" Test input-output shapes """
# Inputs shape
input_shape_2d = (56, 10)
@eliorc
eliorc / layer_normalization.py
Last active July 6, 2019 08:33
LayerNormalization
import tensorflow as tf
class LayerNormalization(tf.keras.layers.Layer):
"""
Apply layer normalization
Arguments
---------
@eliorc
eliorc / cyclical_encoder.py
Last active February 27, 2019 14:32
Generate cyclic time features on pandas.DataFrame using a pandas.DateTime column
from itertools import count
import pandas as pd
import numpy as np
def cyclical_encoder(data: pd.DataFrame,
time_column: str,
time_unit: str,
normalize_val: float,
label_suffix: str) -> pd.DataFrame: