@jkmackie
jkmackie / gmail_mbox_parser.py
Created July 21, 2023 17:57 — forked from benwattsjones/gmail_mbox_parser.py
Quick Python code to parse mbox files, specifically those exported by Gmail. Extracts sender, date, and plain-text contents, and ignores base64 attachments.
#! /usr/bin/env python3
# ~*~ utf-8 ~*~
import mailbox
import bs4
def get_html_text(html):
    try:
        return bs4.BeautifulSoup(html, 'lxml').body.get_text(' ', strip=True)
    except AttributeError:  # message contents empty
        return None
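# Hypothetical usage sketch (file name and field handling are my own illustration, not the
# gist's code): iterate a Gmail-exported mbox and pull sender, date, and plain-text body,
# skipping base64 attachments by keeping only text/plain parts.
for message in mailbox.mbox('example.mbox'):
    sender = message['From']
    date = message['Date']
    body = None
    for part in message.walk():
        if part.get_content_type() == 'text/plain':
            body = part.get_payload(decode=True).decode(errors='replace')
            break
    print(sender, date, (body or '')[:80])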
import random
from PIL import Image

def crop_imag(imageFilePath: str, shakerange=20, shake_on=None):
    '''Crop 1100x1100 images to 224x224. Optionally shake from the center-crop
    location (438, 438, 662, 662). Return a 224x224 image.'''
    im = Image.open(imageFilePath)
    rix = random.randint(-shakerange, shakerange)  # random x-offset applied to x1 and x2
    riy = random.randint(-shakerange, shakerange)  # random y-offset applied to y1 and y2
    if shake_on:
        shaken_centercrop = (438 + rix, 438 + riy, 662 + rix, 662 + riy)
        imCropped = im.crop(shaken_centercrop)  # im.crop((x1, y1, x2, y2)): (x1, y1) top-left, (x2, y2) bottom-right
    else:
        imCropped = im.crop((438, 438, 662, 662))  # plain center crop
    return imCropped
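# Hypothetical call (the path is a placeholder):
cropped = crop_imag('./images/raw/example.jpg', shakerange=20, shake_on=True)
print(cropped.size)  # (224, 224)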
# Load the test dataset for evaluation.
import tensorflow as tf

test_ds = tf.keras.utils.image_dataset_from_directory(
    './images/test',
    labels='inferred',
    seed=None,
    image_size=(224, 224),
    batch_size=32)
model.evaluate(test_ds)
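# The VGG16 preprocessing applied to train_ds/val_ds further down presumably applies to the
# test split as well; a minimal sketch (assumes preprocess_input from keras.applications.vgg16):
from keras.applications.vgg16 import preprocess_input
test_ds = test_ds.map(lambda x, y: (preprocess_input(x), y))
model.evaluate(test_ds)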
# Load and freeze the VGG16 model.
# include_top=False removes the classification head trained on ImageNet.
from keras.applications.vgg16 import VGG16

base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # freeze the convolutional base
base_model.summary()
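# Quick sanity check (my own addition): a frozen base should report no trainable weights.
print('trainable weights in base_model:', len(base_model.trainable_weights))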
# Create the training dataset. The file_paths attribute shows train_ds and val_ds are mutually exclusive.
train_ds = tf.keras.utils.image_dataset_from_directory(
    './images/train/',
    labels='inferred',
    shuffle=True,
    seed=8,
    image_size=(224, 224),
    batch_size=32)
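# Hypothetical companion validation set (the directory name is a placeholder; the gist's exact
# call is not shown in this preview). The file_paths attribute can confirm the splits are disjoint.
val_ds = tf.keras.utils.image_dataset_from_directory(
    './images/val/',
    labels='inferred',
    shuffle=True,
    seed=8,
    image_size=(224, 224),
    batch_size=32)
assert set(train_ds.file_paths).isdisjoint(val_ds.file_paths)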
from dataclasses import dataclass, field
import numpy as np
from scipy.fft import fftshift, fft2, ifftshift, fft
from scipy.linalg import toeplitz
import matplotlib.pyplot as plt

@dataclass(repr=False)
class Bispectrum2D:
    '''
    Make a Bispectrum dataclass from a 1D signal with frequency in Hertz.
    '''
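# For orientation only, a minimal sketch of a direct bispectrum estimate for a 1D signal x
# (an illustration, not the gist's Bispectrum2D implementation):
# B(f1, f2) = X(f1) * X(f2) * conj(X(f1 + f2)).
def bispectrum_direct(x):
    X = fft(x)
    n = len(X)
    idx = (np.arange(n)[:, None] + np.arange(n)[None, :]) % n  # wrap f1 + f2 (circular frequencies)
    return np.outer(X, X) * np.conj(X[idx])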
# Train model and get metrics.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

from keras.callbacks import Callback

class Histories(Callback):
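    # A plausible body (the gist's own implementation is not shown in this preview): record
    # the metrics Keras reports after every epoch so they can be plotted later.
    def on_train_begin(self, logs=None):
        self.epoch_logs = []

    def on_epoch_end(self, epoch, logs=None):
        self.epoch_logs.append(dict(logs or {}))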
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

def annotate_w_xy_corr(x, y, **kwargs):
    '''Annotate the current axes with the Pearson correlation between x and y.'''
    coef = np.corrcoef(x, y)[0][1]
    label = 'corr = ' + str(round(coef, 3))
    ax = plt.gca()
    ax.annotate(label, xy=(0.3, 0.07), xycoords=ax.transAxes, c='darkred')  # size=18
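# Hypothetical usage (df is a placeholder DataFrame): the (x, y, **kwargs) signature matches
# what seaborn's PairGrid mapping functions pass, so the correlation is stamped onto each panel.
df = pd.DataFrame(np.random.default_rng(0).normal(size=(100, 3)), columns=['a', 'b', 'c'])
g = sns.pairplot(df)
g.map_lower(annotate_w_xy_corr)
plt.show()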
# Add classification layers.
from tensorflow.keras import layers, models

flatten_layer = layers.Flatten()
dense_layer_1 = layers.Dense(10, activation='relu')
dropout_1 = layers.Dropout(rate=0.3)
prediction_layer = layers.Dense(1, activation='sigmoid')  # sigmoid for binary classification

# Transfer learning - the frozen base_model plus the classification layers above.
model = models.Sequential([
    base_model,
    flatten_layer,
    dense_layer_1,
    dropout_1,
    prediction_layer,
])
# Preprocess the datasets per https://www.tensorflow.org/tutorials/load_data/images
from keras.applications.vgg16 import preprocess_input
import numpy as np

train_ds = train_ds.map(lambda x, y: (preprocess_input(x), y))
val_ds = val_ds.map(lambda x, y: (preprocess_input(x), y))

# Show the min/max of the first image. Notice the pixel-value range after preprocessing.
image_batch, labels_batch = next(iter(train_ds))
first_image = image_batch[0]
print('image min and max values:', np.min(first_image), np.max(first_image), '\n\n')
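# Hypothetical training call tying the pieces together (epoch count and callback usage are
# assumptions; the gist's fit call is not shown in this preview).
history = model.fit(train_ds, validation_data=val_ds, epochs=10, callbacks=[Histories()])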