import re

def match_regex_patt(df, target_col, regex_patt, no_match_value='None'):
    '''Returns regex_patt matches in target_col as a list. Case is ignored.'''
    matchList = []
    cnt = 0
    for idx in df.index:
        m = re.search(regex_patt, df.loc[idx, target_col], flags=re.IGNORECASE)  #re.search(pattern, string) gets first match
        if m is not None:
            matchList.append(m.group(1).lower())
        else:
            matchList.append(no_match_value)  #keep list aligned with df rows
            cnt += 1
    print(cnt, 'rows had no match.')
    return matchList
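# Minimal usage sketch -- the DataFrame contents, column name, and pattern below
# are illustrative assumptions, not from the original gist.
import pandas as pd

demo = pd.DataFrame({'title': ['2012 Ford F-150', '2015 TOYOTA Camry', 'rusty sedan']})
demo['make'] = match_regex_patt(demo, 'title', r'\b(ford|toyota|chevy)\b')
print(demo['make'].tolist())  # ['ford', 'toyota', 'None']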
########################################################################
# #
# Compare appending performance - DataFrames versus lists #
# #
########################################################################
import time
import pandas as pd
from random import randint
import sys
import sqlite3
def write_table(df, table_name_str):
    '''Writes dataframe to a SQLite table named table_name_str.'''
    with sqlite3.connect('auto.sqlite', isolation_level=None) as conn:  # autocommit mode
        return df.to_sql(table_name_str, con=conn, if_exists='replace', index=False)
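# The benchmark body is missing from this fragment; below is a minimal sketch of
# the comparison the banner describes (row count and column names are assumptions).
n_rows = 10_000

start = time.time()
df = pd.DataFrame(columns=['a', 'b'])
for i in range(n_rows):
    df.loc[len(df)] = [randint(0, 9), randint(0, 9)]  # append one row at a time
print('DataFrame append:', round(time.time() - start, 2), 'sec')

start = time.time()
rows = []
for i in range(n_rows):
    rows.append([randint(0, 9), randint(0, 9)])  # append to a list, build df once
df = pd.DataFrame(rows, columns=['a', 'b'])
print('list append:    ', round(time.time() - start, 2), 'sec')

write_table(df, 'rand_ints')  # persist the result with the helper above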
#Save tables to PostgreSQL 11.3 database.
import psycopg2 #version 2.8.4
from sqlalchemy import create_engine #version 1.3.11
from sqlalchemy.dialects.postgresql import JSON, JSONB
#The text at the end, postgres, is the database name.
engine = create_engine("postgresql://postgres@localhost:5432/postgres")
#Write vehicle table with json columns
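# The write itself is missing from this fragment; a minimal sketch -- the
# `vehicle` DataFrame and its columns are illustrative assumptions, not from the gist.
import pandas as pd

vehicle = pd.DataFrame({
    'model': ['F-150'],
    'features': [{'cab': 'crew', 'bed_length_ft': 5.5}],  # dicts -> json column
})
vehicle.to_sql('vehicle', con=engine, if_exists='replace', index=False,
               dtype={'features': JSONB})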
# -*- coding: utf-8 -*-
import scrapy
#scrape with this terminal command: scrapy crawl clspider -o mycity.json
#scrapy version = 1.6.0. Shift + Alt + F to format JSON in VS Code.
class ClspiderSpider(scrapy.Spider):
    name = 'clspider'
    allowed_domains = ['craigslist.org']
    start_urls = ['https://elpaso.craigslist.org/search/cta?auto_make_model=ford']  #cta is cars + trucks by ALL
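    #The parse callback is missing from this fragment; a minimal sketch -- the CSS
    #selectors are assumptions about the Craigslist result markup, not from the gist.
    def parse(self, response):
        for listing in response.css('li.result-row'):
            yield {
                'title': listing.css('a.result-title::text').extract_first(),
                'price': listing.css('span.result-price::text').extract_first(),
                'url': listing.css('a.result-title::attr(href)').extract_first(),
            }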
import numpy as np
from keras.applications.vgg16 import preprocess_input  #assumed VGG16 preprocessing, per the VGG16 import below

#Class names and shapes
print('1 train class names:', train_ds.class_names)
print('2 val class names:', val_ds.class_names)
for image_batch, labels_batch in train_ds:
    print('3 images, xpixels, ypixels, color_channels:', image_batch.shape)
    print('4 labels:', labels_batch.shape)
    break
#Preprocess dataset per https://www.tensorflow.org/tutorials/load_data/images
train_ds = train_ds.map(lambda x, y: (preprocess_input(x), y))
val_ds = val_ds.map(lambda x, y: (preprocess_input(x), y))
# Show min/max of first image. Notice the pixel values after preprocess.
image_batch, labels_batch = next(iter(train_ds))
first_image = image_batch[0]
print('image min and max values:', np.min(first_image), np.max(first_image), '\n\n')
# Load and freeze VGG16 model.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

def annotate_w_xy_corr(x, y, **kwargs):
    '''Annotates the current axes with the Pearson correlation of x and y.'''
    coef = np.corrcoef(x, y)[0][1]
    label = r'corr = ' + str(round(coef, 3))
    ax = plt.gca()
    ax.annotate(label, xy=(0.3, .07), xycoords=ax.transAxes, c='darkred')  #size = 18
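# Hypothetical usage sketch -- the demo data are an assumption, not from the gist:
# map the correlation annotation onto the lower panels of a seaborn PairGrid.
demo = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
g = sns.PairGrid(demo)
g.map_lower(sns.scatterplot)
g.map_lower(annotate_w_xy_corr)
plt.show()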
from keras.applications.vgg16 import VGG16
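# The load/freeze step named in the 'Load and freeze VGG16 model' comment above is
# missing from this fragment; a minimal sketch (input shape matches the 224x224 dataset).
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # freeze the convolutional base for transfer learning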
import tensorflow as tf

#Create dataset. The file_paths method shows train_ds and val_ds are mutually exclusive.
train_ds = tf.keras.utils.image_dataset_from_directory(
    './images/train/',
    labels='inferred',
    shuffle=True,
    seed=8,
    image_size=(224, 224),
    batch_size=32)
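#val_ds creation is missing from this fragment; a minimal sketch, assuming a
#parallel './images/val/' directory (the path is an assumption, not from the gist).
val_ds = tf.keras.utils.image_dataset_from_directory(
    './images/val/',
    labels='inferred',
    shuffle=True,
    seed=8,
    image_size=(224, 224),
    batch_size=32)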