This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def match_regex_patt(df, target_col, regex_patt, no_match_value='None'): | |
'''Returns regex_patt matches as list. Case is ignored.''' | |
matchList=[] | |
cnt = 0 | |
for idx in df.index: | |
m = re.search(regex_patt, df.loc[idx, target_col], flags=re.IGNORECASE) #re.search(pattern, string) gets first match | |
if m is not None: | |
matchList.append(m.group(1).lower()) | |
else: | |
cnt+=1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################## | |
# # | |
# Compare appending performance - DataFrames versus lists # | |
# # | |
######################################################################## | |
import time | |
import pandas as pd | |
from random import randint | |
import sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
def write_table(df, table_name_str):
    """Write a DataFrame to a table in the local ``auto.sqlite`` database.

    An existing table with the same name is replaced, and the DataFrame
    index is not stored as a column.

    Args:
        df: DataFrame to persist (anything exposing ``to_sql``).
        table_name_str: Name of the destination SQLite table.

    Returns:
        Whatever ``df.to_sql`` returns (the number of rows written on recent
        pandas releases, ``None`` on older ones).
    """
    from contextlib import closing  # local import keeps this block self-contained

    # sqlite3's own context manager only commits or rolls back; it never
    # closes the connection. closing() guarantees the database handle is
    # released when the block exits, even if to_sql raises.
    # isolation_level=None puts the connection in autocommit mode.
    with closing(sqlite3.connect('auto.sqlite', isolation_level=None)) as conn:
        return df.to_sql(table_name_str, con=conn, if_exists='replace', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Save tables to PostgreSQL 11.3 database. | |
import psycopg2 #version 2.8.4 | |
from sqlalchemy import create_engine #version 1.3.11 | |
from sqlalchemy.dialects.postgresql import JSON, JSONB | |
#The text at the end, postgres, is the database name. | |
engine=create_engine("postgresql://postgres@localhost:5432/postgres") | |
#Write vehicle table with json columns |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
#scrape with this terminal command: scrapy crawl clspider -o mycity.json | |
#scrapy version = 1.60. Shift + Alt + F to format JSON in VS Code. | |
class ClspiderSpider(scrapy.Spider): | |
name = 'clspider' | |
allowed_domains = ['craigslist.org'] | |
start_urls = ['https://elpaso.craigslist.org/search/cta?auto_make_model=ford'] #cta is cars + trucks by ALL |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report the class names of both datasets, then the tensor shapes of the
# first training batch only (the loop stops after one iteration).
for caption, dataset in (('1 train class names:', train_ds),
                         ('2 val class names:', val_ds)):
    print(caption, dataset.class_names)

for image_batch, labels_batch in train_ds:
    images_shape = image_batch.shape
    print('3 images, xpixels, ypixels, color_channels:', images_shape)
    labels_shape = labels_batch.shape
    print('4 labels:', labels_shape)
    break
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Preprocess dataset per https://www.tensorflow.org/tutorials/load_data/images | |
train_ds = train_ds.map(lambda x, y: (preprocess_input(x), y)) | |
val_ds = val_ds.map(lambda x, y: (preprocess_input(x), y)) | |
# Show min/max of first image. Notice the pixel values after preprocess. | |
image_batch, labels_batch = next(iter(train_ds)) | |
first_image = image_batch[0] | |
print('image min and max values:', np.min(first_image), np.max(first_image), '\n\n') | |
# Load and freeze VGG16 model. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import pandas as pd | |
def annotate_w_xy_corr(x, y, **kwargs):
    """Write the x/y correlation coefficient onto the current Matplotlib axes.

    The coefficient is read from ``np.corrcoef(x, y)``, rounded to three
    decimals, and drawn as dark-red text positioned at (0.3, 0.07) in the
    axes' own coordinate system. Extra keyword arguments are accepted and
    ignored. Relies on the module-level names ``np`` and ``plt``.
    """
    corr_matrix = np.corrcoef(x, y)
    # Off-diagonal entry of the 2x2 matrix is the x-versus-y coefficient.
    text = r'corr = ' + str(round(corr_matrix[0, 1], 3))
    current_axes = plt.gca()
    current_axes.annotate(
        text,
        xy=(0.3, .07),
        xycoords=current_axes.transAxes,
        c='darkred',
    )  # size = 18
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.applications.vgg16 import VGG16 | |
#Create dataset. The file_paths method shows train_ds and val_ds are mutually exclusive. | |
train_ds = tf.keras.utils.image_dataset_from_directory( | |
'./images/train/', | |
labels='inferred', | |
shuffle=True, | |
seed=8, | |
image_size=(224, 224), | |
batch_size=32) |
OlderNewer