Skip to content

Instantly share code, notes, and snippets.

df = pd.read_sql_query('''SELECT od.*,
o.CustomerId, o.EmployeeId,
e.LastName AS EmpLastName,
c.Region,
p.CategoryId,
cat.CategoryName
FROM OrderDetail od
JOIN [Order] o
ON od.OrderId = o.Id
@rwilleynyc
rwilleynyc / NW_query_results.csv
Created August 8, 2019 17:24
sample results of sql query
Id OrderId ProductId UnitPrice Quantity Discount CustomerId EmployeeId EmpLastName Region CategoryId CategoryName
10248/11 10248 11 14.0 12 0.0 VINET 5 Buchanan Western Europe 4 Dairy Products
10248/42 10248 42 9.8 10 0.0 VINET 5 Buchanan Western Europe 5 Grains/Cereals
10248/72 10248 72 34.8 5 0.0 VINET 5 Buchanan Western Europe 4 Dairy Products
10249/14 10249 14 18.6 9 0.0 TOMSP 6 Suyama Western Europe 7 Produce
10249/51 10249 51 42.4 40 0.0 TOMSP 6 Suyama Western Europe 7 Produce
10250/41 10250 41 7.7 10 0.0 HANAR 4 Peacock South America 8 Seafood
10250/51 10250 51 42.4 35 0.15 HANAR 4 Peacock South America 7 Produce
10250/65 10250 65 16.8 15 0.15 HANAR 4 Peacock South America 2 Condiments
10251/22 10251 22 16.8 6 0.05 VICTE 3 Leverling Western Europe 5 Grains/Cereals
def get_columns_info(tbl_name):
# Create function to display column names
# and respective data type for a given table
cols_list = inspector.get_columns(tbl_name)
print(f'\nTable Name: {tbl_name}')
for column in cols_list:
print("\nName: {}\nType: {}".format(column['name'], column['type']))
@rwilleynyc
rwilleynyc / NW_sql_conn.py
Last active August 8, 2019 17:26
Create engine & show table names
import sqlalchemy
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import Session, sessionmaker
import pandas as pd
# Connect to the database
engine = create_engine('sqlite:///Northwind_small.sqlite', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
con = engine.connect()
@rwilleynyc
rwilleynyc / nchs_classification_report.csv
Last active August 6, 2019 13:36
Classification report for SVM
class precision recall f1-score
low 0.96 0.94 0.95
medium 0.86 0.91 0.89
high 0.94 0.88 0.91
@rwilleynyc
rwilleynyc / nchs_alc_data.csv
Created August 5, 2019 22:27
Alcohol results from web scraping
Index State Year beer wine spirits all_bevs
0 Alabama 2016 1.09 0.26 0.65 2.01
1 Alabama 2015 1.11 0.26 0.64 2.01
2 Alabama 2014 1.13 0.25 0.62 2.0
3 Alabama 2013 1.14 0.25 0.6 1.99
4 Alabama 2012 1.16 0.24 0.6 2.0
5 Alabama 2011 1.16 0.24 0.59 1.98
6 Alabama 2010 1.17 0.23 0.58 1.98
7 Alabama 2009 1.19 0.22 0.57 1.98
8 Alabama 2008 1.22 0.22 0.57 2.01
@rwilleynyc
rwilleynyc / nchs_alc_html.html
Created August 5, 2019 22:20
Sample of html from alcohol data site
<tbody>
<tr>
<th id="u2_0" scope="row" abbr="Alabama" colspan="6">
<div align="left">
<strong>Alabama</strong></div>
</th>
</tr>
<tr>
<th class="stubs" id="u60_0" scope="row" abbr="2016">2016</th>
<td class="datagray" headers="u60_0 u1_0 u0_1">1.09</td>
@rwilleynyc
rwilleynyc / nchs_get_alc_data.py
Last active August 5, 2019 22:30
Beautiful Soup to get alcohol data
from bs4 import BeautifulSoup
import requests
years = []
datagray = []
data = []
states = []
# Use loop to scan site data and append to lists
for i in range(1, 17, 1):
@rwilleynyc
rwilleynyc / nchs_ny_smoke.csv
Created August 5, 2019 20:35
Smoking data for New York
Index Year State Name Value Lower CI Upper CI Source
31 1999 New York 24.1 26.0 22.2 AHR data
82 2000 New York 21.8 23.6 20.0 AHR data
133 2001 New York 21.6 23.2 20.0 AHR data
184 2002 New York 23.2 24.8 21.6 AHR data
235 2003 New York 22.3 23.8 20.8 AHR data
286 2004 New York 21.6 22.9 20.3 AHR data
337 2005 New York 19.9 21.2 18.6 AHR data
388 2006 New York 20.5 21.7 19.3 AHR data
439 2007 New York 18.2 19.5 16.9 AHR data
@rwilleynyc
rwilleynyc / nchs_smoking.py
Last active August 5, 2019 20:36
Get smoking data from UHF
# Save individual files from United Health Foundation's website
for i in range(1999, 2017):
url = f'https://www.americashealthrankings.org/api/v1/downloads/report/1/{i}'
s=requests.get(url).content
pd.read_csv(io.StringIO(s.decode('utf-8'))).to_csv(f'data_files/smoking/{i}-Annual.csv')
# Create initial dataframe for 1999 data
smoking = pd.read_csv('data_files/smoking/1999-Annual.csv')
# Append data for every other year