Raymond Willey rwilleynyc

## NW_sql_query.py
df = pd.read_sql_query('''SELECT od.*,
                       o.CustomerId, o.EmployeeId,
                       e.LastName AS EmpLastName,
                       c.Region,
                       p.CategoryId,
                       cat.CategoryName

                       FROM OrderDetail od
                       JOIN [Order] o
                       ON od.OrderId = o.Id

## NW_query_results.csv

          
            Id
            OrderId
            ProductId
            UnitPrice
            Quantity
            Discount
            CustomerId
            EmployeeId
            EmpLastName
            Region
            CategoryId
            CategoryName

            
              10248/11
              10248
              11
              14.0
              12
              0.0
              VINET
              5
              Buchanan
              Western Europe
              4
              Dairy Products

            
              10248/42
              10248
              42
              9.8
              10
              0.0
              VINET
              5
              Buchanan
              Western Europe
              5
              Grains/Cereals

            
              10248/72
              10248
              72
              34.8
              5
              0.0
              VINET
              5
              Buchanan
              Western Europe
              4
              Dairy Products

            
              10249/14
              10249
              14
              18.6
              9
              0.0
              TOMSP
              6
              Suyama
              Western Europe
              7
              Produce

            
              10249/51
              10249
              51
              42.4
              40
              0.0
              TOMSP
              6
              Suyama
              Western Europe
              7
              Produce

            
              10250/41
              10250
              41
              7.7
              10
              0.0
              HANAR
              4
              Peacock
              South America
              8
              Seafood

            
              10250/51
              10250
              51
              42.4
              35
              0.15
              HANAR
              4
              Peacock
              South America
              7
              Produce

            
              10250/65
              10250
              65
              16.8
              15
              0.15
              HANAR
              4
              Peacock
              South America
              2
              Condiments

            
              10251/22
              10251
              22
              16.8
              6
              0.05
              VICTE
              3
              Leverling
              Western Europe
              5
              Grains/Cereals

## NW_db_structure.py
def get_columns_info(tbl_name):
    # Create function to display column names
    # and respective data type for a given table
    cols_list = inspector.get_columns(tbl_name)

    print(f'\nTable Name: {tbl_name}')

    for column in cols_list:
        print("\nName: {}\nType: {}".format(column['name'], column['type']))

## NW_sql_conn.py
import sqlalchemy
from sqlalchemy import create_engine, inspect
from sqlalchemy.orm import Session, sessionmaker
import pandas as pd

# Connect to the database
engine = create_engine('sqlite:///Northwind_small.sqlite', echo=True)
Session = sessionmaker(bind=engine)
session = Session()
con = engine.connect()

## nchs_classification_report.csv

          
            class
             precision
             recall
             f1-score

            
              low
               0.96
               0.94
               0.95

            
              medium
               0.86
               0.91
               0.89

            
              high
               0.94
               0.88
               0.91

## nchs_alc_data.csv

          
            Index
            State
            Year
            beer
            wine
            spirits
            all_bevs

            
              0
              Alabama
              2016
              1.09
              0.26
              0.65
              2.01

            
              1
              Alabama
              2015
              1.11
              0.26
              0.64
              2.01

            
              2
              Alabama
              2014
              1.13
              0.25
              0.62
              2.0

            
              3
              Alabama
              2013
              1.14
              0.25
              0.6
              1.99

            
              4
              Alabama
              2012
              1.16
              0.24
              0.6
              2.0

            
              5
              Alabama
              2011
              1.16
              0.24
              0.59
              1.98

            
              6
              Alabama
              2010
              1.17
              0.23
              0.58
              1.98

            
              7
              Alabama
              2009
              1.19
              0.22
              0.57
              1.98

            
              8
              Alabama
              2008
              1.22
              0.22
              0.57
              2.01

## nchs_alc_html.html
<tbody>
  <tr>
        <th id="u2_0" scope="row" abbr="Alabama" colspan="6">
          <div align="left">
          <strong>Alabama</strong></div>
        </th>
  </tr>
  <tr>
        <th class="stubs" id="u60_0" scope="row" abbr="2016">2016</th>
        <td class="datagray" headers="u60_0 u1_0 u0_1">1.09</td>

## nchs_get_alc_data.py
from bs4 import BeautifulSoup
import requests

years = []
datagray = []
data = []
states = []

# Use loop to scan site data and append to lists
for i in range(1, 17, 1):

## nchs_ny_smoke.csv

          
            Index
            Year
            State Name
            Value
            Lower CI
            Upper CI
            Source

            
              31
              1999
              New York
              24.1
              26.0
              22.2
              AHR data

            
              82
              2000
              New York
              21.8
              23.6
              20.0
              AHR data

            
              133
              2001
              New York
              21.6
              23.2
              20.0
              AHR data

            
              184
              2002
              New York
              23.2
              24.8
              21.6
              AHR data

            
              235
              2003
              New York
              22.3
              23.8
              20.8
              AHR data

            
              286
              2004
              New York
              21.6
              22.9
              20.3
              AHR data

            
              337
              2005
              New York
              19.9
              21.2
              18.6
              AHR data

            
              388
              2006
              New York
              20.5
              21.7
              19.3
              AHR data

            
              439
              2007
              New York
              18.2
              19.5
              16.9
              AHR data

## nchs_smoking.py
# Save individual files from United Health Foundation's website
for i in range(1999, 2017):
    url = f'https://www.americashealthrankings.org/api/v1/downloads/report/1/{i}'
    s=requests.get(url).content
    pd.read_csv(io.StringIO(s.decode('utf-8'))).to_csv(f'data_files/smoking/{i}-Annual.csv')

# Create initial dataframe for 1999 data
smoking = pd.read_csv('data_files/smoking/1999-Annual.csv')

# Append data for every other year
	df = pd.read_sql_query('''SELECT od.*,
	o.CustomerId, o.EmployeeId,
	e.LastName AS EmpLastName,
	c.Region,
	p.CategoryId,
	cat.CategoryName

	FROM OrderDetail od
	JOIN [Order] o
	ON od.OrderId = o.Id
Id	OrderId	ProductId	UnitPrice	Quantity	Discount	CustomerId	EmployeeId	EmpLastName	Region	CategoryId	CategoryName
10248/11	10248	11	14.0	12	0.0	VINET	5	Buchanan	Western Europe	4	Dairy Products
10248/42	10248	42	9.8	10	0.0	VINET	5	Buchanan	Western Europe	5	Grains/Cereals
10248/72	10248	72	34.8	5	0.0	VINET	5	Buchanan	Western Europe	4	Dairy Products
10249/14	10249	14	18.6	9	0.0	TOMSP	6	Suyama	Western Europe	7	Produce
10249/51	10249	51	42.4	40	0.0	TOMSP	6	Suyama	Western Europe	7	Produce
10250/41	10250	41	7.7	10	0.0	HANAR	4	Peacock	South America	8	Seafood
10250/51	10250	51	42.4	35	0.15	HANAR	4	Peacock	South America	7	Produce
10250/65	10250	65	16.8	15	0.15	HANAR	4	Peacock	South America	2	Condiments
10251/22	10251	22	16.8	6	0.05	VICTE	3	Leverling	Western Europe	5	Grains/Cereals
	def get_columns_info(tbl_name):
	# Create function to display column names
	# and respective data type for a given table
	cols_list = inspector.get_columns(tbl_name)

	print(f'\nTable Name: {tbl_name}')

	for column in cols_list:
	print("\nName: {}\nType: {}".format(column['name'], column['type']))
	import sqlalchemy
	from sqlalchemy import create_engine, inspect
	from sqlalchemy.orm import Session, sessionmaker
	import pandas as pd

	# Connect to the database
	engine = create_engine('sqlite:///Northwind_small.sqlite', echo=True)
	Session = sessionmaker(bind=engine)
	session = Session()
	con = engine.connect()
class	precision	recall	f1-score
low	0.96	0.94	0.95
medium	0.86	0.91	0.89
high	0.94	0.88	0.91
Index	State	Year	beer	wine	spirits	all_bevs
0	Alabama	2016	1.09	0.26	0.65	2.01
1	Alabama	2015	1.11	0.26	0.64	2.01
2	Alabama	2014	1.13	0.25	0.62	2.0
3	Alabama	2013	1.14	0.25	0.6	1.99
4	Alabama	2012	1.16	0.24	0.6	2.0
5	Alabama	2011	1.16	0.24	0.59	1.98
6	Alabama	2010	1.17	0.23	0.58	1.98
7	Alabama	2009	1.19	0.22	0.57	1.98
8	Alabama	2008	1.22	0.22	0.57	2.01
	<tbody>
	<tr>
	<th id="u2_0" scope="row" abbr="Alabama" colspan="6">
	<div align="left">
	<strong>Alabama</strong></div>
	</th>
	</tr>
	<tr>
	<th class="stubs" id="u60_0" scope="row" abbr="2016">2016</th>
	<td class="datagray" headers="u60_0 u1_0 u0_1">1.09</td>
	from bs4 import BeautifulSoup
	import requests

	years = []
	datagray = []
	data = []
	states = []

	# Use loop to scan site data and append to lists
	for i in range(1, 17, 1):
Index	Year	State Name	Value	Lower CI	Upper CI	Source
31	1999	New York	24.1	26.0	22.2	AHR data
82	2000	New York	21.8	23.6	20.0	AHR data
133	2001	New York	21.6	23.2	20.0	AHR data
184	2002	New York	23.2	24.8	21.6	AHR data
235	2003	New York	22.3	23.8	20.8	AHR data
286	2004	New York	21.6	22.9	20.3	AHR data
337	2005	New York	19.9	21.2	18.6	AHR data
388	2006	New York	20.5	21.7	19.3	AHR data
439	2007	New York	18.2	19.5	16.9	AHR data
	# Save individual files from United Health Foundation's website
	for i in range(1999, 2017):
	url = f'https://www.americashealthrankings.org/api/v1/downloads/report/1/{i}'
	s=requests.get(url).content
	pd.read_csv(io.StringIO(s.decode('utf-8'))).to_csv(f'data_files/smoking/{i}-Annual.csv')

	# Create initial dataframe for 1999 data
	smoking = pd.read_csv('data_files/smoking/1999-Annual.csv')

	# Append data for every other year