Skip to content

Instantly share code, notes, and snippets.

View josephmachado's full-sized avatar
:octocat:
Working

Start Data Engineering josephmachado

:octocat:
Working
View GitHub Profile
We can make this file beautiful and searchable if this error is corrected: It looks like row 9 should actually have 7 columns, instead of 1 in line 8.
s_suppkey,s_name,s_address,s_nationkey,s_phone,s_acctbal,s_comment
1,Supplier#000000001,"sdrGnXCDRcfriBvY0KL,ipCanOTyK t NN1",17,27-918-335-1736,5755.94, instructions. slyly unusual
2,Supplier#000000002,TRMhVHz3XiFuhapxucPo1,5,15-679-861-2259,4032.68, the pending packages. furiously expres
3,Supplier#000000003,BZ0kXcHUcHjx62L7CjZSql7gbWQ6RPn5X,1,11-383-516-1199,4192.40,ong the fluffily idle packages: bold foxes mold silent package
4,Supplier#000000004,qGTQJXogS83a7MBnEweGHKevK,15,25-843-787-7479,4641.08,al braids affix through the regular
5,Supplier#000000005,lONEYAh9sFWAF75tO,11,21-151-690-3663,-283.84,accounts. instructions ha
6,Supplier#000000006,zaux5FTzToEg,14,24-696-997-4969,1365.79, sleep fluffily against the blithely special dugouts. furiously express th
7,Supplier#000000007, 0W7IPdkpWycUbQ9Adp6B,23,33-990-965-2201,6820.35,ke across the slyly ironic packages. carefully special pinto beans wake blithely. even de
posits los
8,Supplier#000000008,S8AWPqjYlanEQlcDO2WV 97uCVtxCk,17,27-498-742-3860,7627.85,i
{"name":"Data Engineering","icon":"database","settings":"{\"settings\":\"{\\r\\n \\\"python.analysis.autoImportCompletions\\\": true,\\r\\n \\\"python.analysis.fixAll\\\": [\\\"source.unusedImports\\\"], \\r\\n \\\"editor.defaultFormatter\\\": \\\"charliermarsh.ruff\\\",\\r\\n \\\"[python]\\\": {\\r\\n \\\"editor.formatOnType\\\": true,\\r\\n \\\"editor.formatOnSave\\\": true,\\r\\n \\\"editor.codeActionsOnSave\\\": {\\r\\n \\\"source.fixAll\\\": \\\"explicit\\\",\\r\\n \\\"source.organizeImports\\\": \\\"explicit\\\"\\r\\n }\\r\\n },\\r\\n \\\"editor.inlineSuggest.enabled\\\": true,\\r\\n \\\"editor.lineHeight\\\": 17,\\r\\n \\\"breadcrumbs.enabled\\\": false,\\r\\n \\\"notebook.output.scrolling\\\": true,\\r\\n \\\"jupyter.themeMatplotlibPlots\\\": true,\\r\\n \\\"jupyter.widgetScriptSources\\\": [\\r\\n \\\"unpkg.com\\\",\\r\\n \\\"jsdelivr.com\\\"\\r\\n ],\\r\\n \\\"files.exclude\\\": {\\r\\n \\\
{"name":"Data Engineering","icon":"database","settings":"{\"settings\":\"{\\r\\n \\\"python.analysis.autoImportCompletions\\\": true,\\r\\n \\\"python.analysis.fixAll\\\": [\\\"source.unusedImports\\\"], \\r\\n \\\"editor.defaultFormatter\\\": \\\"charliermarsh.ruff\\\",\\r\\n \\\"[python]\\\": {\\r\\n \\\"editor.formatOnType\\\": true,\\r\\n \\\"editor.formatOnSave\\\": true\\r\\n },\\r\\n \\\"editor.inlineSuggest.enabled\\\": true,\\r\\n \\\"editor.lineHeight\\\": 17,\\r\\n \\\"breadcrumbs.enabled\\\": false,\\r\\n \\\"notebook.output.scrolling\\\": true,\\r\\n \\\"jupyter.themeMatplotlibPlots\\\": true,\\r\\n \\\"jupyter.widgetScriptSources\\\": [\\r\\n \\\"unpkg.com\\\",\\r\\n \\\"jsdelivr.com\\\"\\r\\n ],\\r\\n \\\"files.exclude\\\": {\\r\\n \\\"**/__pycache__\\\": true,\\r\\n \\\"**/.cache\\\": true,\\r\\n \\\"**/.coverage\\\": true,\\r\\n \\\"**/.coverage.*\\\": true,\\r\\n \\\"**/.hypothesis\\\": tru
import json
from fasthtml.common import *
import requests
import logging
import sys
app, rt = fast_app(hdrs=(Script(src="https://cdn.plot.ly/plotly-2.32.0.min.js"),))
# Extract function
with orders as (
select
order_id,
customer_id,
order_status,
order_purchase_timestamp::TIMESTAMP AS order_purchase_timestamp,
order_approved_at::TIMESTAMP AS order_approved_at,
order_delivered_carrier_date::TIMESTAMP AS order_delivered_carrier_date,
order_delivered_customer_date::TIMESTAMP AS order_delivered_customer_date,
order_estimated_delivery_date::TIMESTAMP AS order_estimated_delivery_date
-- Create the employees table
CREATE TABLE employees (
employee_id INTEGER PRIMARY KEY,
name TEXT,
position TEXT
);
-- Insert initial data into employees table
INSERT INTO employees (employee_id, name, position) VALUES
(1, 'John Doe', 'Manager'),
import polars as pl
from ydata_profiling import ProfileReport
# Load the Iris dataset using Seaborn
import seaborn as sns
df = sns.load_dataset('iris')
# Convert to Polars DataFrame
df_polars = pl.DataFrame(df)
@josephmachado
josephmachado / et_dependecy_injection.py
Created July 15, 2024 13:58
Dependency injection example
import requests
import logging
import sys
# Extract function
def get_exchange_data(url):
try:
r = requests.get(url)
except requests.ConnectionError as ce:
logging.error(f"There was an error with the request, {ce}")
@josephmachado
josephmachado / fake_coincap_exchange_data.py
Created July 6, 2024 19:24
fake_coincap_exchange_data.py
from faker import Faker
import json
import multiprocessing
fake = Faker()
def generate_fake_entry(_):
return {
"exchangeId": fake.domain_word(),
"name": fake.company(),
@josephmachado
josephmachado / trend_line.sql
Created July 1, 2024 15:18
Trend line calculation in SQL
WITH DailyTotals AS (
SELECT
o_orderdate as orderdate,
SUM(o_totalprice) AS totalprice
FROM orders
GROUP BY o_orderdate
),
DailyTotalsWithLag AS (
SELECT
orderdate,