Skip to content

Instantly share code, notes, and snippets.

import json
def read_json(filename='data.json'):
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: Path of the JSON file to read. Defaults to ``data.json``.

    Returns:
        The decoded Python object (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's default locale
    # encoding, which would mis-decode non-ASCII content on some systems.
    with open(filename, 'r', encoding='utf-8') as f:
        # json.load parses directly from the file object.
        return json.load(f)
def write_json(data, filename='data.json'):
    """Serialize *data* as JSON into *filename*, indented by 4 spaces.

    Any existing file at *filename* is overwritten.

    Args:
        data: A JSON-serializable Python object.
        filename: Path of the file to write. Defaults to ``data.json``.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *data* contains an object json cannot serialize.
    """
    # Explicit encoding keeps the output independent of the platform's
    # default locale encoding.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
echo "Starting setup"
# Install the Xcode Command Line Tools.
# The flag must be spelled with two ASCII hyphens; an em-dash pasted from
# rich text is not recognised as an option by xcode-select.
xcode-select --install
# Install Homebrew using its install script fetched from GitHub
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
# Update Homebrew recipes
brew update
import os
import asyncio
import contextlib
from pprint import pprint
from dotenv import load_dotenv
from netsuite import NetSuite, Config, TokenAuth
load_dotenv() # take environment variables from .env.
config = Config(

Problem

Determine the schema of the JSON files so that it can be used when creating the table.

0. Create a VM in Google Cloud

gcloud compute instances create duckdb-vs-spark \
  --project=PROJECT_ID \
  --zone=europe-west4-a \
date url
2020-01-01 github.com
2020-01-02 google.com

Style guide

// bad
// NOTE(review): the guide's stated reason is not visible here; presumably the
// cramped spacing, snake_case names and the missing semicolon after the
// function expression — confirm against the full style guide.
// Function expression that returns twice its argument.
var func_dbl=function(n){return n*2}

// Values to transform.
var my_numbers=[1,2,3];

// Apply func_dbl to every element of my_numbers; d is [2, 4, 6].
var d=my_numbers.map(func_dbl);
from kafka import KafkaProducer
from kafka.errors import KafkaError
import logging
# Configure the root logger to emit records at DEBUG level and above.
logging.basicConfig(level=logging.DEBUG)
# Create the producer, bootstrapping from the broker address localhost:9092.
producer = KafkaProducer(bootstrap_servers=['localhost:9092'])
# Presumably the topic that messages are sent to — its use is not visible here.
topic_name = 'raw_data'
def on_send_success(record_metadata):
# df_raw - input dataset
def parse_and_clean(data_frame: pd.DataFrame) -> pd.DataFrame:
# parse json
df = data_frame.join(data_frame["user_json"].apply(json.loads).apply(pd.Series))
df["user_json"] = df["user_json"].apply(lambda x: x.replace('\n',''))
# explode visits
df2 = pd.DataFrame({
"uid": df.uid.repeat(df.visits.str.len()),
"sites" : np.concatenate(df.visits.values)}
# app/auth/resources.py
# import section
from flask_jwt_extended import get_raw_jwt
from app.auth.utils import add_token_to_db, revoke_token
class UserRegistration(Resource):
def post(self):
#...
# app/auth/utils.py
from flask_jwt_extended import decode_token
from datetime import datetime
from app.models import TokenModel
from app import db
def add_token_to_db(encoded_token):
decoded_token = decode_token(encoded_token)