Skip to content

Instantly share code, notes, and snippets.

View nehiljain's full-sized avatar

Nehil Jain nehiljain

View GitHub Profile
@nehiljain
nehiljain / parse_screenshot_openai.py
Last active January 28, 2024 21:46
Google Hotel Search Scraper V1
import instructor
import base64
import logging
import os
from openai import AsyncOpenAI, OpenAI
import asyncio
from pydantic import Field, BaseModel
from typing import List
from dotenv import find_dotenv, load_dotenv
#! /usr/bin/env sh
# Fetch a fresh checkout of data-cost-optimizer into ./temp-dco, next to this
# script, and change into it.

# Resolve the script's directory to an absolute path. `dirname "$0"` can be
# relative (e.g. "scripts" for `sh scripts/run.sh`); after `cd "$DIR"` a
# relative "$DIR/temp-dco" would be resolved a second time and not be found.
# CDPATH is cleared so `cd` cannot print an extra path into the substitution.
DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
# Stop if the directory cannot be entered; otherwise the `rm -rf` below would
# run in whatever directory the caller happened to be in.
cd "$DIR" || exit 1
echo "$DIR"
# Remove any stale checkout left by a previous run.
[ -d temp-dco ] && rm -rf temp-dco
# Without a successful clone there is nothing to change into.
git clone https://github.com/nehiljain/data-cost-optimizer.git temp-dco || exit 1
cd "$DIR/temp-dco" || exit 1
@nehiljain
nehiljain / outline.md
Created April 4, 2020 21:01
Outline for Airflow Summit CFP
@nehiljain
nehiljain / snowpipe_alembic_utils.py
Created November 19, 2019 12:18
Snowpipe Alembic Utils
'''This is the extension on the alembic API to facilitate the custom autogenerate script'''
import json
import logging
import os
import boto3
from alembic.autogenerate import comparators, renderers
from alembic.operations import MigrateOperation, Operations
from alembic.operations.ops import CreateTableOp, ModifyTableOps
@nehiljain
nehiljain / README-Template.md
Created July 28, 2019 17:09 — forked from PurpleBooth/README-Template.md
A template to make good README.md

Project Title

One Paragraph of project description goes here

Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.

Prerequisites

@nehiljain
nehiljain / fluentd.config
Created July 15, 2019 11:50
Setup Fluentd on Elasticbeanstalk 2019
files:
"/etc/td-agent/td-agent.conf":
owner: root
group: root
content: |
<source>
@type tail
path /var/log/eb-docker/containers/log.file
exclude_path ["/var/log/eb-docker/containers/exception.file"]
pos_file /var/log/td-agent/fluentd.log.pos
@nehiljain
nehiljain / main.tf
Last active July 15, 2019 11:35
Terraform to create kinesis firehose to s3
# Default AWS provider configuration, pinned to the us-east-1 region.
provider "aws" {
region = "us-east-1"
}
# S3 bucket named "pipeline-logs", created with the canned "private" ACL.
# NOTE(review): presumably the delivery target for the Kinesis Firehose stream
# mentioned in the gist title; that wiring is not visible here, so confirm.
resource "aws_s3_bucket" "pipeline_bucket" {
bucket = "pipeline-logs"
acl = "private"
# With force_destroy = false, the AWS provider documents that a bucket which
# still contains objects cannot be destroyed without error.
force_destroy = false
}
@nehiljain
nehiljain / s3sensor-example-airflow-part-2.py
Created June 14, 2018 14:06
Code Sample for Airflow II blog
from datetime import datetime, timedelta
from airflow.models import DAG
from airflow.operators.s3_key_sensor import S3KeySensor
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
schedule = timedelta(minutes=5)
args = {
 'owner': 'airflow',
 'start_date': days_ago(1),
 'depends_on_past': False,
# Suppose the data file name has the format "datatfile_YYYY-MM-DD.csv" (matching the '%Y-%m-%d' date format used below); this file arrives in S3 every day.
file_suffix = "{{ execution_date.strftime('%Y-%m-%d') }}"
bucket_key_template = 's3://[bucket_name]/datatfile_{}.csv'.format(file_suffix)
file_sensor = S3KeySensor(
 task_id='s3_key_sensor_task',
 poke_interval=60 * 30, # (seconds); checking file every half an hour
 timeout=60 * 60 * 12, # timeout in 12 hours
 bucket_key=bucket_key_template,
 bucket_name=None,
 wildcard_match=False,
@nehiljain
nehiljain / lesson-airflow-2-block1.py
Created March 24, 2018 18:01
lesson-airflow-2-block1
import sys
import math
print("Python Version {}".format(sys.version_info))
print("Math ceil function is built in: "
"{} and returns {}".format(math.ceil, type(math.ceil(3.6))))
from airflow.models import DAG
print("Math ceil function is overridden "