Skip to content

Instantly share code, notes, and snippets.

View Mageswaran1989's full-sized avatar

Mageswaran Mageswaran1989

  • Hyderabad, India
View GitHub Profile
# https://dzone.com/articles/execute-spark-applications-with-apache-livy
# https://www.statworx.com/at/blog/access-your-spark-cluster-from-everywhere-with-apache-livy/
# https://livy.incubator.apache.org/docs/latest/rest-api.html
import time
from typing import Optional
import requests
import argparse
import json
from datasets import load_metric
import datasets
from transformers import AutoTokenizer
metric = load_metric("seqeval")
logger = datasets.logging.get_logger(__name__)
class HFTokenizer(object):
NAME = "HFTokenizer"
@Mageswaran1989
Mageswaran1989 / sroie2019_dataset.py
Created October 28, 2021 05:01
sroie2019_dataset.py
class SROIE2019(datasets.GeneratorBasedBuilder):
"""SROIE2019 dataset."""
BUILDER_CONFIGS = [
SROIE2019Config(name="SROIE2019", version=datasets.Version("1.0.0"), description="SROIE2019 dataset"),
]
def __init__(self,
*args,
cache_dir,
# https://twitter.com/karpathy/status/1333217287155847169/photo/1
"""
Say where you live, 1 in 1,000 actively have covid-19.
You feel fatigued and have a slight sore throat, so you take a test, get a positive result.
You learn the test has a 1% false-positive rate and a 10% false-negative rate.
What's your best guess for your chances of having covid-19?
"""
import pandas as pd
import gin
from sklearn.base import BaseEstimator, TransformerMixin
import nltk
from snorkel.labeling import labeling_function
from snorkel.labeling import LFApplier
from snorkel.labeling import LFAnalysis
from snorkel.labeling import LabelModel
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#!/usr/bin/env python
__author__ = "Mageswaran Dhandapani"
__copyright__ = "Copyright 2020, The Spark Structured Playground Project"
__credits__ = []
__license__ = "Apache License"
__version__ = "2.0"
__maintainer__ = "Mageswaran Dhandapani"
__email__ = "mageswaran1989@gmail.com"
__status__ = "Education Purpose"
This file has been truncated, but you can view the full file.
<!DOCTYPE html>
<html>
<head><meta charset="utf-8">
<title>SparkOptimization</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
# Cron expression: fires at minute 0 of every hour.
# NOTE(review): presumably used as the DAG schedule -- confirm at the usage site.
SCHEDULE_INTERVAL = "0 * * * *"

# Starts out as None; nothing in this excerpt assigns a real value.
cluster_ids = None

# Built once at module import time; get_fernet comes from outside this excerpt.
fernet = get_fernet()

# NOTE(review): judging by its name and keys this is the Airflow `default_args`
# mapping handed to a DAG -- confirm against the DAG definition.
default_args = dict(
    depends_on_past=False,
    # Evaluated at import time: one day before "now".
    start_date=airflow.utils.dates.days_ago(1),
    retries=1,
    # Wait one hour between retry attempts.
    retry_delay=datetime.timedelta(hours=1),
)
from airflow.models import BaseOperator
class EMRMonitorOperator(BaseOperator):
@apply_defaults
def __init__(self, job_flow_id, job_flow_name, *args, **kwargs):
super(EMRMonitorOperator, self).__init__(*args, **kwargs)
self._emr = EMRMonitor(job_flow_id=job_flow_id, job_flow_name=job_flow_name)
self._job_flow_id = job_flow_id
self._job_flow_name = job_flow_name