Skip to content

Instantly share code, notes, and snippets.

Lakshay lakshay-arora

Block or report user

Report or block lakshay-arora

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View pipeline_5_pyspark.py
# create a sample data without the labels
# unlabeled sample rows (same four feature columns as training, no label)
unlabeled_rows = [
    (3.0, 'Z', 'S10', 40),
    (1.0, 'X', 'E10', 20),
    (4.0, 'A', 'S20', 10),
    (3.0, 'A', 'S10', 20),
    (4.0, 'X', 'D10', 30),
    (1.0, 'Z', 'E10', 20),
    (4.0, 'A', 'S10', 30),
]
# build the test DataFrame from the rows and an explicit column schema
sample_data_test = spark.createDataFrame(
    unlabeled_rows,
    ['feature_1', 'feature_2', 'feature_3', 'feature_4'],
)
View pipeline_4_pyspark.py
# stage 1: index the string column feature_2 into numeric feature_2_index
stage_1 = StringIndexer(inputCol='feature_2', outputCol='feature_2_index')
# stage 2: index the string column feature_3 into numeric feature_3_index
stage_2 = StringIndexer(inputCol='feature_3', outputCol='feature_3_index')
# stage 3: one-hot encode the two indexed columns produced by stages 1 and 2
# NOTE(review): OneHotEncoderEstimator was renamed OneHotEncoder in Spark 3.0 —
# confirm the Spark version this gist targets before upgrading
stage_3 = OneHotEncoderEstimator(
    inputCols=[stage_1.getOutputCol(), stage_2.getOutputCol()],
    outputCols=['feature_2_encoded', 'feature_3_encoded'],
)
# stage 4: assemble the raw and encoded features into one 'features' vector
# for the downstream logistic regression model
stage_4 = VectorAssembler(
    inputCols=['feature_1', 'feature_2_encoded', 'feature_3_encoded', 'feature_4'],
    outputCol='features',
)
View pipeline_3_pyspark.py
from pyspark.ml.classification import LogisticRegression
# create a sample dataframe with 4 features and 1 label column
sample_data_train = spark.createDataFrame([
(2.0, 'A', 'S10', 40, 1.0),
(1.0, 'X', 'E10', 25, 1.0),
(4.0, 'X', 'S20', 10, 0.0),
(3.0, 'Z', 'S10', 20, 0.0),
(4.0, 'A', 'E10', 30, 1.0),
(2.0, 'Z', 'S10', 40, 0.0),
View pipeline_2_pyspark.py
# stage 1: index the string column category_1 into numeric category_1_index
stage_1 = StringIndexer(inputCol='category_1', outputCol='category_1_index')
# stage 2: index the string column category_2 into numeric category_2_index
stage_2 = StringIndexer(inputCol='category_2', outputCol='category_2_index')
# stage 3: one-hot encode the indexed category_2 column
stage_3 = OneHotEncoderEstimator(
    inputCols=['category_2_index'],
    outputCols=['category_2_OHE'],
)
# chain the three stages so fit/transform run them in order
pipeline = Pipeline(stages=[stage_1, stage_2, stage_3])
View pipeline_1_pyspark.py
from pyspark.ml import Pipeline
# toy data: an integer id plus two categorical string columns
category_rows = [
    (1, 'L101', 'R'),
    (2, 'L201', 'C'),
    (3, 'D111', 'R'),
    (4, 'F210', 'R'),
    (5, 'D110', 'C'),
]
sample_df = spark.createDataFrame(category_rows, ['id', 'category_1', 'category_2'])
View vector_assembler.py
from pyspark.ml.feature import VectorAssembler
# specify the input and output columns of the vector assembler
assembler = VectorAssembler(inputCols=['Isboundary',
'Iswicket',
'Over',
'Runs',
'Batsman_Index',
'Bowler_Index',
'Batsman_OHE',
View ohe_pyspark.py
# one-hot encode both index columns in a single estimator
OHE = OneHotEncoderEstimator(
    inputCols=['Batsman_Index', 'Bowler_Index'],
    outputCols=['Batsman_OHE', 'Bowler_OHE'],
)
# fit learns the category cardinalities, transform appends the encoded columns
my_data = OHE.fit(my_data).transform(my_data)
# show each name next to its numeric index and one-hot vector (first 10 rows)
my_data.select('Batsman_Name', 'Batsman_Index', 'Batsman_OHE',
               'Bowler_Name', 'Bowler_Index', 'Bowler_OHE').show(10)
View string_index.py
from pyspark.ml.feature import StringIndexer, OneHotEncoderEstimator
# build one StringIndexer per name column: each maps distinct strings
# to numeric indices in a new *_Index column
SI_batsman = StringIndexer(inputCol='Batsman_Name', outputCol='Batsman_Index')
SI_bowler = StringIndexer(inputCol='Bowler_Name', outputCol='Bowler_Index')
# fit each indexer on the data, then append its indexed column
for indexer in (SI_batsman, SI_bowler):
    my_data = indexer.fit(my_data).transform(my_data)
View describe_pyspark.py
# summary statistics (count, mean, stddev, min, max) for the numeric columns
numeric_cols = my_data.select('Isball', 'Isboundary', 'Runs')
numeric_cols.describe().show()
View value_counts_pyspark.py
# per-batsman row counts — the Spark equivalent of pandas value_counts
batsman_counts = my_data.groupBy('Batsman_Name').count()
batsman_counts.show()
You can’t perform that action at this time.