Skip to content

Instantly share code, notes, and snippets.

View lakshay-arora's full-sized avatar
🇮🇳

Lakshay lakshay-arora

🇮🇳
  • Walmart
  • Bengaluru
View GitHub Profile
from collections import Counter
# define the python function
def my_function():
    """Count word occurrences in the file named by the ``data_path`` variable.

    The path is looked up with ``Variable.get("data_path")``; the file's
    contents are split on whitespace and tallied with ``Counter``.

    NOTE(review): this snippet appears truncated -- the tally is computed
    but neither returned nor stored in the visible code; confirm against
    the full source. ``Variable`` is also not imported in the visible code
    (presumably ``airflow.models.Variable`` -- confirm).
    """
    # get the variable value
    file_path = Variable.get("data_path")
    # open the file in a context manager so the handle is always closed,
    # even if reading raises
    with open(file_path) as file_:
        # read the file and calculate the word count
        data = Counter(file_.read().split())
### importing the required libraries
from datetime import timedelta
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from datetime import timedelta
# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG
# Operators; we need this to operate!
from airflow.operators.bash_operator import BashOperator
from airflow.utils.dates import days_ago
spark.createDataFrame(
[
(1, 'Lakshay'), # create your data here, make sure to be consistent in the types.
(2, 'Aniruddha'),
.
.
.
.
(100, 'Siddhart')
],
## 1. Parallelizing an in-memory data collection
# NOTE(review): `sc` is not defined in this snippet -- presumably an existing SparkContext; confirm.
my_list = [1, 2, 3, 4, 5]
my_list_rdd = sc.parallelize(my_list)
## 2. Referencing an external data file
# NOTE(review): "path_of_file" looks like a placeholder -- replace it with the real file path.
file_rdd = sc.textFile("path_of_file")
# create weekly demand collection
database.create_collection("weekly_demand")
result_1 = weekly_demand_collection.aggregate([
## stage 1
{
"$match" : {
"center_id" : {
"$eq" : 11
}
}
},
## stage 2
weekly_demand_collection.find_one()
# add a sheet titled 'runs' with 20 rows and 2 columns
sheet.add_worksheet(rows=20,cols=2,title='runs')
# get the instance of the second sheet (index 1)
# NOTE(review): presumably this is the 'runs' sheet added above -- confirm the spreadsheet held exactly one sheet before.
sheet_runs = sheet.get_worksheet(1)
# define function to add the image in the html file with the class name
def get_picture_html(path, tag):
    """Return an HTML snippet with *tag* in a paragraph followed by the image at *path*.

    Args:
        path: Image location; it is emitted in the ``src`` attribute
            prefixed with ``../``.
        tag: Text placed inside the ``<p>`` element that precedes the picture.

    Returns:
        The formatted HTML string. Both values are inserted verbatim
        (no HTML escaping is applied).
    """
    # Template split across two literals for line length; the concatenated
    # text is identical to the original single-line template.
    image_html = (
        '<p> {tag_name} </p> '
        '<picture> <img src= "../{path_name}" height="300" width="400"> </picture>'
    )
    return image_html.format(tag_name=tag, path_name=path)
# define function to add the list element in the html file
def get_count_html(category, count):
    """Return an HTML list item of the form ``<li> category : count </li>``.

    Args:
        category: Label shown before the colon.
        count: Value shown after the colon; formatted with ``str.format``.

    Returns:
        The formatted ``<li>`` string. Values are inserted verbatim
        (no HTML escaping is applied).
    """
    count_html = "<li> {category_name} : {count_} </li>"
    return count_html.format(category_name=category, count_=count)