Skip to content

Instantly share code, notes, and snippets.

PRODUCT_CATALOG_PATH = "mnt/PRODGEN2/OUTPUT/RUSSIA_DATA_FOUNDATION/UNIVERSALCATALOG/MASTERDATA/MARS_UNIVERSAL_PETCARE_MATERIALS.csv"
# Earlier version, pinned to an explicit schema:
# product_catalog = spark.read.csv(PRODUCT_CATALOG_PATH, header = "True", sep=chr(1), schema = product_catalogSchema)
# The schema binding was removed: when the schema changes (and that does
# happen) it had to be updated by hand, because otherwise new records
# stopped being loaded.
product_catalog = spark.read.csv(
    PRODUCT_CATALOG_PATH,
    header="True",
    sep=chr(1),  # fields are delimited by the \x01 control character
)
product_catalog = (
product_catalog
.withColumn("VMSTD", F.to_date(col("VMSTD"),'yyyy.MM.dd')) # Cahnge date format
.withColumn("START_DATE", F.to_date(col("START_DATE"),'yyyy-MM-dd')) # Cahnge date format
# Path of the Mars universal calendar CSV (per the file name in the path);
# the call that loads it is not visible in this excerpt.
CALENDAR_PATH = "mnt/PRODGEN2/OUTPUT/RUSSIA_DATA_FOUNDATION/UNIVERSALCATALOG/MASTERDATA/MARS_UNIVERSAL_CALENDAR.csv"
calendarSchema = StructType([
StructField("OriginalDate", DateType(), False),
StructField("MarsYear", IntegerType(), False),
StructField("MarsPeriod", IntegerType(), False),
StructField("MarsWeek", IntegerType(), False),
StructField("MarsDay", IntegerType(), False),
StructField("MarsPeriodName", StringType(), False),
StructField("MarsPeriodFullName", StringType(), False),
@kolommik
kolommik / forecasting_metrics.py
Created February 21, 2020 07:58 — forked from Kalki5/forecasting_metrics.py
Python NumPy functions for the most common forecasting metrics
import numpy as np
# Small positive constant; presumably used to avoid division by zero in
# metrics defined beyond this excerpt — TODO confirm against the full file.
EPSILON = 1e-10
def _error(actual: np.ndarray, predicted: np.ndarray):
""" Simple error """
return actual - predicted
@kolommik
kolommik / gist:e7a534ae39aca9384d2945ddcdcf8f93
Created September 5, 2019 13:43
Display a pandas DataFrame in Databricks as an HTML table
def show_pandas_DF(pd_DF):
    """Show a pandas DataFrame as an HTML table.

    Converts ``pd_DF`` to HTML markup with ``to_html()`` and hands the
    markup to ``displayHTML``.

    NOTE(review): ``displayHTML`` is not defined or imported in this
    snippet; it is presumably the notebook-provided Databricks helper —
    confirm it is available where this function is called.

    Args:
        pd_DF: Object exposing ``to_html()`` (a pandas DataFrame).
    """
    displayHTML(pd_DF.to_html())
# Example call: DATAFRAME is a placeholder that is not defined in this
# snippet; only its first three rows (head(3)) are displayed.
show_pandas_DF( DATAFRAME.head(3) )
@kolommik
kolommik / connect_and_load.py
Created August 2, 2018 20:07 — forked from hunterowens/connect_and_load.py
Pandas and MSSQL
import pymssql
import pandas as pd
# Create a Python DB connection object via pymssql (same form as the
# psycopg2 / python-mysql drivers).
# The port number can be looked up inside SQL Server.
conn = pymssql.connect(
    server="172.0.0.1",
    user="howens",
    password="some_fake_password",
    port=63642,
)

# Query text (described by the original author as college data).
stmt = "SELECT * FROM AlumniMirror..someTable"

# Execute the query and load the result into a pandas DataFrame.
df = pd.read_sql(stmt, conn)