This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.types import * | |
# Auxiliary functions
# Pandas types -> Spark types
def equivalent_type(f):
    """Map a pandas dtype name to the corresponding Spark SQL data type.

    Args:
        f: The pandas dtype, compared against dtype-name strings such as
            ``'int64'``.

    Returns:
        A ``pyspark.sql.types`` instance. Any dtype that is not listed below
        falls back to ``StringType()``.
    """
    if f == 'datetime64[ns]':
        # NOTE(review): DateType has no time-of-day component; confirm that
        # dropping it is intended (TimestampType would preserve it).
        return DateType()
    elif f == 'int64':
        return LongType()
    elif f == 'int32':
        return IntegerType()
    elif f == 'float64':
        # pandas float64 is double precision; Spark's FloatType is single
        # precision and would silently lose precision.
        return DoubleType()
    else:
        return StringType()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
# text = "april 23 january 11 2020" | |
text = "enero 01 diciembre 31 2020" | |
def multi_date_text(text):
    """Translate Spanish month names in a space-separated string to English.

    Args:
        text: Space-separated string, e.g. ``"enero 01 diciembre 31 2020"``.

    NOTE(review): only the start of this function is visible in this excerpt;
    what is done with the translated tokens afterwards is not shown here.
    """
    month_dict = {
        "enero": "january", "febrero": "february", "marzo": "march",
        "abril": "april", "mayo": "may", "junio": "june", "julio": "july",
        "agosto": "august", "septiembre": "september", "octubre": "october",
        "noviembre": "november", "diciembre": "december",
    }
    # Index with the lower-cased token so capitalised names such as "Enero"
    # are translated instead of raising KeyError; other tokens pass through.
    text = [month_dict.get(token.lower(), token) for token in text.split(" ")]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import json | |
f_path = "ContactsPage.java"
# Decode explicitly rather than relying on the platform's default encoding.
# NOTE(review): assumes the Java source file is UTF-8 encoded; confirm.
with open(f_path, 'r', encoding="utf-8") as f:
    content = f.read()
#def flattern_json(d): | |
# if len(d) == 0: | |
# return {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import json | |
f_path = "HomePage.java"
# Decode explicitly rather than relying on the platform's default encoding.
# NOTE(review): assumes the Java source file is UTF-8 encoded; confirm.
with open(f_path, 'r', encoding="utf-8") as f:
    content = f.read()
def flattern_json(d): | |
if len(d) == 0: | |
return {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import javalang as jl | |
tree = jl.parse.parse(content) | |
def json_ast_encoder(o):
    """Convert an object into a value built from JSON-friendly containers.

    NOTE(review): this looks intended as the ``default=`` hook of
    ``json.dumps``; confirm against the caller.

    Args:
        o: The object to convert.

    Returns:
        A list of the members for a set (sorted when the members are
        comparable), the instance ``__dict__`` for objects that have one,
        and ``""`` for anything else.
    """
    if isinstance(o, (set, frozenset)):
        # Emit every member: handling only the empty set would let a
        # non-empty set fall through to "" and lose its contents.
        try:
            return sorted(o)
        except TypeError:
            # Members are not mutually comparable; keep them unsorted.
            return list(o)
    if hasattr(o, "__dict__"):
        return o.__dict__
    return ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
Coalesce( | |
try(date_parse(multi_date_format, '%Y-%m-%d %H:%i:%s')), | |
try(date_parse(multi_date_format, '%Y/%m/%d %H:%i:%s')), | |
try(date_parse(multi_date_format, '%Y/%m/%d')), | |
try(date_parse(multi_date_format, '%d %M %Y')), | |
try(date_parse(multi_date_format, '%d %M %Y %H:%i:%s')), | |
try(date_parse(multi_date_format, '%d/%m/%Y %H:%i:%s')), | |
try(date_parse(multi_date_format, '%d-%m-%Y %H:%i:%s')) | |
) as DateConvertedToTimestamp, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT * | |
FROM | |
( | |
SELECT '2021-01-15 13:01:01' AS multi_date_format | |
UNION ALL | |
SELECT '2021/01/15 13:01:02' | |
UNION ALL | |
SELECT '2021/01/03' | |
UNION ALL | |
SELECT '04 JAN 2021' |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
multi_date_format | |
07/01/2020 13:01 | |
03/01/2020 | |
02/01/2020 13:01 | |
01/01/2020 13:01 | |
05/01/2020 13:01 | |
04-Jan-20 | |
06/01/2020 13:01 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select date_parse('2021-12-31 00:00:00','%Y-%m-%d %H:%i:%s') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import time | |
import numpy as np | |
#http://eforexcel.com/wp/wp-content/uploads/2020/09/5m-Sales-Records.zip | |
df = pd.read_csv("5m Sales Records.csv") | |
def filter1(df): | |
start_time = time.time() | |
for i in df.Country.unique(): |
NewerOlder