# pandas
df[col5] = pd.to_datetime(df[col5], errors='coerce')
# pyspark
data_regex = r"\d{2,4}(\.|\-|\/|\\)+\d{2,4}(\.|\-|\/|\\)+\d{2,4}(\s)*(\d{2}\:\d{2}\:\d{2})?(\.\d{3})?|^$"
# only parse values that actually look like a date; rlike checks whether the
# string matches data_regex (regexp_replace(...).isNotNull() would be true for
# any non-null string)
df = df.withColumn(col5, F.when(F.col(col5).rlike(data_regex),
                                F.to_timestamp(F.col(col5), 'yyyy/MM/dd')).otherwise(None))
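A quick way to sanity-check the pandas version is a throwaway frame (the column name and sample values here are invented for illustration): errors='coerce' turns anything unparseable into NaT instead of raising.
import pandas as pd

demo = pd.DataFrame({'col5': ['2021/03/15', '2021/04/01', 'not a date', None]})
demo['col5'] = pd.to_datetime(demo['col5'], errors='coerce')
print(demo['col5'].tolist())  # last two entries come back as NaT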
# pandas
# pd.isna handles None and every NaN variant (NaN fails == comparisons, so list membership is unreliable)
df[col4] = df[col4].apply(lambda m: None if pd.isna(m) else int(float(m)))
# pyspark
def floatint(x):
    return int(float(x))

# give the udf an explicit return type; otherwise it would emit strings
int_udf = F.udf(lambda m: None if m is None else floatint(m), 'int')
df = df.withColumn(col4, F.when(F.col(col4).isNotNull(), int_udf(F.col(col4))).otherwise(None))
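Rough usage sketch for the pandas side (the values are made up): strings like '3.0' become proper ints, while missing values stay missing instead of blowing up in int(float(...)).
import pandas as pd

demo = pd.DataFrame({'col4': ['3.0', '7', None, float('nan')]})
demo['col4'] = demo['col4'].apply(lambda m: None if pd.isna(m) else int(float(m)))
print(demo['col4'].tolist())  # 3 and 7 as ints, missing entries stay null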
# pandas
df[col3] = df[col3].replace(regex=r"[^0-9.]", value="")  # keep only digits and dots
df[col3] = pd.to_numeric(df[col3], errors='coerce')
# pyspark
df = df.withColumn(col3, F.regexp_replace(F.col(col3), r"[^0-9.]", '').cast("double"))
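To see what the character strip buys you, here is the pandas pair run on a few invented messy strings:
import pandas as pd

demo = pd.DataFrame({'col3': ['$1,234.56', ' 42 ', 'n/a']})
demo['col3'] = demo['col3'].replace(regex=r"[^0-9.]", value="")
demo['col3'] = pd.to_numeric(demo['col3'], errors='coerce')
print(demo['col3'].tolist())  # [1234.56, 42.0, nan]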
# pandas
row_ct = df.shape[0]
num_ct = pd.to_numeric(df[col3], errors='coerce').count() # coerce makes nan, count drops nan
# another check using regex
num_regex = r"^((-)?[0-9]+)(,[0-9]+)*(\.[0-9]+)?$|(^$)"
all_are_nums = all(df[col3].fillna('').astype(str).apply(lambda x: re.match(num_regex, x)))
if (num_ct == row_ct) or all_are_nums:
    df[col3] = pd.to_numeric(df[col3], errors='coerce')
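A compact demonstration of the guard (frame contents are hypothetical): the cast only happens when every non-empty cell looks numeric, so a genuinely textual column is left alone.
import re
import pandas as pd

num_regex = r"^((-)?[0-9]+)(,[0-9]+)*(\.[0-9]+)?$|(^$)"
demo = pd.DataFrame({'col3': ['1', '2.5', '']})
ok = all(demo['col3'].fillna('').astype(str).apply(lambda x: re.match(num_regex, x)))
if ok:
    demo['col3'] = pd.to_numeric(demo['col3'], errors='coerce')
print(demo['col3'].tolist())  # [1.0, 2.5, nan]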
# pandas
df[col2] = np.where(df[col2] == 1, True, False)
# pyspark
df = df.withColumn(col2, F.when(F.col(col2) == 1, True).otherwise(False))
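For completeness, the same flag conversion on toy data (values invented); note that nulls and anything other than 1 fall through to False.
import numpy as np
import pandas as pd

demo = pd.DataFrame({'col2': [1, 0, None, 1]})
demo['col2'] = np.where(demo['col2'] == 1, True, False)
print(demo['col2'].tolist())  # [True, False, False, True]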
yvan / pandas-pyspark-simple.py
Simple Transform
# in pandas
df[col1] = df[col1]*5
# in spark
df = df.withColumn(col1, F.col(col1)*5)
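To actually run any of the pyspark one-liners above you need a session and an F alias; a minimal, self-contained sketch (the session setup and sample data are assumptions, not part of the gist) looks like this:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.appName("pandas-pyspark-simple").getOrCreate()
df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
df = df.withColumn("col1", F.col("col1") * 5)
df.show()  # col1 becomes 5, 10, 15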
# enumerate-iter wraps any iterable and yields [index value] pairs,
# mirroring Python's enumerate below
(defn enumerate-iter
  [iter]
  (var i -1)
  (generate [val :in iter]
    (++ i)
    [i val]))

# buf can be any iterable, e.g. (def buf ["a" "some" "data"])
(loop [val :generate (enumerate-iter buf)]
  (print val))
iterable = ["a", "some", "data"]
for i,value in enumerate(iterable):
print("this is the index", i)
print("this is the vale", value)
section .data
hash db '1234'          ; 4-byte buffer the code below points at

section .text
global _start

_start:
    mov edx,4           ; edx: byte count for the int 0x80 write convention
    mov ecx,hash        ; ecx: pointer to the buffer
    mov ebx,1           ; ebx: file descriptor 1 (stdout)