Created
December 21, 2021 00:44
-
-
Save 1ambda/0f7e7163277c2cb9cc89f9797068f6cc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import Row | |
# Number of days added to every row's `days_last_login` by updateDaysLastLogin.
missing_days = 10
# A Spark Row is read-only, so to change it in Python we convert it to a dict and then back into a Row.
# This is not an efficient approach; the code exists only to help illustrate how things work internally.
def updateDaysLastLogin(row):
    """Return a new Row with ``days_last_login`` increased by ``missing_days``.

    The incoming row is not modified: its fields are copied into a dict via
    ``asDict()``, the ``days_last_login`` entry is replaced with the shifted
    value, and a fresh ``Row`` is built from that dict.

    This round-trip is intentionally simple rather than efficient; it is
    meant for demonstration.
    """
    fields = row.asDict()
    shifted = fields['days_last_login'] + missing_days
    # Overwrite the existing key so every other field is carried over as-is.
    fields.update(days_last_login=shifted)
    return Row(**fields)
# Apply the per-row update to every element of `collected`
# (presumably the result of a DataFrame `collect()` — confirm against the caller).
updated = [updateDaysLastLogin(row) for row in collected]
# Output of updated[0]
Row(id=7196, year_birth=1950, education='PhD', count_kid=1, count_teen=1, date_customer='08-02-2014', days_last_login=30, date_joined=datetime.date(2020, 2, 8)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment