-
-
Save dq-hustlecoding/6132f159a47db4c36f978d2ba886c80a to your computer and use it in GitHub Desktop.
Create datasets for Amazon Personalize
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _register_schema(personalize, name_prefix: str, schema_path: str) -> str:
    """Create a Personalize schema from a local JSON file and return its ARN.

    Args:
        personalize: boto3 Personalize client used to issue the request.
        name_prefix: Prefix of the schema name; the current Unix timestamp is
            appended so that repeated runs do not collide on the name.
        schema_path: Path of the JSON schema file. A relative path is
            resolved against the current working directory.

    Returns:
        The ``schemaArn`` reported by ``create_schema``.
    """
    # JSON documents are UTF-8; do not depend on the platform default encoding.
    with open(schema_path, encoding='utf-8') as schema_file:
        response = personalize.create_schema(
            name=f'{name_prefix}_{int(time.time())}',
            schema=schema_file.read(),
        )
    return response['schemaArn']


def _wait_for_dataset_group(personalize, dsg_arn: str, poll_seconds: int = 30) -> None:
    """Block until the dataset group is ACTIVE, polling every ``poll_seconds``.

    Raises:
        RuntimeError: If the dataset group reaches a status from which it can
            never become ACTIVE (creation failed or deletion was requested).
    """
    status = ''
    while status != 'ACTIVE':
        print("wait DSG to created... ::", status)
        time.sleep(poll_seconds)
        dataset_group = personalize.describe_dataset_group(
            datasetGroupArn=dsg_arn
        )['datasetGroup']
        status = dataset_group['status']
        # Without this check a failed creation would keep the loop spinning
        # forever, because the status would never turn ACTIVE.
        if status in ('CREATE FAILED', 'DELETE PENDING'):
            reason = dataset_group.get('failureReason', 'no failure reason reported')
            raise RuntimeError(
                f'Dataset group {dsg_arn} cannot become ACTIVE '
                f'(status: {status}): {reason}'
            )


def _create_dataset(personalize, name_prefix: str, schema_arn: str,
                    dsg_arn: str, dataset_type: str) -> str:
    """Create one dataset inside the dataset group and return its ARN.

    The current Unix timestamp is appended to ``name_prefix`` to build the
    dataset name.
    """
    response = personalize.create_dataset(
        name=f'{name_prefix}-{int(time.time())}',
        schemaArn=schema_arn,
        datasetGroupArn=dsg_arn,
        datasetType=dataset_type,
    )
    return response['datasetArn']


def create_dataset(region_name: str = 'your region') -> None:
    """Create a Personalize dataset group, three schemas and three datasets.

    Steps, in order:
      1. Create a dataset group and print its name, ARN and status.
      2. Register the event, user and item schemas read from
         ``event_schema.json``, ``user_schema.json`` and ``item_schema.json``.
         The paths are relative, so the files must be in the current working
         directory.
      3. Wait until the dataset group is ACTIVE.
      4. Create the ``interactions``, ``users`` and ``items`` datasets and
         print their ARNs.

    Args:
        region_name: AWS region passed to ``boto3.client``. The default is a
            placeholder and has to be replaced with a real region.

    Raises:
        RuntimeError: If the dataset group ends up in a status from which it
            can never become ACTIVE.
    """
    personalize = boto3.client('personalize', region_name=region_name)

    # Dataset group names must be unique, so the creation timestamp is used
    # to get a different name on every run.
    response = personalize.create_dataset_group(name=f'dsg-{int(time.time())}')
    dsg_arn = response['datasetGroupArn']

    description = personalize.describe_dataset_group(datasetGroupArn=dsg_arn)['datasetGroup']
    print('1. Name: ' + description['name'])
    print('1. ARN: ' + description['datasetGroupArn'])
    print('1. Status: ' + description['status'])

    # Short pause to avoid timing problems between the dataset group creation
    # request and the calls that follow.
    time.sleep(5)

    # Register the three schemas inside Personalize.
    event_schema_arn = _register_schema(personalize, 'event_schema', 'event_schema.json')
    print("1-1. EVENT SCHEMA :: ", event_schema_arn)

    user_schema_arn = _register_schema(personalize, 'user_schema', 'user_schema.json')
    print("1-1. USER SCHEMA :: ", user_schema_arn)

    item_schema_arn = _register_schema(personalize, 'item_schema', 'item_schema.json')
    print("1-1. item SCHEMA :: ", item_schema_arn)

    # Creating the dataset group takes a while, so its status is checked every
    # 30 seconds and nothing below runs until it is ACTIVE.
    _wait_for_dataset_group(personalize, dsg_arn)

    event_ds_arn = _create_dataset(
        personalize, 'event', event_schema_arn, dsg_arn, 'interactions')
    print('2. Event Dataset Arn: ' + event_ds_arn)

    user_ds_arn = _create_dataset(
        personalize, 'user', user_schema_arn, dsg_arn, 'users')
    print('2. User Dataset Arn: ' + user_ds_arn)

    item_ds_arn = _create_dataset(
        personalize, 'item', item_schema_arn, dsg_arn, 'items')
    print('2. Item Dataset Arn: ' + item_ds_arn)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment