Skip to content

Instantly share code, notes, and snippets.

@Hosuke
Last active November 15, 2023 14:10
Show Gist options
  • Save Hosuke/cffac46ee8034ce6f65a3729ac421d11 to your computer and use it in GitHub Desktop.
Save Hosuke/cffac46ee8034ce6f65a3729ac421d11 to your computer and use it in GitHub Desktop.
import pandas as pd
import yaml
# Input CSV that holds the seed rows of every project/blockchain pair.
SOURCE_CSV = 'dex_base_trades_seed.csv'
# Output file that receives the generated seed schema.
SCHEMA_PATH = 'schema.yml'
# Column types declared for every generated seed.
COLUMN_TYPES = {
    "blockchain": "varchar",
    "project": "varchar",
    "version": "varchar",
    "tx_hash": "varbinary",
    "evt_index": "uint256",
    "block_number": "uint256",
    "token_bought_address": "varbinary",
    "token_sold_address": "varbinary",
    "token_bought_amount_raw": "uint256",
    "token_sold_amount_raw": "uint256",
    "block_date": "timestamp",
}


def load_trades(source):
    """Read the seed CSV, keeping every cell as the exact text in the file.

    Args:
        source: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame whose columns all hold strings; blank cells are ``""``.
    """
    # Type inference would rewrite values when the rows are written back
    # (e.g. version "1.10" -> 1.1, an integer column with a blank cell ->
    # "7.0", the literal text "NA" -> missing), so it is switched off.
    return pd.read_csv(source, dtype=str, keep_default_na=False)


def iter_seed_groups(df):
    """Yield ``(seed_name, rows)`` for each first-word-of-project/blockchain pair.

    Rows are grouped by the *first word* of ``project`` together with
    ``blockchain``, i.e. by exactly the values the seed name is built from.
    Projects that share a first word on the same blockchain therefore end up
    in one seed instead of overwriting each other's file and producing
    duplicate schema entries.

    Rows whose project or blockchain is blank cannot be named; they are
    skipped and reported through a warning.

    Args:
        df: DataFrame with at least the ``project`` and ``blockchain`` columns.

    Yields:
        Tuples of the seed name (``<project>_<blockchain>_base_trades_seed``)
        and the DataFrame slice holding that seed's rows, columns unchanged.
    """
    # Keep only the first word of the 'project' field (NaN when it is blank).
    first_words = df['project'].str.split().str[0]
    chains = df['blockchain']
    usable = first_words.notna() & chains.notna() & (chains != '')

    skipped = int((~usable).sum())
    if skipped:
        import warnings

        warnings.warn(
            f"skipping {skipped} row(s) with a blank project or blockchain"
        )

    grouped = df[usable].groupby([first_words[usable], chains[usable]])
    for (project_name, blockchain), rows in grouped:
        yield f"{project_name}_{blockchain}_base_trades_seed", rows


def build_seed_config(seed_name):
    """Return the schema entry for the seed called ``seed_name``."""
    return {
        "name": seed_name,
        "config": {
            # A fresh copy per seed: PyYAML emits anchors/aliases
            # (&id001 / *id001) when the same dict object appears twice.
            "column_types": dict(COLUMN_TYPES),
        },
    }


def main():
    """Split the source CSV into one CSV per seed and write the schema file."""
    # Read the CSV file.
    df = load_trades(SOURCE_CSV)

    # Initialise the schema dictionary.
    schema = {
        "version": 2,
        "seeds": [],
    }

    # Write one CSV per group and register it in the schema.
    for seed_name, rows in iter_seed_groups(df):
        rows.to_csv(f"{seed_name}.csv", index=False)
        schema["seeds"].append(build_seed_config(seed_name))

    # Convert the schema dictionary to a YAML string and write it to disk.
    schema_yaml = yaml.dump(schema, sort_keys=False)
    with open(SCHEMA_PATH, 'w', encoding='utf-8') as file:
        file.write(schema_yaml)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment