Skip to content

Instantly share code, notes, and snippets.

@sifnoc
Created February 7, 2024 13:01
Show Gist options
  • Save sifnoc/efe097af179ead123c64c01ec7caddeb to your computer and use it in GitHub Desktop.
Save sifnoc/efe097af179ead123c64c01ec7caddeb to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "a109d639",
"metadata": {},
"source": [
"## Generating a Dummy CSV File for Summa\n",
"\n",
"Note that it would be better to check for duplicates, especially when generating a large dataset.\n",
"\n",
"Also, check that the maximum balance multiplied by the number of users is less than $2^{64}$ to prevent overflow.\n",
"\n",
"The overflow threshold is 18,446,744,073,709,551,616 (= 1 << 64) on the range chip."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5cec3828",
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import string\n",
"\n",
"def generate_random_string(length=8):\n",
"    \"\"\"Return a string of `length` randomly chosen ASCII letters (both cases).\"\"\"\n",
"    alphabet = string.ascii_letters\n",
"    picked = []\n",
"    for _ in range(length):\n",
"        picked.append(random.choice(alphabet))\n",
"    return ''.join(picked)\n",
"\n",
"def generate_random_balance(max_balance=90000):\n",
"    \"\"\"Draw a random integer in [1000, max_balance] and return it as a decimal string.\"\"\"\n",
"    amount = random.randint(1000, max_balance)\n",
"    return f\"{amount}\"\n",
"\n",
"def generate_csv_entry(num_asset):\n",
"    \"\"\"Build one CSV row: a random username followed by random balances.\n",
"\n",
"    Args:\n",
"        num_asset: Number of balance columns to emit. Values below 1 still\n",
"            yield a single balance, because the row always starts with one.\n",
"\n",
"    Returns:\n",
"        A comma-separated string `username,balance,...` with no trailing newline.\n",
"    \"\"\"\n",
"    # Draw the username before any balance so the order of random draws stays fixed.\n",
"    fields = [generate_random_string()]\n",
"    fields.append(generate_random_balance())\n",
"    # One balance is already in place; add the remaining num_asset - 1.\n",
"    fields.extend(generate_random_balance() for _ in range(1, num_asset))\n",
"    return \",\".join(fields)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "866d989b",
"metadata": {},
"outputs": [],
"source": [
"# Generate the dummy CSV file.\n",
"#\n",
"# l: log2 of the number of users\n",
"# u: number of users (1 << l, i.e. 1024 for l = 10)\n",
"# c: number of currencies (balance columns per row)\n",
"l = 10\n",
"u = 1 << l\n",
"c = 2\n",
"csv_file_name = f\"{c}_entry_2_{l}.csv\"\n",
"\n",
"with open(csv_file_name, \"w\") as f:\n",
"    header = \"username,\" + \",\".join(f\"balance_ETH{i+1}_ETH\" for i in range(c))\n",
"    f.write(f\"{header}\\n\")\n",
"    seen_usernames = set()\n",
"    # Keep drawing rows until u distinct usernames have been written.\n",
"    while len(seen_usernames) < u:\n",
"        entry = generate_csv_entry(c)\n",
"        username = entry.split(\",\", 1)[0]\n",
"        # Random usernames can collide; drop the duplicate row and draw again.\n",
"        if username in seen_usernames:\n",
"            continue\n",
"        seen_usernames.add(username)\n",
"        f.write(entry + \"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "faf8f3df",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>username</th>\n",
" <th>balance_ETH1_ETH</th>\n",
" <th>balance_ETH2_ETH</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>FNSkKHIq</td>\n",
" <td>54616</td>\n",
" <td>8135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>oCqQqFdC</td>\n",
" <td>53960</td>\n",
" <td>58498</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>JNSOqfCp</td>\n",
" <td>50719</td>\n",
" <td>10905</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>pXOmRAxh</td>\n",
" <td>5088</td>\n",
" <td>16052</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>sNkUXewl</td>\n",
" <td>84047</td>\n",
" <td>67946</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1019</th>\n",
" <td>ygsALHDO</td>\n",
" <td>22914</td>\n",
" <td>13596</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1020</th>\n",
" <td>BpserFft</td>\n",
" <td>28307</td>\n",
" <td>60563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1021</th>\n",
" <td>ITmPewPT</td>\n",
" <td>4815</td>\n",
" <td>88183</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>gpdURoww</td>\n",
" <td>9244</td>\n",
" <td>38372</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1023</th>\n",
" <td>gLXlSPEV</td>\n",
" <td>52895</td>\n",
" <td>45712</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1024 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" username balance_ETH1_ETH balance_ETH2_ETH\n",
"0 FNSkKHIq 54616 8135\n",
"1 oCqQqFdC 53960 58498\n",
"2 JNSOqfCp 50719 10905\n",
"3 pXOmRAxh 5088 16052\n",
"4 sNkUXewl 84047 67946\n",
"... ... ... ...\n",
"1019 ygsALHDO 22914 13596\n",
"1020 BpserFft 28307 60563\n",
"1021 ITmPewPT 4815 88183\n",
"1022 gpdURoww 9244 38372\n",
"1023 gLXlSPEV 52895 45712\n",
"\n",
"[1024 rows x 3 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check Dummy Data\n",
"import pandas as pd\n",
"\n",
"df = pd.read_csv(csv_file_name)\n",
"\n",
"# Usernames must be unique across the whole file.\n",
"assert df[\"username\"].is_unique, \"duplicate usernames in generated CSV\"\n",
"\n",
"# Upper bound on any per-currency total: largest balance times the number of users.\n",
"# Converted to a Python int first so the check itself cannot overflow.\n",
"max_balance = int(df.drop(columns=\"username\").to_numpy().max())\n",
"assert max_balance * len(df) < 1 << 64, \"balance totals may reach the 1 << 64 overflow threshold\"\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a63e0442",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment