Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save fabrizioc1/7e58294f5969350e7960f5a608b275a7 to your computer and use it in GitHub Desktop.
Save fabrizioc1/7e58294f5969350e7960f5a608b275a7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import random\n",
"import numpy as np\n",
"\n",
"# Sizes of the generated datasets.\n",
"SEEDS_COUNT = 20      # rows generated for /tmp/seeds.json\n",
"SAMPLES_COUNT = 300   # rows generated for /tmp/features.json\n",
"FEATURES_COUNT = 10   # 0/1 entries per generated row\n",
"\n",
"# NOTE(review): BANDS_COUNT and R_VALUE look like LSH banding parameters\n",
"# (bands x rows-per-band); nothing in this notebook consumes them -- confirm.\n",
"BANDS_COUNT = 4\n",
"HASH_FUNCTIONS_COUNT = 48  # number of seed pairs to generate\n",
"\n",
"# Inclusive upper bound for the generated hash seeds; 15307 is prime.\n",
"P_VALUE = 15307\n",
"# Floor division keeps R_VALUE an int on Python 3 as well; plain `/` only\n",
"# yields an int under the Python 2 kernel this notebook was saved with.\n",
"R_VALUE = HASH_FUNCTIONS_COUNT // BANDS_COUNT\n",
"\n",
"def generate_features(feature_count, sample_count, seed=None):\n",
"    \"\"\"Build `sample_count` random binary vectors of length `feature_count`.\n",
"\n",
"    Every entry is a uniform draw from [0, 1) rounded to the nearest integer,\n",
"    i.e. either 0 or 1.\n",
"\n",
"    Args:\n",
"        feature_count: number of 0/1 entries in each vector.\n",
"        sample_count: number of vectors to generate.\n",
"        seed: optional seed. When given, a private `random.Random(seed)` is\n",
"            used so the result is reproducible; when None (the default) the\n",
"            module-level `random` generator is used.\n",
"\n",
"    Returns:\n",
"        A list of `sample_count` lists, each holding `feature_count` ints.\n",
"    \"\"\"\n",
"    rng = random if seed is None else random.Random(seed)\n",
"    # Separate loop variables for the inner and outer comprehension.\n",
"    return [\n",
"        [int(round(rng.random())) for _feature in range(feature_count)]\n",
"        for _sample in range(sample_count)\n",
"    ]\n",
"\n",
"def generate_hash_function_seeds(p_value, hash_function_count, seed=None):\n",
"    \"\"\"Draw one pair of random integers per hash function.\n",
"\n",
"    Args:\n",
"        p_value: largest value a seed may take (inclusive).\n",
"        hash_function_count: number of pairs (rows) to generate.\n",
"        seed: optional seed. When given, a private\n",
"            `np.random.RandomState(seed)` is used so the result is\n",
"            reproducible; when None (the default) the global `np.random`\n",
"            state is used.\n",
"\n",
"    Returns:\n",
"        Integer ndarray of shape (hash_function_count, 2) with values in\n",
"        [0, p_value].\n",
"    \"\"\"\n",
"    rng = np.random if seed is None else np.random.RandomState(seed)\n",
"    # `randint` excludes its upper bound, so `p_value + 1` makes p_value\n",
"    # itself a possible draw.\n",
"    # NOTE(review): if the pairs are used as coefficients modulo p_value, the\n",
"    # draw should probably stop at p_value - 1 -- confirm against the consumer.\n",
"    return rng.randint(0, p_value + 1, size=(hash_function_count, 2))\n",
"\n",
"hash_function_seeds = generate_hash_function_seeds(P_VALUE, HASH_FUNCTIONS_COUNT)\n",
"\n",
"# Store P_VALUE next to the seeds so the JSON file is self-describing.\n",
"# The payload is built before the file is opened, so a failure here cannot\n",
"# leave a truncated file behind.\n",
"hash_function_seeds_dict = {'p_value': P_VALUE, 'seeds': hash_function_seeds.tolist()}\n",
"\n",
"# The handle is not called `file`, which would shadow the Python 2 builtin.\n",
"with open('/tmp/hash_function_seeds.json', 'w') as seeds_file:\n",
"    json.dump(hash_function_seeds_dict, seeds_file)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Round-trip check: what was written to disk must match what is in memory.\n",
"with open('/tmp/hash_function_seeds.json') as seeds_file:\n",
"    hash_function_seeds_dict_read = json.load(seeds_file)\n",
"\n",
"assert hash_function_seeds_dict_read['p_value'] == P_VALUE, \\\n",
"    'p_value read back from JSON differs from P_VALUE'\n",
"assert np.array_equal(\n",
"    np.array(hash_function_seeds_dict_read['seeds']), hash_function_seeds\n",
"), 'seeds read back from JSON differ from the generated array'"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# SAMPLES_COUNT random 0/1 vectors, FEATURES_COUNT entries each.\n",
"features = generate_features(FEATURES_COUNT, SAMPLES_COUNT)\n",
"\n",
"# The handle is not called `file`, which would shadow the Python 2 builtin.\n",
"with open('/tmp/features.json', 'w') as features_file:\n",
"    json.dump(features, features_file)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Round-trip check: the JSON on disk must decode to the in-memory features.\n",
"with open('/tmp/features.json') as features_file:\n",
"    features_read = json.load(features_file)\n",
"\n",
"assert features_read == features, 'features read back from JSON differ from the generated list'"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# SEEDS_COUNT is defined once, with the other constants in the first cell;\n",
"# it is not redefined here so the two values cannot drift apart.\n",
"# The seeds are random 0/1 vectors with the same layout as the features.\n",
"seeds = generate_features(FEATURES_COUNT, SEEDS_COUNT)\n",
"\n",
"# The handle is not called `file`, which would shadow the Python 2 builtin.\n",
"with open('/tmp/seeds.json', 'w') as seeds_file:\n",
"    json.dump(seeds, seeds_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment