Skip to content

Instantly share code, notes, and snippets.

@hasanisaeed
Last active July 4, 2024 19:05
Show Gist options
  • Save hasanisaeed/49bed122ff2fcfcb70d795be71a8e5db to your computer and use it in GitHub Desktop.
Save hasanisaeed/49bed122ff2fcfcb70d795be71a8e5db to your computer and use it in GitHub Desktop.
Sharding Implementation
#!/bin/bash
# Start three PostgreSQL containers (one per shard) and create the URL table
# in each of them.

# Container names and the host port published for each one; inside every
# container PostgreSQL listens on 5432.
SHARD_NAMES=(shard1 shard2 shard3)
SHARD_PORTS=(5435 5433 5434)
# Upper bound, in seconds, on how long to wait for each shard to accept connections.
MAX_WAIT_SECONDS=60

docker pull postgres:latest

for idx in "${!SHARD_NAMES[@]}"; do
    docker run -d \
        --name "${SHARD_NAMES[$idx]}" \
        -e POSTGRES_PASSWORD=postgres \
        -e POSTGRES_USER=postgres \
        -e POSTGRES_DB=postgres \
        -p "${SHARD_PORTS[$idx]}:5432" \
        postgres:latest
done

# Initialize the tables in each container
for port in "${SHARD_PORTS[@]}"; do
    # Poll until the server answers a trivial query instead of relying on a
    # fixed sleep, which is too short on slow hosts and wasted time on fast ones.
    waited=0
    until PGPASSWORD=postgres psql -h localhost -p "$port" -U postgres -d postgres -c "SELECT 1" >/dev/null 2>&1; do
        if (( waited >= MAX_WAIT_SECONDS )); then
            echo "Timed out waiting for PostgreSQL on port $port" >&2
            exit 1
        fi
        sleep 1
        waited=$((waited + 1))
    done

    # Abort instead of reporting success when the table could not be created.
    PGPASSWORD=postgres psql -h localhost -p "$port" -U postgres -d postgres -c "CREATE TABLE IF NOT EXISTS myTable (url TEXT, url_id TEXT);" || exit 1
done

echo "PostgreSQL containers started and initialized."
#!/bin/bash
# Send NUM_LINKS POST requests to the URL service, printing a colored
# estimate of the target shard for each one.

NUM_LINKS=1000
BASE_URL="http://127.0.0.1:5000"
COLOR_RED="\033[0;31m" # Red
COLOR_GREEN="\033[0;32m" # Green
COLOR_BLUE="\033[0;34m" # Blue
COLOR_RESET="\033[0m" # Reset color

# Function to determine server name based on URL hash.
# $1: the URL that is about to be submitted.
# NOTE: this is only a client-side estimate (first 4 hex digits of the URL's
# SHA-256, modulo 3). The Flask app picks the shard with an MD5-based hash ring
# over the generated url_id, so the "server" field of each JSON response is the
# authoritative answer.
get_server_name() {
    # Keep the scratch variables local so they do not leak into the caller.
    local url_hash server_num
    url_hash=$(printf '%s' "$1" | sha256sum | cut -d ' ' -f 1)
    server_num=$((16#${url_hash:0:4} % 3 + 1)) # Hash to determine server (assuming 3 servers)
    # Labels use the host ports published by the Docker setup script.
    case $server_num in
        1) echo -e "${COLOR_RED}Server 5435${COLOR_RESET}" ;;
        2) echo -e "${COLOR_GREEN}Server 5433${COLOR_RESET}" ;;
        3) echo -e "${COLOR_BLUE}Server 5434${COLOR_RESET}" ;;
    esac
}

# Loop to send POST requests with generated URLs
for (( i = 1; i <= NUM_LINKS; i++ )); do
    RANDOM_URL="https://example.com/$i"
    SERVER_NAME=$(get_server_name "$RANDOM_URL")
    echo -e "Sending POST request to: $BASE_URL?url=$RANDOM_URL (Server: $SERVER_NAME)"
    curl -X POST "$BASE_URL?url=$RANDOM_URL"
done

echo "Completed sending $NUM_LINKS POST requests."
from flask import Flask, request, jsonify
import psycopg2
import hashlib
import bisect
import base64
# Flask application object; the route handlers in this file are registered on it.
app = Flask(__name__)
# Consistent hashing class
class ConsistentHashRing:
    """Consistent-hash ring mapping string keys to nodes.

    Every node is placed on the ring ``replicas`` times (virtual nodes), each
    at the MD5 hash of ``"<node>-<i>"``. A key is served by the node owning
    the first ring position strictly greater than the key's hash, wrapping
    around to the first position at the end of the ring.
    """

    def __init__(self, nodes=None, replicas=100):
        """Create a ring.

        Args:
            nodes: Optional iterable of node identifiers to add immediately.
            replicas: Number of virtual nodes placed on the ring per node.
        """
        self.replicas = replicas
        self.ring = {}          # ring position (int) -> node
        self.sorted_keys = []   # every ring position, in ascending order
        for node in nodes or ():
            self.add_node(node)

    def add_node(self, node):
        """Place ``node`` on the ring; adding the same node again is a no-op."""
        fresh = []
        for i in range(self.replicas):
            key = self.gen_key(f'{node}-{i}')
            # Only record positions that are not on the ring yet, so that
            # re-adding a node cannot leave duplicates in sorted_keys (a
            # later remove_node would otherwise strand stale positions).
            if key not in self.ring:
                fresh.append(key)
            self.ring[key] = node
        if fresh:
            # Sort once per call instead of once per virtual node.
            self.sorted_keys.extend(fresh)
            self.sorted_keys.sort()

    def remove_node(self, node):
        """Remove every virtual node of ``node`` from the ring.

        Raises:
            KeyError: If ``node`` is not on the ring.
        """
        for i in range(self.replicas):
            key = self.gen_key(f'{node}-{i}')
            del self.ring[key]
            # sorted_keys is sorted and duplicate-free, so the position can be
            # located by binary search.
            del self.sorted_keys[bisect.bisect_left(self.sorted_keys, key)]

    def get_node(self, key):
        """Return the node responsible for ``key``, or None if the ring is empty."""
        if not self.ring:
            return None
        return self.ring[self.sorted_keys[self.get_node_pos(key)]]

    def get_node_pos(self, key):
        """Return the index in ``sorted_keys`` of the position serving ``key``.

        Returns 0 for an empty ring and when the key hashes past the last
        position (wrap-around).
        """
        if not self.sorted_keys:
            return 0
        pos = bisect.bisect(self.sorted_keys, self.gen_key(key))
        return 0 if pos == len(self.sorted_keys) else pos

    def gen_key(self, key):
        """Return the MD5 digest of ``key`` (UTF-8 encoded) as an integer."""
        return int(hashlib.md5(key.encode('utf-8')).hexdigest(), 16)
# Shard label -> container IP address. The labels are the host ports published
# for each shard container; the application itself connects to the containers
# directly on PostgreSQL's port 5432.
SHARD_HOSTS = {
    '5435': '172.17.0.2',  # docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' shard1
    '5433': '172.17.0.3',
    '5434': '172.17.0.4',
}

# Create consistent hashing instance and add the 3 shard labels.
# The ring and the client map are both derived from SHARD_HOSTS so that every
# node the ring can return has a connection; previously the ring held '5435'
# while the client map was keyed by '5432', so lookups for that shard raised
# KeyError.
hr = ConsistentHashRing(list(SHARD_HOSTS))

# Create postgres clients for 3 shards
clients = {
    label: psycopg2.connect(
        host=host,
        port='5432',
        user='postgres',
        password='postgres',
        database='postgres',
    )
    for label, host in SHARD_HOSTS.items()
}
# Write to sharded database
@app.route('/', methods=['POST'])
def write_url():
    """Store the URL given in the ``url`` query parameter on its shard.

    The URL id is the first 5 characters of the URL-safe base64 encoding of
    the URL's SHA-256 digest; the shard is chosen by looking that id up in the
    hash ring.

    Returns:
        JSON with ``urlId``, ``url`` and ``server`` on success, or a JSON
        error with HTTP 400 when the ``url`` parameter is missing or empty.
    """
    url = request.args.get('url')
    if not url:
        # Without this guard a missing parameter crashed with AttributeError.
        return jsonify({'error': "missing required query parameter 'url'"}), 400

    digest = hashlib.sha256(url.encode()).digest()
    # URL-safe alphabet: standard base64 can emit '/', and an id containing a
    # slash can never be matched by the '/<url_id>' read route.
    hash_base64 = base64.urlsafe_b64encode(digest).decode('utf-8')
    url_id = hash_base64[:5]

    server = hr.get_node(url_id)
    print(f"{hash_base64} -> {url_id} -> S{server}")

    conn = clients[server]
    # `with conn` commits on success and rolls back on error, so a failed
    # INSERT does not leave the shared connection in an aborted transaction;
    # the cursor is closed in either case.
    with conn, conn.cursor() as cursor:
        cursor.execute("INSERT INTO myTable (url, url_id) VALUES (%s, %s)", (url, url_id))

    return jsonify({
        'urlId': url_id,
        'url': url,
        'server': server
    })
# Read from sharded database
@app.route('/<url_id>', methods=['GET'])
def read_url(url_id):
    """Look up ``url_id`` on the shard the hash ring assigns it to.

    Returns:
        JSON with ``urlId``, ``url`` and ``server`` when a row exists,
        otherwise the plain-text body ``Not Found`` with HTTP 404.
    """
    server = hr.get_node(url_id)
    print(f">> {url_id} -> {server}")

    conn = clients[server]
    # `with conn` ends the transaction that the SELECT implicitly opens
    # (commit on success, rollback on error) so the shared connection is not
    # left idle inside a transaction; the cursor is closed in either case.
    with conn, conn.cursor() as cursor:
        # Name the columns explicitly rather than relying on table column order.
        cursor.execute("SELECT url, url_id FROM myTable WHERE url_id = %s", (url_id,))
        row = cursor.fetchone()

    if row is None:
        return 'Not Found', 404

    stored_url, stored_id = row
    return jsonify({
        'urlId': stored_id,
        'url': stored_url,
        'server': server
    })
# Script entry point: start the Flask server with its default settings when
# this file is executed directly (not when it is imported).
if __name__ == "__main__":
    app.run()
@hasanisaeed
Copy link
Author

 lsof -i :5000

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment