Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Alex-CodeLab/3afba265164619cd18086bf97b012bb7 to your computer and use it in GitHub Desktop.
Save Alex-CodeLab/3afba265164619cd18086bf97b012bb7 to your computer and use it in GitHub Desktop.
intersect
import timeit
import random
import numpy as np
import matplotlib.pyplot as plt
import bisect
# Function to calculate intersection using built-in set intersection
def intersection_set(set1, set2):
return set1.intersection(set2)
# Function to calculate intersection using built-in list intersection
def intersection_list(list1, list2):
return list(set(list1) & set(list2))
# Function to calculate intersection using numpy's intersect1d
def intersection_numpy(array1, array2):
return np.intersect1d(array1, array2)
# Function to calculate intersection using numpy's in1d
def intersection_numpy_in1d(array1, array2):
return np.in1d(array1, array2)
def binary_search_intersection(list1, list2):
list1.sort()
common_elements = []
for elem in list2:
if bisect.bisect_left(list1, elem) != len(list1) and list1[bisect.bisect_left(list1, elem)] == elem:
common_elements.append(elem)
return common_elements
# Create test set2 with a fixed size of 40
set2 = set(range(10, 50))
# Create test list2 with a fixed size of 40
list2 = list(range(10, 50))
# Create test numpy array2 with a fixed size of 40
array2 = np.array(list(range(10, 50)))
# Create test set1/list1 sizes ranging from 100 to 1000
set_list_sizes = range(100, 4001, 100)
# Measure time for each set1 size
set_intersection_times = []
list_intersection_times = []
numpy_intersection_times = []
numpy_in1d_times = []
bsearch_times = []
for size in set_list_sizes:
set1 = set(random.sample(range(size*10), size))
list1 = list(random.sample(range(size*10), size))
array1 = np.array(list(random.sample(range(size*10), size)))
set_time = timeit.timeit(lambda: intersection_set(set1, set2), number=4000)
list_time = timeit.timeit(lambda: intersection_list(list1, list2), number=4000)
numpy_time = timeit.timeit(lambda: intersection_numpy(array1, array2), number=4000)
in1d_time = timeit.timeit(lambda: intersection_numpy_in1d(array1, array2), number=4000)
bsearch_time = timeit.timeit(lambda: binary_search_intersection(list1, list2), number=4000)
set_intersection_times.append(set_time)
list_intersection_times.append(list_time)
numpy_intersection_times.append(numpy_time)
numpy_in1d_times.append(in1d_time)
bsearch_times.append(bsearch_time)
# Plot results
plt.plot(set_list_sizes, set_intersection_times, label='Set Intersection')
plt.plot(set_list_sizes, list_intersection_times, label='List Intersection')
plt.plot(set_list_sizes, numpy_intersection_times, label='Numpy Intersection')
plt.plot(set_list_sizes, numpy_in1d_times, label='Numpy in1d')
plt.plot(set_list_sizes, bsearch_times, label='Bisect')
plt.xlabel('Set/List 1 Size')
plt.ylabel('Time (seconds)')
plt.title('Set/List Intersection Performance (Set/List 2 Size = 40)')
plt.legend()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment