Created
December 11, 2013 12:45
-
-
Save ClickerMonkey/7909809 to your computer and use it in GitHub Desktop.
Set operations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class SetOperations | |
{ | |
//=========================================================================== | |
// A set is an immutable group of unique numbers ordered from smallest to | |
// largest. The values in the set must be ordered for all set operations to | |
// function properly, so don't manually change/re-order these! | |
//=========================================================================== | |
class Set { | |
// The ordered set of numbers. | |
final int[] set; | |
// The count of numbers in this set. | |
int n; | |
// Initializes a new set of numbers. The numbers passed in will be ordered | |
// and any duplicates will automatically be removed. | |
Set(int ... set) { | |
// Sort the given set | |
Arrays.sort(this.set = set); | |
n = set.length; | |
// Remove all duplicated numbers | |
if (n > 1) { | |
n = removeDuplicates(); | |
} | |
} | |
// Removes all duplicates from the set and returns how many unique | |
// numbers in the set exist. | |
int removeDuplicates() { | |
int j = 0, last = n - 1; | |
for (int i = 0; i < n; i++) { | |
// This is a unique number, keep it. | |
set[j++] = set[i]; | |
// If the next is a duplicate then skip it. | |
while ((i < last) && set[i] == set[i + 1]) | |
i++; | |
} | |
return j; | |
} | |
// Returns the number of possible subsets this set contains. If the size | |
// of this set is greater then 63 then the value returned is inaccurate. | |
long subsetCount() { | |
return (1L << n); | |
} | |
// Returns true if this set contains the given integer. | |
boolean contains(int x) { | |
// Perform a binary search for x | |
int min = 0, max = n - 1, mid; | |
while (min <= max) { | |
mid = ((max + min) >> 1); | |
if (set[mid] == x) | |
return true; | |
if (set[mid] < x) | |
min = mid + 1; | |
else | |
max = mid - 1; | |
} | |
return false; | |
} | |
// Prints the string representation of this Set. | |
void print() { | |
if (n == 0) { | |
System.out.println("{ }"); | |
} else { | |
System.out.print("{ " + set[0]); | |
for (int i = 1; i < n; i++) | |
System.out.print(" " + set[i]); | |
System.out.println(" }"); | |
} | |
} | |
} | |
//=========================================================================== | |
// Builds a set by providing number appending to the set. | |
//=========================================================================== | |
class SetBuilder { | |
// The set of numbers being built. | |
int[] set; | |
// The count of numbers currently in the set. | |
int n; | |
// Initializes a new empty Set Builder. | |
SetBuilder() { | |
this(16); | |
} | |
SetBuilder(int n) { | |
set = new int[Math.max(n, 2)]; | |
} | |
// Adds the given number to the set. | |
void add(int x) { | |
ensureCapacity(); | |
set[n++] = x; | |
} | |
// Clears the set of all numbers. | |
void clear() { | |
n = 0; | |
} | |
// Ensures the set has space to add one more number. | |
void ensureCapacity() { | |
if (n == set.length) { | |
set = Arrays.copyOf(set, n + (n >> 1)); // Resize by 150% | |
} | |
} | |
// Returns the count of numbers in the set. | |
int size() { | |
return n; | |
} | |
// Returns the builders created Set. | |
Set getSet() { | |
return new Set(Arrays.copyOf(set, n)); | |
} | |
} | |
//=========================================================================== | |
// A generator of subsets for a given parent Set. The sizes of the subsets | |
// generated can be specifed to be within a given bounds inclusively. | |
//=========================================================================== | |
class Subset extends Set { | |
Set parent; // The parent set of this subset. | |
int subsetSize; // The size of the current sub set | |
int subsetMin; // The minimum size allowed for a subset | |
int subsetMax; // The maximum size allowed for a subset | |
boolean[] subsetMask; // The mask used to generate subsets | |
boolean rolled; | |
// Initializes a subset given its parent and the min and max size of | |
// subsets to generate. | |
Subset(Set parent, int min, int max) { | |
// Make this subset a copy of the parent set | |
super(Arrays.copyOf(parent.set, parent.n)); | |
this.parent = parent; | |
this.subsetMin = min; | |
this.subsetMax = max; | |
this.subsetSize = n; | |
this.subsetMask = new boolean[n]; | |
this.findNextValidSubset(); // Find the first valid subset | |
} | |
// Increments this subset to the next subset in the parent set. | |
void next() { | |
updateSubset(); // Set this subset based on the mask | |
rollMask(); // The actual 'next' operation | |
findNextValidSubset(); // Get the next subset with the desired size. | |
} | |
// Returns true if this subset can generate another unique subset of the parent. | |
boolean hasNext() { | |
return (subsetSize != 0); | |
} | |
//======================================================================== | |
// Private Methods | |
//======================================================================== | |
// Updates the values and size of this subset based on the current mask | |
void updateSubset() { | |
// Restrict the count of numbers in this subset by its current size. | |
n = subsetSize; | |
// For each marked number in the parent mask add it to this subset. | |
for (int i = 0, j = 0; i < parent.n; i++) { | |
if (!subsetMask[i]) { | |
set[j++] = parent.set[i]; | |
} | |
} | |
} | |
// Rolls the mask until a valid subset is met (within the subset size bounds) | |
void findNextValidSubset() { | |
// While this subset is not within the subset size bounds... | |
while (hasNext() && (subsetSize < subsetMin || subsetSize > subsetMax)) { | |
rollMask(); | |
} | |
} | |
// Rolls the subset mask to the next one and updates the subsetSize. | |
void rollMask() { | |
int i = set.length - 1; | |
while (i >= 0 && subsetMask[i]) { | |
subsetMask[i--] = false; | |
subsetSize++; | |
} | |
if (i >= 0) { | |
subsetMask[i] = true; | |
subsetSize--; | |
} | |
rolled = true; | |
} | |
} | |
//=========================================================================== | |
// Performs grouping on any number of sets. This performs in O(M*N) where M | |
// is the number of sets and N is the size of the largest set. | |
// Sets used in Examples: | |
// A = {1,2,3,4,8,9,10} | |
// B = {1,3,4,5,6,9,13} | |
// C = {1,2,3,5,7,8,11} | |
// D = {1,2,4,5,6,7,12} | |
// | |
// ===Inclusive grouping=== | |
// Returns a set which contains all numbers that are contained in atleast n | |
// sets. If n is 1 then this performs a union of the given sets, if n is the | |
// number of sets then this does a strict intersection of all the sets. | |
// For Example: | |
// group(true, 4, A, B, C, D) = {1} | |
// group(true, 3, A, B, C, D) = {1,2,3,4,5} | |
// group(true, 2, A, B, C, D) = {1,2,3,4,5,6,7,8,9} | |
// group(true, 1, A, B, C, D) = {1,2,3,4,5,6,7,8,9,10,11,12,13} | |
// group(true, 2, A, B) = {1,3,4,9} | |
// | |
// ===Exclusive grouping=== | |
// Returns a set which contains all numbers that are not in anymore then n | |
// sets. If n is 1 this returns an empty set, if n is 2 then this performs a | |
// complement of the given sets, if n is the number of sets then this does | |
// not include the strict intersection. | |
// For Example: | |
// group(false, 4, A, B, C, D) = {2,3,4,5,6,7,8,9,10,11,12,13} | |
// group(false, 3, A, B, C, D) = {6,7,8,9,10,11,12,13} | |
// group(false, 2, A, B, C, D) = {10,11,12,13} | |
// group(false, 1, A, B, C, D) = {} | |
// group(false, 2, A, B) = {1,3,4,9} | |
//=========================================================================== | |
Set group(boolean inclusive, int n, Set ... sets) { | |
/* | |
* Grouping is a generic way to combine sets in certain ways. | |
* The basic algorith for grouping is: | |
* 1 While all sets are not empty | |
* 2 Find the highest remaining number in the sets | |
* 3 Count how many sets contain this high number | |
* 4 Remove this number from every set that contains it. | |
* 5 Based on the count, inclusiveness, and n determine if this number | |
* should be added to the result | |
*/ | |
// Remove empty/null sets | |
int count = removeEmptySets(sets); | |
// A copy of each sets numbers. | |
int[][] values = new int[count][]; | |
// The pointer to j'th set where the current number is. | |
int[] index = new int[count]; | |
// The maximum possible size of the grouped set | |
int maxSize = 0; | |
// The number of sets that have not been completely searched through. | |
int fullSets = count; | |
// Pre compute and store values for quick retrieval. | |
for (int i = 0; i < count; i++) { | |
values[i] = sets[i].set; | |
index[i] = sets[i].n - 1; | |
maxSize += sets[i].n; | |
} | |
// With the initial size being maxSize the builder will never have to | |
// resize its internal array | |
SetBuilder result = new SetBuilder(maxSize); | |
// While not all sets have been exhausted... | |
while (fullSets > 0) { | |
//===================================================================== | |
// Find maximum number in the sets (only looking at the last number in the sets) | |
int max = 0; // The index of the set with the minimum first number. | |
int maxIndex = -1; // The index of the maximum number in the set. | |
for (int j = 0; j < count; j++) { | |
// If the next number in the j'th set is less then the next number | |
// in the min'th set then reset the min set. | |
if (index[j] >= 0 && (maxIndex == -1 || values[j][index[j]] > values[max][maxIndex])) { | |
maxIndex = index[max = j]; | |
} | |
} | |
//===================================================================== | |
// Count how many sets have max, for every set that has the max | |
// decrement their positions. | |
int total = 0; | |
for (int j = 0; j < count; j++) { | |
// If the next number in the j'th set is equal to the min... | |
if (index[j] >= 0 && values[j][index[j]] == values[max][maxIndex]) { | |
// Increment matches | |
total++; | |
// Change the index | |
index[j]--; | |
// If this set has been exhausted then decrement full sets | |
if (index[j] < 0) { | |
fullSets--; | |
} | |
} | |
} | |
//===================================================================== | |
// If its inclusive use the >= n rule | |
// If its not inclusive use the < n rule | |
if ((inclusive && total >= n) || (!inclusive && total < n)) { | |
result.add(values[max][maxIndex]); | |
} | |
} | |
return result.getSet(); | |
} | |
// Removes all null or empty sets from the given array and returns how many | |
// non-empty sets exist in the array starting at index 0. | |
int removeEmptySets(Set[] sets) { | |
// The initial count of full sets. | |
int count = sets.length; | |
int k = 0; | |
while (k < count) { | |
// If it's null or empty then copy over it. | |
if (sets[k] == null || sets[k].n == 0) { | |
sets[k] = sets[--count]; | |
} else { | |
k++; | |
} | |
} | |
return count; | |
} | |
// Returns the union of the given sets. | |
// union({1,2,3}, {2,3,4}) = {1,2,3,4} | |
Set union(Set ... sets) { | |
return group(true, 1, sets); | |
} | |
// Returns the intersection of the given sets. | |
// intersect({1,2,3}, {2,3,4}) = {2,3} | |
Set intersect(Set ... sets) { | |
return group(true, sets.length, sets); | |
} | |
// Returns the symmetric difference of the given sets. | |
// difference({1,2,3}, {2,3,4}) = {1,4} | |
Set difference(Set ... sets) { | |
return group(false, sets.length, sets); | |
} | |
// Returns the relative complement (set difference) of the two sets. | |
// complement({1,2,3}, {2,3,4}) = {1} | |
// complement({2,3,4}, {1,2,3}) = {4} | |
Set complement(Set a, Set b) { | |
return difference(union(a, b), b); | |
} | |
// Returns whether b is a subset of a. (all elements in b exist in a) | |
// isSubset({1,2,3,4,5}, {2,5}) = true | |
// isSubset({1,2,3,4,5}, {2,6}) = false | |
boolean isSubset(Set a, Set b) { | |
for (int i = 0; i < b.n; i++) { | |
if (!a.contains(b.set[i])) | |
return false; | |
} | |
return true; | |
} | |
// Returns the number of elements that are different between the two sets | |
int findDifference(Set a, Set b) { | |
int[] s1 = a.set; | |
int[] s2 = b.set; | |
int n1 = a.n - 1; | |
int n2 = b.n - 1; | |
int diff = 0; | |
while (n1 >= 0 && n2 >= 0) { | |
int d = s1[n1] - s2[n2]; | |
if (d != 0) diff++; | |
if (d > 0) n2++; | |
if (d < 0) n1++; | |
n1--; | |
n2--; | |
} | |
return diff + (n1 + n2 + 2); | |
} | |
// Given an array of Sets this removes all subsets in the array and returns | |
// the number of unique sets that exist (n) in the array [0,n-1]. | |
int removeSubsets(Set[] sets) { | |
// Mark all sets that are a subset of another cell | |
boolean[] isSubset = new boolean[sets.length]; | |
for (int i = 0; i < sets.length; i++) { | |
for (int j = i + 1; j < sets.length; j++) { | |
if (!isSubset[j] && isSubset(sets[i], sets[j])) { | |
isSubset[j] = true; | |
} | |
} | |
} | |
// Remove all marked sets | |
int unique = 0; | |
for (int i = 0; i < sets.length; i++) { | |
// This is a unique set, keep it. | |
sets[unique++] = sets[i]; | |
// Skip all following 'subset' sets. | |
while ((i < sets.length - 1) && isSubset[i + 1]) | |
i++; | |
} | |
return unique; | |
} | |
// Sorts from largest to smallest. | |
void sortBySize(Set[] sets) { | |
Arrays.sort(sets, new Comparator<Set>() { | |
public int compare(Set o1, Set o2) { | |
return (o2.n - o1.n); | |
} | |
}); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment