Skip to content

Instantly share code, notes, and snippets.

@castarco
Last active August 29, 2015 14:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save castarco/70798ae1d0266e9af9b5 to your computer and use it in GitHub Desktop.
Save castarco/70798ae1d0266e9af9b5 to your computer and use it in GitHub Desktop.
package prop.g12.common;
import com.google.common.collect.Sets;
import java.util.*;
/**
* This class encapsulates the Clique Percolation algorithm, used to detect overlapped communities inside non-directed
* and not weighted graphs.
*
* This method is slightly modified using suggestions from the article "Extended Clique percolation method to detect
* overlapping community structure" written by Sumana Maity and Santanu Kumar Rath. It can be found in the following
* URL : http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6968420&pageNumber%3D33268%26rowsPerPage%3D100 .
*
* The mentioned modifications are done in order to ensure that every node in the graph is placed at least in one
* community. We don't apply the third proposed step (merging "similar" communities after adding the left out nodes).
*
* @author Andrés Correa Casablanca
*
* @param <T> The graph's nodes type.
*/
public class CliquePercolation<T> {
/**
* Clique size
*/
private int k;
/**
* Threshold used to "binarize" the weights between nodes
*/
private double threshold;
/**
* @param k {@link CliquePercolation#k}
*/
public CliquePercolation(int k, double threshold) {
if (k < 2) {
throw new IllegalArgumentException("k must be greater than 1.");
}
if (threshold <= 0 || threshold >= 1) {
throw new IllegalArgumentException("threshold must be strictly between 0 and 1.");
}
this.k = k;
this.threshold = threshold;
}
/**
* Implementation of Clique Percolation using an alternative class for graphs.
*/
public Solucio<T> generateSolution(AffinitiesGraph<T> g) {
return generateSolution(g, true);
}
/**
* Implementation of Clique Percolation using an alternative class for graphs.
*/
public Solucio<T> generateSolution(AffinitiesGraph<T> g, boolean processLeftOutNodes) {
long startTime = System.currentTimeMillis();
// This is the complicated part: obtaining the Cliques.
ArrayList<HashSet<T>> kCliques = getKCliquesFromGraph(g);
// Now we are creating a graph where the nodes are the previously found cliques.
AffinitiesGraph<HashSet<T>> kCliqueGraf = new AffinitiesGraph<>(kCliques.size());
kCliqueGraf.addNodes(kCliques);
for (int i=0; i<kCliques.size(); i++) {
for (int j=i+1; j<kCliques.size(); j++) {
if (Sets.intersection(kCliques.get(i), kCliques.get(j)).size() >= k-1) {
kCliqueGraf.addEdge(kCliques.get(i), kCliques.get(j), 1.0);
}
}
}
// Now we are finding the "components" of the clique-graph
ArrayList<HashSet<T>> communities = findConnectedComponents(kCliqueGraf);
// Now we apply "Maity-Kumar tweaks" to "classify" the left out nodes.
if (processLeftOutNodes) {
applyMaityKumarTweaks(g, communities);
}
// Now we "translate" what we've found to a Solution instance.
Solucio<T> solution = new Solucio<>();
int communityId = 0;
for (HashSet<T> community : communities) {
solution.afegirComunitat(communityId);
for(T node : community) {
solution.afegirNode(node, communityId);
}
communityId++;
}
solution.setTempsExec((double)System.currentTimeMillis()-startTime);
return solution;
}
private ArrayList<HashSet<T>> findConnectedComponents(AffinitiesGraph<HashSet<T>> kCliqueGraf) {
ArrayList<HashSet<T>> communities = new ArrayList<>();
HashSet<HashSet<T>> processedCliques = new HashSet<>();
for(HashSet<T> seedClique : kCliqueGraf) {
if (processedCliques.contains(seedClique)) continue;
Queue<HashSet<T>> cliquesQueue = new ArrayDeque<>();
cliquesQueue.add(seedClique);
HashSet<T> community = new HashSet<>(seedClique.size());
while (cliquesQueue.size() > 0) {
HashSet<T> tmpClique = cliquesQueue.poll();
// Making grow the community
community.addAll(tmpClique);
processedCliques.add(tmpClique);
for (HashSet<T> stemClique : kCliqueGraf) {
if (processedCliques.contains(stemClique)) continue;
if (kCliqueGraf.hasEdge(seedClique, stemClique)) {
cliquesQueue.add(stemClique);
}
}
}
communities.add(community);
}
return communities;
}
private void applyMaityKumarTweaks(AffinitiesGraph<T> g, ArrayList<HashSet<T>> communities) {
HashSet<T> leftOutNodes = g.getNodes();
for (HashSet<T> community : communities) {
leftOutNodes.removeAll(community);
}
// We take apart the "very lonely" nodes because Maity-Kumar tweaks don't handle them in a graceful way.
// We'll create a singleton community for every one of them at the end of the Maity-Kumar tweaks.
HashSet<T> singletons = new HashSet<>();
for(T leftOutNode : leftOutNodes) {
boolean connected = false;
for (T node : g) {
if (node.equals(leftOutNode)) continue;
if (g.getEdge(leftOutNode, node) >= threshold) {
connected = true;
break;
}
}
if (!connected) {
singletons.add(leftOutNode);
}
}
leftOutNodes.removeAll(singletons);
// This numbers will be used to compute a "belonging score".
HashMap<T, Double> weightsSums = new HashMap<>(leftOutNodes.size());
for (T leftOutNode : leftOutNodes) {
double sum = 0;
for (T node : g) {
if (node.equals(leftOutNode)) continue;
sum += g.getEdge(leftOutNode, node);
}
weightsSums.put(leftOutNode, sum);
}
// The nodes aren't added to existent communities at the same time because we want to optimize the "classification".
while (leftOutNodes.size() > 0) {
double maxBelongingScore = 0;
ImmutablePair<T, HashSet<T>> nextAnnexation = null;
for (T leftOutNode : leftOutNodes) {
for (HashSet<T> community : communities) {
double belongingScore = 0;
for (T node : community) {
if (node.equals(leftOutNode)) continue;
belongingScore += g.getEdge(leftOutNode, node);
}
belongingScore /= weightsSums.get(leftOutNode);
if (belongingScore > maxBelongingScore) {
maxBelongingScore = belongingScore;
nextAnnexation = new ImmutablePair<>(leftOutNode, community);
}
}
}
nextAnnexation.getSecond().add(nextAnnexation.getFirst());
leftOutNodes.remove(nextAnnexation.getFirst());
}
// Finally, we add the singletons we previously segregated.
for (T singleton : singletons) {
HashSet<T> tmpSingleton = new HashSet<>(1);
tmpSingleton.add(singleton);
communities.add(tmpSingleton);
}
}
/**
* Finds all the maximal cliques of g, and returns it in form of sets.
* Implements the Bron-Kerbosch algorithm.
*/
private ArrayList<HashSet<T>> getKCliquesFromGraph(AffinitiesGraph<T> g) {
ArrayList<HashSet<T>> cliques = new ArrayList<>();
ArrayList<T> potentialClique = new ArrayList<>();
ArrayList<T> alreadyFound = new ArrayList<>();
HashSet<T> candidates = g.getNodes();
findCliques(potentialClique, candidates, alreadyFound, cliques, g);
return cliques;
}
/**
* Body of the Bron-Kerbosch algorithm.
*
* @param potentialClique
* @param candidates
* @param alreadyFound
*/
private void findCliques(
ArrayList<T> potentialClique, Collection<T> candidates, ArrayList<T> alreadyFound,
ArrayList<HashSet<T>> cliques, AffinitiesGraph<T> g
) {
ArrayList<T> candidatesArray = new ArrayList<T>(candidates);
if (!end(candidates, alreadyFound, g)) {
for (T candidate : candidatesArray) {
ArrayList<T> newCandidates = new ArrayList<T>();
ArrayList<T> newAlreadyFound = new ArrayList<T>();
// move candidate node to potentialClique
potentialClique.add(candidate);
candidates.remove(candidate);
// create newCandidates by removing nodes in candidates not
// connected to candidate node
for (T newCandidate : candidates) {
if (g.getEdge(candidate, newCandidate) >= threshold) {
newCandidates.add(newCandidate);
}
}
// create newAlreadyFound by removing nodes in alreadyFound
// not connected to candidate node
for (T newFound : alreadyFound) {
if (g.getEdge(candidate, newFound) >= threshold) {
newAlreadyFound.add(newFound);
}
}
// if new_candidates and new_already_found are empty
if (newCandidates.isEmpty() && newAlreadyFound.isEmpty()) {
// potential_clique is maximal_clique
if (potentialClique.size() >= k) {
cliques.add(new HashSet<T>(potentialClique));
}
} else {
// recursive call
findCliques(potentialClique, newCandidates, newAlreadyFound, cliques, g);
}
// move candidate_node from potential_clique to already_found;
alreadyFound.add(candidate);
potentialClique.remove(candidate);
}
}
}
/**
* Tells us if a node inside alreadyFound us connected to all candidate nodes.
*/
private boolean end(Collection<T> candidates, ArrayList<T> alreadyFound, AffinitiesGraph<T> g)
{
int edgeCounter;
for (T found : alreadyFound) {
edgeCounter = 0;
for (T candidate : candidates) {
if (g.getEdge(found, candidate) >= threshold) {
edgeCounter++;
}
}
if (edgeCounter == candidates.size()) {
return true;
}
}
return false;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment