castarco/CliquePercolation.java

## CliquePercolation.java
package prop.g12.common;


import com.google.common.collect.Sets;

import java.util.*;

/**
 * This class encapsulates the Clique Percolation algorithm, used to detect overlapped communities inside non-directed
 * and not weighted graphs.
 *
 * This method is slightly modified using suggestions from the article "Extended Clique percolation method to detect
 * overlapping community structure" written by Sumana Maity and Santanu Kumar Rath. It can be found in the following
 * URL : http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6968420&pageNumber%3D33268%26rowsPerPage%3D100 .
 *
 * The mentioned modifications are done in order to ensure that every node in the graph is placed at least in one
 * community. We don't apply the third proposed step (merging "similar" communities after adding the left out nodes).
 *
 * @author Andrés Correa Casablanca
 *
 * @param <T> The graph's nodes type.
 */
public class CliquePercolation<T> {

    /**
     * Clique size
     */
    private int k;

    /**
     * Threshold used to "binarize" the weights between nodes
     */
    private double threshold;


    /**
     * @param k {@link CliquePercolation#k}
     */
    public CliquePercolation(int k, double threshold) {
        if (k < 2) {
            throw new IllegalArgumentException("k must be greater than 1.");
        }

        if (threshold <= 0 || threshold >= 1) {
            throw new IllegalArgumentException("threshold must be strictly between 0 and 1.");
        }

        this.k = k;
        this.threshold = threshold;
    }

    /**
     * Implementation of Clique Percolation using an alternative class for graphs.
     */
    public Solucio<T> generateSolution(AffinitiesGraph<T> g) {
        return generateSolution(g, true);
    }

    /**
     * Implementation of Clique Percolation using an alternative class for graphs.
     */
    public Solucio<T> generateSolution(AffinitiesGraph<T> g, boolean processLeftOutNodes) {
        long startTime = System.currentTimeMillis();

        // This is the complicated part: obtaining the Cliques.
        ArrayList<HashSet<T>> kCliques = getKCliquesFromGraph(g);

        // Now we are creating a graph where the nodes are the previously found cliques.
        AffinitiesGraph<HashSet<T>> kCliqueGraf = new AffinitiesGraph<>(kCliques.size());
        kCliqueGraf.addNodes(kCliques);

        for (int i=0; i<kCliques.size(); i++) {
            for (int j=i+1; j<kCliques.size(); j++) {

                if (Sets.intersection(kCliques.get(i), kCliques.get(j)).size() >= k-1) {
                    kCliqueGraf.addEdge(kCliques.get(i), kCliques.get(j), 1.0);
                }
            }
        }

        // Now we are finding the "components" of the clique-graph
        ArrayList<HashSet<T>> communities = findConnectedComponents(kCliqueGraf);

        // Now we apply "Maity-Kumar tweaks" to "classify" the left out nodes.
        if (processLeftOutNodes) {
            applyMaityKumarTweaks(g, communities);
        }

        // Now we "translate" what we've found to a Solution instance.
        Solucio<T> solution = new Solucio<>();

        int communityId = 0;
        for (HashSet<T> community : communities) {
            solution.afegirComunitat(communityId);

            for(T node : community) {
                solution.afegirNode(node, communityId);
            }

            communityId++;
        }

        solution.setTempsExec((double)System.currentTimeMillis()-startTime);
        return solution;
    }

    private ArrayList<HashSet<T>> findConnectedComponents(AffinitiesGraph<HashSet<T>> kCliqueGraf) {
        ArrayList<HashSet<T>> communities = new ArrayList<>();
        HashSet<HashSet<T>> processedCliques = new HashSet<>();

        for(HashSet<T> seedClique : kCliqueGraf) {
            if (processedCliques.contains(seedClique)) continue;

            Queue<HashSet<T>> cliquesQueue = new ArrayDeque<>();
            cliquesQueue.add(seedClique);

            HashSet<T> community = new HashSet<>(seedClique.size());

            while (cliquesQueue.size() > 0) {
                HashSet<T> tmpClique = cliquesQueue.poll();

                // Making grow the community
                community.addAll(tmpClique);
                processedCliques.add(tmpClique);

                for (HashSet<T> stemClique : kCliqueGraf) {
                    if (processedCliques.contains(stemClique)) continue;

                    if (kCliqueGraf.hasEdge(seedClique, stemClique)) {
                        cliquesQueue.add(stemClique);
                    }
                }
            }

            communities.add(community);
        }

        return communities;
    }

    private void applyMaityKumarTweaks(AffinitiesGraph<T> g, ArrayList<HashSet<T>> communities) {
        HashSet<T> leftOutNodes = g.getNodes();
        for (HashSet<T> community : communities) {
            leftOutNodes.removeAll(community);
        }

        // We take apart the "very lonely" nodes because Maity-Kumar tweaks don't handle them in a graceful way.
        // We'll create a singleton community for every one of them at the end of the Maity-Kumar tweaks.
        HashSet<T> singletons = new HashSet<>();

        for(T leftOutNode : leftOutNodes) {
            boolean connected = false;
            for (T node : g) {
                if (node.equals(leftOutNode)) continue;

                if (g.getEdge(leftOutNode, node) >= threshold)  {
                    connected = true;
                    break;
                }
            }
            if (!connected) {
                singletons.add(leftOutNode);
            }
        }
        leftOutNodes.removeAll(singletons);

        // This numbers will be used to compute a "belonging score".
        HashMap<T, Double> weightsSums = new HashMap<>(leftOutNodes.size());
        for (T leftOutNode : leftOutNodes) {
            double sum = 0;

            for (T node : g) {
                if (node.equals(leftOutNode)) continue;

                sum += g.getEdge(leftOutNode, node);
            }

            weightsSums.put(leftOutNode, sum);
        }

        // The nodes aren't added to existent communities at the same time because we want to optimize the "classification".
        while (leftOutNodes.size() > 0) {
            double maxBelongingScore = 0;
            ImmutablePair<T, HashSet<T>> nextAnnexation = null;

            for (T leftOutNode : leftOutNodes) {
                for (HashSet<T> community : communities) {
                    double belongingScore = 0;

                    for (T node : community) {
                        if (node.equals(leftOutNode)) continue;
                        belongingScore += g.getEdge(leftOutNode, node);
                    }

                    belongingScore /= weightsSums.get(leftOutNode);

                    if (belongingScore > maxBelongingScore) {
                        maxBelongingScore = belongingScore;
                        nextAnnexation = new ImmutablePair<>(leftOutNode, community);
                    }
                }
            }

            nextAnnexation.getSecond().add(nextAnnexation.getFirst());
            leftOutNodes.remove(nextAnnexation.getFirst());
        }

        // Finally, we add the singletons we previously segregated.
        for (T singleton : singletons) {
            HashSet<T> tmpSingleton = new HashSet<>(1);
            tmpSingleton.add(singleton);
            communities.add(tmpSingleton);
        }
    }

    /**
     * Finds all the maximal cliques of g, and returns it in form of sets.
     * Implements the Bron-Kerbosch algorithm.
     */
    private ArrayList<HashSet<T>> getKCliquesFromGraph(AffinitiesGraph<T> g) {
        ArrayList<HashSet<T>> cliques = new ArrayList<>();
        ArrayList<T> potentialClique = new ArrayList<>();
        ArrayList<T> alreadyFound = new ArrayList<>();
        HashSet<T> candidates = g.getNodes();

        findCliques(potentialClique, candidates, alreadyFound, cliques, g);

        return cliques;
    }

    /**
     * Body of the Bron-Kerbosch algorithm.
     *
     * @param potentialClique
     * @param candidates
     * @param alreadyFound
     */
    private void findCliques(
            ArrayList<T> potentialClique, Collection<T> candidates, ArrayList<T> alreadyFound,
            ArrayList<HashSet<T>> cliques, AffinitiesGraph<T> g
    ) {

        ArrayList<T> candidatesArray = new ArrayList<T>(candidates);

        if (!end(candidates, alreadyFound, g)) {
            for (T candidate : candidatesArray) {
                ArrayList<T> newCandidates = new ArrayList<T>();
                ArrayList<T> newAlreadyFound = new ArrayList<T>();

                // move candidate node to potentialClique
                potentialClique.add(candidate);
                candidates.remove(candidate);

                // create newCandidates by removing nodes in candidates not
                // connected to candidate node
                for (T newCandidate : candidates) {
                    if (g.getEdge(candidate, newCandidate) >= threshold)  {
                        newCandidates.add(newCandidate);
                    }
                }

                // create newAlreadyFound by removing nodes in alreadyFound
                // not connected to candidate node
                for (T newFound : alreadyFound) {
                    if (g.getEdge(candidate, newFound) >= threshold) {
                        newAlreadyFound.add(newFound);
                    }
                }

                // if new_candidates and new_already_found are empty
                if (newCandidates.isEmpty() && newAlreadyFound.isEmpty()) {
                    // potential_clique is maximal_clique
                    if (potentialClique.size() >= k) {
                        cliques.add(new HashSet<T>(potentialClique));
                    }
                } else {
                    // recursive call
                    findCliques(potentialClique, newCandidates, newAlreadyFound, cliques, g);
                }

                // move candidate_node from potential_clique to already_found;
                alreadyFound.add(candidate);
                potentialClique.remove(candidate);
            }
        }
    }

    /**
     * Tells us if a node inside alreadyFound us connected to all candidate nodes.
     */
    private boolean end(Collection<T> candidates, ArrayList<T> alreadyFound, AffinitiesGraph<T> g)
    {
        int edgeCounter;

        for (T found : alreadyFound) {
            edgeCounter = 0;

            for (T candidate : candidates) {
                if (g.getEdge(found, candidate) >= threshold) {
                    edgeCounter++;
                }
            }

            if (edgeCounter == candidates.size()) {
                return true;
            }
        }

        return false;
    }
}
	package prop.g12.common;


	import com.google.common.collect.Sets;

	import java.util.*;

	/**
	* This class encapsulates the Clique Percolation algorithm, used to detect overlapped communities inside non-directed
	* and not weighted graphs.
	*
	* This method is slightly modified using suggestions from the article "Extended Clique percolation method to detect
	* overlapping community structure" written by Sumana Maity and Santanu Kumar Rath. It can be found in the following
	* URL : http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6968420&pageNumber%3D33268%26rowsPerPage%3D100 .
	*
	* The mentioned modifications are done in order to ensure that every node in the graph is placed at least in one
	* community. We don't apply the third proposed step (merging "similar" communities after adding the left out nodes).
	*
	* @author Andrés Correa Casablanca
	*
	* @param <T> The graph's nodes type.
	*/
	public class CliquePercolation<T> {

	/**
	* Clique size
	*/
	private int k;

	/**
	* Threshold used to "binarize" the weights between nodes
	*/
	private double threshold;


	/**
	* @param k {@link CliquePercolation#k}
	*/
	public CliquePercolation(int k, double threshold) {
	if (k < 2) {
	throw new IllegalArgumentException("k must be greater than 1.");
	}

	if (threshold <= 0 \|\| threshold >= 1) {
	throw new IllegalArgumentException("threshold must be strictly between 0 and 1.");
	}

	this.k = k;
	this.threshold = threshold;
	}

	/**
	* Implementation of Clique Percolation using an alternative class for graphs.
	*/
	public Solucio<T> generateSolution(AffinitiesGraph<T> g) {
	return generateSolution(g, true);
	}

	/**
	* Implementation of Clique Percolation using an alternative class for graphs.
	*/
	public Solucio<T> generateSolution(AffinitiesGraph<T> g, boolean processLeftOutNodes) {
	long startTime = System.currentTimeMillis();

	// This is the complicated part: obtaining the Cliques.
	ArrayList<HashSet<T>> kCliques = getKCliquesFromGraph(g);

	// Now we are creating a graph where the nodes are the previously found cliques.
	AffinitiesGraph<HashSet<T>> kCliqueGraf = new AffinitiesGraph<>(kCliques.size());
	kCliqueGraf.addNodes(kCliques);

	for (int i=0; i<kCliques.size(); i++) {
	for (int j=i+1; j<kCliques.size(); j++) {

	if (Sets.intersection(kCliques.get(i), kCliques.get(j)).size() >= k-1) {
	kCliqueGraf.addEdge(kCliques.get(i), kCliques.get(j), 1.0);
	}
	}
	}

	// Now we are finding the "components" of the clique-graph
	ArrayList<HashSet<T>> communities = findConnectedComponents(kCliqueGraf);

	// Now we apply "Maity-Kumar tweaks" to "classify" the left out nodes.
	if (processLeftOutNodes) {
	applyMaityKumarTweaks(g, communities);
	}

	// Now we "translate" what we've found to a Solution instance.
	Solucio<T> solution = new Solucio<>();

	int communityId = 0;
	for (HashSet<T> community : communities) {
	solution.afegirComunitat(communityId);

	for(T node : community) {
	solution.afegirNode(node, communityId);
	}

	communityId++;
	}

	solution.setTempsExec((double)System.currentTimeMillis()-startTime);
	return solution;
	}

	private ArrayList<HashSet<T>> findConnectedComponents(AffinitiesGraph<HashSet<T>> kCliqueGraf) {
	ArrayList<HashSet<T>> communities = new ArrayList<>();
	HashSet<HashSet<T>> processedCliques = new HashSet<>();

	for(HashSet<T> seedClique : kCliqueGraf) {
	if (processedCliques.contains(seedClique)) continue;

	Queue<HashSet<T>> cliquesQueue = new ArrayDeque<>();
	cliquesQueue.add(seedClique);

	HashSet<T> community = new HashSet<>(seedClique.size());

	while (cliquesQueue.size() > 0) {
	HashSet<T> tmpClique = cliquesQueue.poll();

	// Making grow the community
	community.addAll(tmpClique);
	processedCliques.add(tmpClique);

	for (HashSet<T> stemClique : kCliqueGraf) {
	if (processedCliques.contains(stemClique)) continue;

	if (kCliqueGraf.hasEdge(seedClique, stemClique)) {
	cliquesQueue.add(stemClique);
	}
	}
	}

	communities.add(community);
	}

	return communities;
	}

	private void applyMaityKumarTweaks(AffinitiesGraph<T> g, ArrayList<HashSet<T>> communities) {
	HashSet<T> leftOutNodes = g.getNodes();
	for (HashSet<T> community : communities) {
	leftOutNodes.removeAll(community);
	}

	// We take apart the "very lonely" nodes because Maity-Kumar tweaks don't handle them in a graceful way.
	// We'll create a singleton community for every one of them at the end of the Maity-Kumar tweaks.
	HashSet<T> singletons = new HashSet<>();

	for(T leftOutNode : leftOutNodes) {
	boolean connected = false;
	for (T node : g) {
	if (node.equals(leftOutNode)) continue;

	if (g.getEdge(leftOutNode, node) >= threshold) {
	connected = true;
	break;
	}
	}
	if (!connected) {
	singletons.add(leftOutNode);
	}
	}
	leftOutNodes.removeAll(singletons);

	// This numbers will be used to compute a "belonging score".
	HashMap<T, Double> weightsSums = new HashMap<>(leftOutNodes.size());
	for (T leftOutNode : leftOutNodes) {
	double sum = 0;

	for (T node : g) {
	if (node.equals(leftOutNode)) continue;

	sum += g.getEdge(leftOutNode, node);
	}

	weightsSums.put(leftOutNode, sum);
	}

	// The nodes aren't added to existent communities at the same time because we want to optimize the "classification".
	while (leftOutNodes.size() > 0) {
	double maxBelongingScore = 0;
	ImmutablePair<T, HashSet<T>> nextAnnexation = null;

	for (T leftOutNode : leftOutNodes) {
	for (HashSet<T> community : communities) {
	double belongingScore = 0;

	for (T node : community) {
	if (node.equals(leftOutNode)) continue;
	belongingScore += g.getEdge(leftOutNode, node);
	}

	belongingScore /= weightsSums.get(leftOutNode);

	if (belongingScore > maxBelongingScore) {
	maxBelongingScore = belongingScore;
	nextAnnexation = new ImmutablePair<>(leftOutNode, community);
	}
	}
	}

	nextAnnexation.getSecond().add(nextAnnexation.getFirst());
	leftOutNodes.remove(nextAnnexation.getFirst());
	}

	// Finally, we add the singletons we previously segregated.
	for (T singleton : singletons) {
	HashSet<T> tmpSingleton = new HashSet<>(1);
	tmpSingleton.add(singleton);
	communities.add(tmpSingleton);
	}
	}

	/**
	* Finds all the maximal cliques of g, and returns it in form of sets.
	* Implements the Bron-Kerbosch algorithm.
	*/
	private ArrayList<HashSet<T>> getKCliquesFromGraph(AffinitiesGraph<T> g) {
	ArrayList<HashSet<T>> cliques = new ArrayList<>();
	ArrayList<T> potentialClique = new ArrayList<>();
	ArrayList<T> alreadyFound = new ArrayList<>();
	HashSet<T> candidates = g.getNodes();

	findCliques(potentialClique, candidates, alreadyFound, cliques, g);

	return cliques;
	}

	/**
	* Body of the Bron-Kerbosch algorithm.
	*
	* @param potentialClique
	* @param candidates
	* @param alreadyFound
	*/
	private void findCliques(
	ArrayList<T> potentialClique, Collection<T> candidates, ArrayList<T> alreadyFound,
	ArrayList<HashSet<T>> cliques, AffinitiesGraph<T> g
	) {

	ArrayList<T> candidatesArray = new ArrayList<T>(candidates);

	if (!end(candidates, alreadyFound, g)) {
	for (T candidate : candidatesArray) {
	ArrayList<T> newCandidates = new ArrayList<T>();
	ArrayList<T> newAlreadyFound = new ArrayList<T>();

	// move candidate node to potentialClique
	potentialClique.add(candidate);
	candidates.remove(candidate);

	// create newCandidates by removing nodes in candidates not
	// connected to candidate node
	for (T newCandidate : candidates) {
	if (g.getEdge(candidate, newCandidate) >= threshold) {
	newCandidates.add(newCandidate);
	}
	}

	// create newAlreadyFound by removing nodes in alreadyFound
	// not connected to candidate node
	for (T newFound : alreadyFound) {
	if (g.getEdge(candidate, newFound) >= threshold) {
	newAlreadyFound.add(newFound);
	}
	}

	// if new_candidates and new_already_found are empty
	if (newCandidates.isEmpty() && newAlreadyFound.isEmpty()) {
	// potential_clique is maximal_clique
	if (potentialClique.size() >= k) {
	cliques.add(new HashSet<T>(potentialClique));
	}
	} else {
	// recursive call
	findCliques(potentialClique, newCandidates, newAlreadyFound, cliques, g);
	}

	// move candidate_node from potential_clique to already_found;
	alreadyFound.add(candidate);
	potentialClique.remove(candidate);
	}
	}
	}

	/**
	* Tells us if a node inside alreadyFound us connected to all candidate nodes.
	*/
	private boolean end(Collection<T> candidates, ArrayList<T> alreadyFound, AffinitiesGraph<T> g)
	{
	int edgeCounter;

	for (T found : alreadyFound) {
	edgeCounter = 0;

	for (T candidate : candidates) {
	if (g.getEdge(found, candidate) >= threshold) {
	edgeCounter++;
	}
	}

	if (edgeCounter == candidates.size()) {
	return true;
	}
	}

	return false;
	}
	}