Skip to content

Instantly share code, notes, and snippets.

@jackbergus
Created August 26, 2019 17:35
Show Gist options
  • Save jackbergus/0e944b1b62155a655cc4d97df9b09d6c to your computer and use it in GitHub Desktop.
Save jackbergus/0e944b1b62155a655cc4d97df9b09d6c to your computer and use it in GitHub Desktop.
/*
* HierarchicalDistance.java
* This file is part of HierarchicalDistance
*
* Copyright (C) 2019 - Giacomo Bergami
*
* HierarchicalDistance is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* HierarchicalDistance is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HierarchicalDistance. If not, see <http://www.gnu.org/licenses/>.
*/
package it.giacomobergami.jingoloba.poc;
import java.util.*;
/**
* Given two vectors representing a path into a hierarchical tree, the class defines a similarity
* score `distance` determining the similarity of the two terms. For each vector, a threshold value
* `thresholdValue`: if the distance is greater than the threshold, then the two vectors are unrelated.
*
* The int[] hierarchical vectors should be represented accordingly to the ones in the following paper:
* - A. Petermann, G. Micale, G. Bergami, A. Pulvirenti, E. Rahm. “Mining and Ranking of Generalized Multi-Dimensional Frequent Subgraphs”. Proceedings of the Twelfth International Conference on Digital Information Management, Fukuoka, Japan, September 2017 (ICDIM 2017).
*/
public class HierarchicalDistance {
/**
* Given a hierarchical tree extracted from a DAG, its maximum assocated branching factor
*/
private double maximumBranchingFactor;
/**
* Maximum distance represented by the distance between the root (empty vector) and one of the nodes at the fist level
*/
private double distanceFactor;
/**
* Factor reducing the distance between the one node from the i-th level to the children at the (i+1)-level greater
* at each level step, such that the nodes from different branches will not be overlapped.
*/
private double decayFactor;
public HierarchicalDistance(int maximumBranchingFactor, double distanceFactor, int decayFactor) {
this.maximumBranchingFactor = maximumBranchingFactor;
this.distanceFactor = distanceFactor;
this.decayFactor = decayFactor;
}
/**
* Returns a vector that can be used to compute the euclidean distance from the hierarchical vector
* @param hierarchyVector Vector expressed as in the Petermann paper
* @return
*/
public double[] fromHierarchyVectorToEuclideanVector(int[] hierarchyVector) {
double[] d = new double[(int)maximumBranchingFactor];
double currentDecay = 1.0;
for (int i = 0, hierarchyVectorLength = hierarchyVector.length; i < hierarchyVectorLength; i++) {
int vi = hierarchyVector[i] - 1;
d[vi] = d[vi] + (distanceFactor / currentDecay);
currentDecay *= decayFactor;
}
return d;
}
/**
* Distance threshold values for two given elements
* @param left
* @param right
* @return
*/
public double thresholdValue(int[] left, int[] right) {
int h2 = left.length, h1 = right.length;
if (h2 < h1) {
int tmp = h2;
h2 = h1;
h1 = tmp;
}
return distanceFactor * (Math.pow(decayFactor, h2-h1+1) - 1.0) / (Math.pow(decayFactor, h2) * (decayFactor - 1.0));
}
/**
* @param left Hierarchy vector
* @param right Hierarchy vector
* @return Distance
*/
public double distance(int[] left, int[] right) {
return euclideanDistance(fromHierarchyVectorToEuclideanVector(left), fromHierarchyVectorToEuclideanVector(right));
}
/**
* This distance is defined as follows: isConsistent(left, right) <=> subArrayOf(left,right) || subArrayOf(right,left)
*
* @param left path index vector
* @param right path index vector
* @return Whether the two elements are consistent or not.
*/
public boolean isConsistent(int[] left, int []right) {
return distance(left, right) <= thresholdValue(left, right);
}
/**
* Implements the euclidean distance between two coordinate vectors
* @param left Left coordinate vector
* @param right Right coordinate vector
* @return Distance
*/
public double euclideanDistance(double[] left, double[] right) {
double sum = 0.0;
int M = left.length;
int N = right.length;
for (int i = 0, n = Integer.max(M, N); i<n; i++) {
sum += Math.pow((i<M ? left[i] : 0.0)- (i<N ? right[i] : 0.0), 2);
}
return Math.sqrt(sum);
}
public static void generateCompleteSubgraph(List<int[]> finalList, int currentHeight, int maximumBranching, int[] currentArray) {
if (currentHeight > 0) {
for (int i = 1; i<=maximumBranching; i++) {
int[] current = new int[currentArray.length+1];
System.arraycopy(currentArray, 0, current, 0, currentArray.length);
current[currentArray.length] = i;
finalList.add(current);
generateCompleteSubgraph(finalList, currentHeight-1, maximumBranching, current);
}
}
}
public static List<int[]> generateCompleteSubgraph(int maximumBranchingFactor, int maximumHeight) {
List<int[]> result = new ArrayList<>();
if (maximumBranchingFactor > 0) {
for (int i = 1; i<=maximumBranchingFactor; i++) {
int[] current = new int[1];
current[0] = i;
result.add(current);
generateCompleteSubgraph(result, maximumHeight-1, maximumBranchingFactor, current);
}
}
return result;
}
public static int indexOfSubList(int[] array, int[] subarray) {
int sourceSize = array.length;
int targetSize = subarray.length;
int maxCandidate = sourceSize - targetSize;
nextCand:
for (int candidate = 0; candidate <= maxCandidate; candidate++) {
for (int i=0, j=candidate; i<targetSize; i++, j++)
if (!(subarray[i] == array[j]))
continue nextCand; // Element mismatch, try next cand
return candidate; // All elements of candidate matched target
}
return -1; // No candidate matched the target
}
public static boolean subArrayOf(int[] subarray, int[] array) {
return indexOfSubList(array, subarray) == 0;
}
/**
* Empirical test
* @param args
*/
public static void main(String args[]) {
// Initializing the concept
int maximumBranchingFactor = Integer.valueOf(args[0]);
double distanceFactor = Double.valueOf(args[1]);
int decayFactor = Integer.valueOf(args[2]);
int maximumHeight = Integer.valueOf(args[3]);
HierarchicalDistance hd = new HierarchicalDistance(maximumBranchingFactor, distanceFactor, decayFactor);
List<int[]> ls = generateCompleteSubgraph(maximumBranchingFactor, maximumHeight);
for (int i = 0, n = ls.size(); i<n; i++) {
int[] veci = ls.get(i);
for (int j = 0; j<n; j++) {
int[] vecj = ls.get(j);
boolean consistenceInference = hd.isConsistent(veci, vecj);
if ((subArrayOf(veci, vecj)) || (subArrayOf(vecj, veci))) {
if (!consistenceInference)
System.err.println("ERROR! the algorithm says that is not consistent, while it should be");
} else {
if (consistenceInference) {
System.err.println("ERROR! the algorithm says that is consistent, while it is not");
}
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment