Skip to content

Instantly share code, notes, and snippets.

@alejandroerickson
Last active August 15, 2016 11:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alejandroerickson/8a0643cdaac8aed16b022f1cdad4bacd to your computer and use it in GitHub Desktop.
Save alejandroerickson/8a0643cdaac8aed16b022f1cdad4bacd to your computer and use it in GitHub Desktop.
Check if binary tree t2 on n nodes is a subtree of binary tree t1 on n nodes in O(m+(1+epsilon)n) time
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define max(a,b) ((a>b)?a:b)
#define timeit(a,b){ \
start=clock(); \
a; \
end=clock(); \
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; \
printf("%g seconds to %s\n",cpu_time_used,b); \
}
typedef struct node{
int value,height;
struct node *left;
struct node *right;
}node;
int count_dfs_compare;
int count_nodes_hashed;
// returns new height of subtree
//inserts to a random branch
int insert_item(node **subtree,node *theItem){
int new_height;
if(!theItem || !subtree) exit(1);
if(!(*subtree)){
*subtree=theItem;
theItem->height=1;
return 1;
}
if(rand()%2==0){
new_height=insert_item(&(*subtree)->left,theItem);
}else{
new_height=insert_item(&(*subtree)->right,theItem);
}
if(new_height+1 > (*subtree)->height) (*subtree)->height=new_height+1;
return (*subtree)->height;
}
uint32_t hash(uint64_t left,
uint64_t right,
uint64_t value,
uint64_t height){
return ((left&(0xFFFFFFFF))*2654435761 +
(right&(0xFFFFFFFF))*2654435741 +
(value&(0xFFFFFFFF))*2654435723 +
(height&(0xFFFFFFFF))*2654435713);
}
uint32_t dfs_hash(node *t){
if(!t) return 0;
return hash(dfs_hash(t->left),dfs_hash(t->right),t->value,t->height);
}
int dfs_compare_trees(node *t1, node *t2){
if(!t1 && !t2)
return 1;
if(!t1 && t2)
return 0;
if(t1 && !t2)
return 0;
if(t1->value!= t2->value)
return 0;
return dfs_compare_trees(t1->left,t2->left) &&
dfs_compare_trees(t1->right,t2->right);
}
node *rec_contains_subtree(node *t1, node *t2, uint32_t *t1ChildHash, uint32_t t2hash){
uint32_t leftChildHash,rightChildHash;
node *res;
*t1ChildHash=0;
if(!t1)
return NULL;
if((res=rec_contains_subtree(t1->left,t2,&leftChildHash, t2hash))||
(res=rec_contains_subtree(t1->right,t2,&rightChildHash,t2hash)))
return res;
count_nodes_hashed++;
*t1ChildHash=hash(leftChildHash,rightChildHash,t1->value,t1->height);
if(*t1ChildHash==t2hash){
count_dfs_compare++;
if(dfs_compare_trees(t1,t2))
return t1;
}
return NULL;
}
node *contains_subtree(node *t1, node *t2){
//hash t2
uint32_t t2hash = dfs_hash(t2);
uint32_t t1ChildHash;
if(!t2)
exit(-1);
return rec_contains_subtree(t1,t2,&t1ChildHash,t2hash);
}
void print_item_value(node *theItem){
if(theItem){
printf("Item: %d\n",theItem->value);
}else{printf("NULL\n");}
}
void init_node(node *foo, int value){
foo->value=value;
foo->left=NULL;
foo->right=NULL;
}
void create_test_tree(node **t,node *nodes,int n){
int i,height;
*t=NULL;
for(i=0;i<n;i++){
init_node(&nodes[i],0);
height=insert_item(t,&nodes[i]);
//printf("i %d height %d hash %u\n",i,height,dfs_hash(*t));
}
}
void check_and_report_containment(node *t1, node *t2,int m,int n){
node *res=NULL;
clock_t start,end;
double cpu_time_used;
count_nodes_hashed=0;
count_dfs_compare=0;
printf("t1 has %d nodes\nt2 has %d nodes\n",m,n);
timeit(res=contains_subtree(t1,t2),"check if t1 contains t2");
if(res){
printf("t1 contains t2\n");
}else
printf("t1 does not contain t2\n");
printf("Did dfs_compare_trees %d times\n",count_dfs_compare);
printf("Hashed %d nodes of t1\n",count_nodes_hashed);
}
void check_and_report_containment_trials(node *t1, node *t2, int m, int n, int trials, int useSubtreeOft1, node *tree_data){
node *res=NULL;
int i;
clock_t start,end;
double cpu_time_used;
double mean_containment;
double mean_dfs;
if(useSubtreeOft1){
printf("Test containment in t1 of randomly selected subtrees of t1\n");
printf("Nodes: t1 %d t2 (unknown); Trials: %d\n",m,trials);
}else{
printf("Test containment in t1 of randomly generated trees t2\n");
printf("Nodes: t1 %d t2 %d; Trials: %d\n",m,n,trials);
}
printf("All nodes hold value=0\n");
count_nodes_hashed=0;
count_dfs_compare=0;
cpu_time_used =0.0;
mean_containment=0.0;
mean_dfs=0.0;
for(i=0;i<trials;i++){
if(useSubtreeOft1){
t2=&tree_data[rand()%m];
}else{
create_test_tree(&t2,&tree_data[m],n);
}
start=clock();
res=contains_subtree(t1,t2);
if(res){
mean_containment+=1.0;
}
end=clock();
cpu_time_used += ((double) (end - start)) / CLOCKS_PER_SEC;
}
cpu_time_used/=(double)trials;
mean_dfs=(double)count_dfs_compare/(double)trials;
mean_containment/=(double)trials;
printf("Average per search:\n\ttime %g;\n\tdfs compares %g;\n\tprop. nodes hashed %g;\n\tcontainment %g\n\n",cpu_time_used,mean_dfs, (double)count_nodes_hashed/(double)trials/(double)m,mean_containment);
}
int main(){
int i,height,m=10000000,n=100,nsmall=20,trials=100;
node *theItem=NULL;
node *t1=NULL,*t2=NULL;
node *tree_data=NULL;
node *res=NULL;
clock_t start,end;
double cpu_time_used;
srand(time(NULL));
if(n<nsmall){
printf("nsmall test must be smaller than t2 size n test\n");
exit(-1);
}
timeit(tree_data=malloc((m+n)*sizeof(struct node)),"allocate trees");
timeit(create_test_tree(&t1,tree_data,m),"create t1");
timeit(create_test_tree(&t2,&tree_data[m],n),"create t2");
check_and_report_containment(t1,t2,m,n);
check_and_report_containment_trials(t1,t2,m,n,trials,0,tree_data);
check_and_report_containment_trials(t1,t2,m,nsmall,trials,0,tree_data);
check_and_report_containment_trials(t1,t2,m,n,trials,1,tree_data);
free(tree_data);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment