Last active
August 15, 2016 11:56
-
-
Save alejandroerickson/8a0643cdaac8aed16b022f1cdad4bacd to your computer and use it in GitHub Desktop.
Check if binary tree t2 on n nodes is a subtree of binary tree t1 on n nodes in O(m+(1+epsilon)n) time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <time.h> | |
#define max(a,b) ((a>b)?a:b) | |
#define timeit(a,b){ \ | |
start=clock(); \ | |
a; \ | |
end=clock(); \ | |
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; \ | |
printf("%g seconds to %s\n",cpu_time_used,b); \ | |
} | |
typedef struct node{ | |
int value,height; | |
struct node *left; | |
struct node *right; | |
}node; | |
int count_dfs_compare; | |
int count_nodes_hashed; | |
// returns new height of subtree | |
//inserts to a random branch | |
int insert_item(node **subtree,node *theItem){ | |
int new_height; | |
if(!theItem || !subtree) exit(1); | |
if(!(*subtree)){ | |
*subtree=theItem; | |
theItem->height=1; | |
return 1; | |
} | |
if(rand()%2==0){ | |
new_height=insert_item(&(*subtree)->left,theItem); | |
}else{ | |
new_height=insert_item(&(*subtree)->right,theItem); | |
} | |
if(new_height+1 > (*subtree)->height) (*subtree)->height=new_height+1; | |
return (*subtree)->height; | |
} | |
uint32_t hash(uint64_t left, | |
uint64_t right, | |
uint64_t value, | |
uint64_t height){ | |
return ((left&(0xFFFFFFFF))*2654435761 + | |
(right&(0xFFFFFFFF))*2654435741 + | |
(value&(0xFFFFFFFF))*2654435723 + | |
(height&(0xFFFFFFFF))*2654435713); | |
} | |
uint32_t dfs_hash(node *t){ | |
if(!t) return 0; | |
return hash(dfs_hash(t->left),dfs_hash(t->right),t->value,t->height); | |
} | |
int dfs_compare_trees(node *t1, node *t2){ | |
if(!t1 && !t2) | |
return 1; | |
if(!t1 && t2) | |
return 0; | |
if(t1 && !t2) | |
return 0; | |
if(t1->value!= t2->value) | |
return 0; | |
return dfs_compare_trees(t1->left,t2->left) && | |
dfs_compare_trees(t1->right,t2->right); | |
} | |
node *rec_contains_subtree(node *t1, node *t2, uint32_t *t1ChildHash, uint32_t t2hash){ | |
uint32_t leftChildHash,rightChildHash; | |
node *res; | |
*t1ChildHash=0; | |
if(!t1) | |
return NULL; | |
if((res=rec_contains_subtree(t1->left,t2,&leftChildHash, t2hash))|| | |
(res=rec_contains_subtree(t1->right,t2,&rightChildHash,t2hash))) | |
return res; | |
count_nodes_hashed++; | |
*t1ChildHash=hash(leftChildHash,rightChildHash,t1->value,t1->height); | |
if(*t1ChildHash==t2hash){ | |
count_dfs_compare++; | |
if(dfs_compare_trees(t1,t2)) | |
return t1; | |
} | |
return NULL; | |
} | |
node *contains_subtree(node *t1, node *t2){ | |
//hash t2 | |
uint32_t t2hash = dfs_hash(t2); | |
uint32_t t1ChildHash; | |
if(!t2) | |
exit(-1); | |
return rec_contains_subtree(t1,t2,&t1ChildHash,t2hash); | |
} | |
void print_item_value(node *theItem){ | |
if(theItem){ | |
printf("Item: %d\n",theItem->value); | |
}else{printf("NULL\n");} | |
} | |
void init_node(node *foo, int value){ | |
foo->value=value; | |
foo->left=NULL; | |
foo->right=NULL; | |
} | |
void create_test_tree(node **t,node *nodes,int n){ | |
int i,height; | |
*t=NULL; | |
for(i=0;i<n;i++){ | |
init_node(&nodes[i],0); | |
height=insert_item(t,&nodes[i]); | |
//printf("i %d height %d hash %u\n",i,height,dfs_hash(*t)); | |
} | |
} | |
void check_and_report_containment(node *t1, node *t2,int m,int n){ | |
node *res=NULL; | |
clock_t start,end; | |
double cpu_time_used; | |
count_nodes_hashed=0; | |
count_dfs_compare=0; | |
printf("t1 has %d nodes\nt2 has %d nodes\n",m,n); | |
timeit(res=contains_subtree(t1,t2),"check if t1 contains t2"); | |
if(res){ | |
printf("t1 contains t2\n"); | |
}else | |
printf("t1 does not contain t2\n"); | |
printf("Did dfs_compare_trees %d times\n",count_dfs_compare); | |
printf("Hashed %d nodes of t1\n",count_nodes_hashed); | |
} | |
void check_and_report_containment_trials(node *t1, node *t2, int m, int n, int trials, int useSubtreeOft1, node *tree_data){ | |
node *res=NULL; | |
int i; | |
clock_t start,end; | |
double cpu_time_used; | |
double mean_containment; | |
double mean_dfs; | |
if(useSubtreeOft1){ | |
printf("Test containment in t1 of randomly selected subtrees of t1\n"); | |
printf("Nodes: t1 %d t2 (unknown); Trials: %d\n",m,trials); | |
}else{ | |
printf("Test containment in t1 of randomly generated trees t2\n"); | |
printf("Nodes: t1 %d t2 %d; Trials: %d\n",m,n,trials); | |
} | |
printf("All nodes hold value=0\n"); | |
count_nodes_hashed=0; | |
count_dfs_compare=0; | |
cpu_time_used =0.0; | |
mean_containment=0.0; | |
mean_dfs=0.0; | |
for(i=0;i<trials;i++){ | |
if(useSubtreeOft1){ | |
t2=&tree_data[rand()%m]; | |
}else{ | |
create_test_tree(&t2,&tree_data[m],n); | |
} | |
start=clock(); | |
res=contains_subtree(t1,t2); | |
if(res){ | |
mean_containment+=1.0; | |
} | |
end=clock(); | |
cpu_time_used += ((double) (end - start)) / CLOCKS_PER_SEC; | |
} | |
cpu_time_used/=(double)trials; | |
mean_dfs=(double)count_dfs_compare/(double)trials; | |
mean_containment/=(double)trials; | |
printf("Average per search:\n\ttime %g;\n\tdfs compares %g;\n\tprop. nodes hashed %g;\n\tcontainment %g\n\n",cpu_time_used,mean_dfs, (double)count_nodes_hashed/(double)trials/(double)m,mean_containment); | |
} | |
int main(){ | |
int i,height,m=10000000,n=100,nsmall=20,trials=100; | |
node *theItem=NULL; | |
node *t1=NULL,*t2=NULL; | |
node *tree_data=NULL; | |
node *res=NULL; | |
clock_t start,end; | |
double cpu_time_used; | |
srand(time(NULL)); | |
if(n<nsmall){ | |
printf("nsmall test must be smaller than t2 size n test\n"); | |
exit(-1); | |
} | |
timeit(tree_data=malloc((m+n)*sizeof(struct node)),"allocate trees"); | |
timeit(create_test_tree(&t1,tree_data,m),"create t1"); | |
timeit(create_test_tree(&t2,&tree_data[m],n),"create t2"); | |
check_and_report_containment(t1,t2,m,n); | |
check_and_report_containment_trials(t1,t2,m,n,trials,0,tree_data); | |
check_and_report_containment_trials(t1,t2,m,nsmall,trials,0,tree_data); | |
check_and_report_containment_trials(t1,t2,m,n,trials,1,tree_data); | |
free(tree_data); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment