Created
February 17, 2024 02:10
-
-
Save steveroush/3aee138b327bd5293983d606b2e8cc04 to your computer and use it in GitHub Desktop.
A GVPR program that reports information about Graphviz files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BEGIN{
  // Cluster tallies. They are incremented by graphTraverse() and read when
  // the report is assembled, so they are declared here rather than per graph.
  int clusterCnt, cHtmlCnt;
  // Loop cursor used by graphTraverse().
  graph_t aGraph;
  // Report text accumulated by bufferit() until the report is emitted.
  string printBuf;

  //////////////// help /////////////////////////////////////////////
  // Usage text; printed to stderr when an unknown argument is seen.
  string help="
gvstats.gvpr :
a GVPR program that provides information about Graphviz input files
- the file name
- the layout engine (if set)
- node count
- edge count
- cluster count
- HTML label count
- record/Mrecord count
- total Graph area (if already processed by a layout engine)
- total Node area (if already processed by a layout engine)
- node counts by degree (total edges connecting to a node)
if the input file was previously processed by DOT
- list of nodes per rank
finally, the file itself, if the -c argument was provided to gvpr
Arguments:
a # this report will be added to the graph
# as an attribute with name='STATS'
A {optional name} # like argument a, except use 'optional name'
# instead of 'STATS'
R # request ranking info
F filename # used if file was processed & output by dot
if neither a or A arguments are used, the report is written to stdout
Usage examples:
gvpr -cf gvstats.gvpr myFiles.gv
OR
gvpr -a'AInfo' -cf gvstats.gvpr myFiles.gv
";

  // Append nextLine to the pending report text in printBuf.
  void bufferit(string nextLine) {
    printBuf+=nextLine;
  }

  // Walk every subgraph below thisG, depth first, and count clusters.
  // A subgraph counts as a cluster when its name begins with "cluster"
  // (match() returns index 0) or when it has an attribute cluster=="true".
  // Clusters whose label is an HTML-like label also bump cHtmlCnt.
  // Returns thisG unchanged.
  //
  // NOTE(review): the loop cursor aGraph is a global, so the recursive call
  // overwrites it; assigning the call's return value (the subgraph that was
  // just descended into) back to aGraph restores the cursor before
  // nxtsubg() runs. This looks deliberate -- confirm against gvpr's variable
  // scoping rules before converting aGraph to a local.
  graph_t graphTraverse(graph_t thisG) {
    for (aGraph = fstsubg(thisG); aGraph; aGraph = nxtsubg(aGraph)) {
      if (match(aGraph.name,"cluster")==0 || (hasAttr(aGraph, "cluster") && aGraph.cluster=="true")) {
        clusterCnt++;
        if (hasAttr(aGraph, "label") && ishtml(aGraph.label))
          cHtmlCnt++;
      }
      aGraph = graphTraverse(aGraph);
    }
    return thisG;
  } // end of graphTraverse
}
BEG_G{
  // Per-graph setup: declare the working variables used by the N, E and
  // END_G clauses, reset them, parse the -a arguments, and collect the
  // graph-level facts (bounding box, rankdir, concentrate, layout, label).
  int i, ncnt, ecnt, LR, gHtmlCnt, eHtmlCnt, nHtmlCnt, recordCnt;
  int haveRank, wantRanking, haveGraphArea, haveNodeArea;
  int alreadyConcentrating;
  int dcnt[int], lvlX[], lvlY[], rnk[int];
  float h, w, area, totNodeArea, totGraphArea, ftmp;
  float MinX=0., MinY=0., MaxX=0., MaxY=0.;
  string FN, Engine, addAtt;
  graph_t Root;

  // Reset everything explicitly so that nothing carries over from a
  // previously processed graph.
  unset(dcnt);
  unset(lvlX);
  unset(lvlY);
  unset(rnk);
  ncnt=0;
  ecnt=0;
  clusterCnt=0;
  gHtmlCnt=0;
  cHtmlCnt=0;
  nHtmlCnt=0;
  eHtmlCnt=0;
  recordCnt=0;
  haveRank=0;
  wantRanking=0;
  totNodeArea=0.;
  totGraphArea=0.;
  haveGraphArea=0;
  haveNodeArea=0;
  alreadyConcentrating=0;
  printBuf="";
  addAtt="";
  // Engine is only assigned when the graph has a recognised layout
  // attribute; clear it so a graph without one does not report the
  // engine of the graph processed before it.
  Engine="";
  Root=$G;

  // ---- argument parsing -------------------------------------------
  // The comparisons against "F*" and "A*" rely on gvpr treating the
  // right-hand string of == as a shell-style pattern.
  i=0;
  while (i<ARGC) {
    if (ARGV[i]=="R")            // -a "R ..."
      wantRanking=1;
    else if (ARGV[i]=="F") {     // -a "F filename ..."
      // The value is the next argument; refuse to read past the end.
      if (i+1>=ARGC) {
        printf(2,"argument F requires a filename\n");
        printf(2,"%s\n",help);
        exit(1);
      }
      FN=ARGV[++i];
    }
    else if (ARGV[i]=="F*")      // -a "Ffilename ..."
      FN=substr(ARGV[i],1);
    else if (ARGV[i]=="a")       // -a "a ..."
      addAtt="STATS";
    else if (ARGV[i]=="A") {     // -a "A attributeName ..."
      // The value is the next argument; refuse to read past the end.
      if (i+1>=ARGC) {
        printf(2,"argument A requires an attribute name\n");
        printf(2,"%s\n",help);
        exit(1);
      }
      addAtt=ARGV[++i];
    }
    else if (ARGV[i]=="A*")      // -a "Aattributename ..."
      addAtt=substr(ARGV[i],1);
    else {
      printf(2,"unknown argument >%s<\n", ARGV[i]);
      printf(2,"%s\n",help);
      exit(1);
    }
    i++;
  }

  // Prefer the real input file name; fall back to the F argument, and
  // finally to "unknown" when reading stdin without one.
  if ($F!="<stdin>")
    FN=$F;
  else if (FN=="")
    FN="unknown";

  // A non-empty bb attribute is taken as evidence that a layout engine
  // has already run; its four numbers give the graph extent.
  if (hasAttr(Root,"bb") && Root.bb!="") {
    sscanf (Root.bb, "%lf,%lf,%lf,%lf", &MinX, &MinY, &MaxX, &MaxY);
    totGraphArea=((MaxX-MinX)*(MaxY-MinY));
    haveGraphArea=1;
  }

  /*************************************************************
  hmm, if rankdir not set
  and if nodes do not have "rank" attribute
  and if no subgraphs have "rank" attribute
  how do we know that the dot engine was used?
  (other dot-only attributes? non-dot attributes?
  *************************************************************/
  // LR: 1 = ranks run along X (LR/RL), 0 = along Y (TB/BT),
  // -1 = rankdir absent or unrecognised.
  LR=-1;
  if (hasAttr(Root,"rankdir")) {
    if (toupper(Root.rankdir)=="@(LR|RL)") {
      LR = 1;
    } else if (toupper(Root.rankdir)=="@(TB|BT)") {
      LR=0;
    }
  }

  // alreadyConcentrating was reset to 0 above; only the true case needs
  // handling here.
  if (hasAttr(Root,"concentrate") && Root.concentrate=="true")
    alreadyConcentrating=1;

  if (hasAttr(Root, "layout") && Root.layout=="(dot|fdp|neato|twopi|circo|sfdp|patchwork)")
    Engine=Root.layout;

  if (hasAttr($, "label") && ishtml($.label))
    gHtmlCnt++;

  // Count clusters (and their HTML labels) over the whole subgraph tree.
  graphTraverse(Root);
}
N{
  // Per-node statistics: degree histogram, node area, rank/level
  // bookkeeping, HTML-label count and record-shape count.
  ncnt++;
  i=$.degree;
  dcnt[i]++;

  // ---- node area ---------------------------------------------------
  // If only one of height/width is present, the other is assumed equal.
  h=0;
  w=0;
  if (hasAttr($, "height") && $.height!="")
    h=(float)$.height;
  if (hasAttr($, "width") && $.width!="")
    w=(float)$.width;
  if (h==0&&w!=0)h=w;
  if (w==0&&h!=0)w=h;
  if (w!=0&&h!=0) {
    area=h*w;
    totNodeArea+=area;
    haveNodeArea=1;
  }

  // ---- rank --------------------------------------------------------
  if ( hasAttr($, "rank") && $.rank!="" && $.rank=="+([0-9])" ) {
    /********* rank already set *************/
    // The node's own numeric rank attribute is tested and used as the
    // histogram key.
    rnk[(int)$.rank]++;
    haveRank=1;
  } else if (hasAttr($, "pos") && $.pos!="") {
    /********* rank not set **********************/
    // Approximate ranks from layout positions: nodes sharing the same
    // coordinate on the ranking axis are tallied together.
    float x,y;
    sscanf ($.pos, "%f,%f", &x, &y);
    if (LR==1) {
      lvlX[x]++;
    } else {
      lvlY[y]++;
    }
  }

  // ---- labels and shapes -------------------------------------------
  // These counts apply to every node, whether or not it had a rank.
  if (hasAttr($, "label") && ishtml($.label))
    nHtmlCnt++;
  if (hasAttr($, "xlabel") && ishtml($.xlabel))
    nHtmlCnt++;
  if (hasAttr($, "shape") && ($.shape=="record" || $.shape=="Mrecord"))
    recordCnt++;
}
E{
  // Tally the edge, then add one to eHtmlCnt for each of its four label
  // attributes (label, headlabel, taillabel, xlabel) that is present and
  // is an HTML-like label.
  ecnt += 1;
  if (hasAttr($, "label")) {
    if (ishtml($.label))
      eHtmlCnt += 1;
  }
  if (hasAttr($, "headlabel")) {
    if (ishtml($.headlabel))
      eHtmlCnt += 1;
  }
  if (hasAttr($, "taillabel")) {
    if (ishtml($.taillabel))
      eHtmlCnt += 1;
  }
  if (hasAttr($, "xlabel")) {
    if (ishtml($.xlabel))
      eHtmlCnt += 1;
  }
}
END_G{
  // Assemble the report for the graph just processed and either attach it
  // to the root graph as an attribute (a / A arguments) or print it to
  // stdout wrapped in a C-style comment.
  string extra;

  // ---- basic counts ------------------------------------------------
  bufferit(sprintf(" file: \t%s\n",FN));
  if (Engine!="") {
    bufferit(sprintf(" layout: \t%s\n", Engine));
  }
  bufferit(sprintf(" nodes: \t%5d\n", ncnt));
  bufferit(sprintf(" edges: \t%5d\n", ecnt));
  bufferit(sprintf(" clusters: \t%5d\n",clusterCnt));
  bufferit(sprintf(" HTML labels: \t%5d\n", gHtmlCnt+cHtmlCnt+nHtmlCnt+eHtmlCnt));
  if (gHtmlCnt>0)
    bufferit(sprintf(" Root: \t%5d\n", gHtmlCnt));
  if (cHtmlCnt>0)
    bufferit(sprintf(" Clusters: \t%5d\n", cHtmlCnt));
  if (nHtmlCnt>0)
    bufferit(sprintf(" Nodes: \t%5d\n", nHtmlCnt));
  if (eHtmlCnt>0)
    bufferit(sprintf(" Edges: \t%5d\n", eHtmlCnt));
  bufferit(sprintf(" record nodes:\t%5d\n", recordCnt));

  // ---- areas -------------------------------------------------------
  // The bb extent is divided by 72 per axis to report inches. The node
  // area is the sum of height*width products and is printed without
  // scaling. Each line is reported only when its own data was found.
  if (haveGraphArea==1) {
    bufferit(sprintf(" total Graph area: %.1f sq. in. - %.1fin x %.1fin\n", totGraphArea/(72*72), (MaxX-MinX)/72., (MaxY-MinY)/72.));
  }
  if (haveNodeArea==1) {
    bufferit(sprintf(" total Node area: %.1f sq. in.\n", totNodeArea));
  }

  // ---- degree histogram --------------------------------------------
  bufferit("\n");
  bufferit(" (degree is the count of edges connecting to a node)\n");
  extra="";
  for (dcnt[i]) {
    if (alreadyConcentrating==0 && i>14) // 14 is arbitrary
      extra=" (\"concentrate=true\" and/or coloring edges may help nodes with large degrees)";
    bufferit(sprintf(" degree:%4d node count:%6d\n", i, dcnt[i]));
  }
  if (extra!="")
    bufferit(sprintf("%s\n", extra));

  // ---- ranking (only when requested with the R argument) -----------
  i=0;
  if (wantRanking) {
    if (haveRank)
      i=(#rnk);
    else if (ncnt>3500) { // arbitrary
      bufferit(" To get faster ranking data, run: \n");
      bufferit(" dot -Gphase=1 myfile.gv | gvpr -f gvstats.gvpr -a\"R Ffilename\" \n");
    }
    //
    // did dot set pos values??
    // check # of lvls (X & Y)
    // overlap in both directions?
    // ???
    //
    // No rank attributes were seen: derive ranks from the distinct
    // coordinates collected per node, renumbering them 0..n-1.
    if (haveRank==0 && haveGraphArea==1) {
      i=0;
      if (LR==1) {
        for (lvlX[ftmp]) {
          rnk[i++]=lvlX[ftmp];
        }
      } else { // TB
        forr (lvlY[ftmp]) {
          rnk[i++]=lvlY[ftmp];
        }
      }
    }
    bufferit("\n");
    if (i==0)
      bufferit(" ranks not established\n");
    else {
      bufferit(sprintf(" ranks: \t%5d\n",i));
      for (rnk[i])
        bufferit(sprintf(" rank: \t%5d node count: \t%5d\n",i, rnk[i]));
    }
  }

  // ---- output ------------------------------------------------------
  if (addAtt!="") {
    // NOTE(review): BEG_G sets FN to $F, to the F argument, or to
    // "unknown"; it does not appear to ever be "stdin", so the else
    // branch below looks unreachable -- confirm the intended check.
    if (FN!="stdin") {
      Root.linelength=99999;
      printBuf="\n" + printBuf;
      aset(Root, addAtt, printBuf);
    } else
      printf(2,"Do not know name of input/output file");
  } else {
    print("/*****************************************************");
    print(printBuf);
    print("*****************************************************/");
    print("");
  }
  printBuf="";
}
Thank you.
My question is how useful is it? And why?
If one has many .gv files hanging around (and I do), it may well be better than "grep".
Or maybe not. Time will tell.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Amazing work!