Skip to content

Instantly share code, notes, and snippets.

@steveroush
Created February 17, 2024 02:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save steveroush/3aee138b327bd5293983d606b2e8cc04 to your computer and use it in GitHub Desktop.
Save steveroush/3aee138b327bd5293983d606b2e8cc04 to your computer and use it in GitHub Desktop.
A GVPR program that reports information about Graphviz files
BEGIN{
// Globals shared by every clause of this program.
// clusterCnt: incremented by graphTraverse() for each subgraph it treats as a cluster.
// cHtmlCnt:   incremented by graphTraverse() for each such cluster whose label is HTML.
int clusterCnt, cHtmlCnt;
// Subgraph cursor used by graphTraverse() while it walks the subgraph tree.
graph_t aGraph;
// Report text accumulated by bufferit().
string printBuf;
//////////////// help /////////////////////////////////////////////
// Usage text; passed to printf(2, ...) when an unknown argument is seen.
string help="
gvstats.gvpr :
a GVPR program that provides information about Graphviz input files
- the file name
- the layout engine (if set)
- node count
- edge count
- cluster count
- HTML label count
- record/Mrecord count
- total Graph area (if already processed by a layout engine)
- total Node area (if already processed by a layout engine)
- node counts by degree (total edges connecting to a node)
if the input file was previously processed by DOT
- list of nodes per rank
finally, the file itself, if the -c argument was provided to gvpr
Arguments:
a # this report will be added to the graph
# as an attribute with name='STATS'
A {optional name} # like argument a, except use 'optional name'
# instead of 'STATS'
R # request ranking info
F filename # used if file was processed & output by dot
if neither a or A arguments are used, the report is written to stdout
Usage examples:
gvpr -cf gvstats.gvpr myFiles.gv
OR
gvpr -a'AInfo' -cf gvstats.gvpr myFiles.gv
";
// Append one piece of report text to the global printBuf.
// nextLine is appended verbatim; callers supply their own line terminators.
void bufferit(string nextLine) {
  printBuf = printBuf + nextLine;
}
// Walk every subgraph below thisG, depth first, counting clusters
// (clusterCnt) and clusters that carry an HTML label (cHtmlCnt).
// A subgraph counts as a cluster when its name starts with "cluster"
// or its "cluster" attribute is "true".
// The global aGraph is the loop cursor. Each recursive call returns the
// graph it was given, and the caller assigns that back to aGraph so the
// cursor is restored before stepping to the next sibling.
// Returns thisG.
graph_t graphTraverse(graph_t thisG) {
  aGraph = fstsubg(thisG);
  while (aGraph) {
    if (match(aGraph.name,"cluster")==0 || (hasAttr(aGraph, "cluster") && aGraph.cluster=="true")) {
      clusterCnt += 1;
      if (hasAttr(aGraph, "label")) {
        if (ishtml(aGraph.label))
          cHtmlCnt += 1;
      }
    }
    // descend, then restore the cursor from the call's return value
    aGraph = graphTraverse(aGraph);
    aGraph = nxtsubg(aGraph);
  }
  return thisG;
} // end of graphTraverse
}
BEG_G{
  // Per-graph setup: declare and reset all counters, parse the -a arguments,
  // read graph-level attributes (bb, rankdir, concentrate, layout, label),
  // then count clusters via graphTraverse().
  int i, ncnt, ecnt, LR, gHtmlCnt, eHtmlCnt, nHtmlCnt, recordCnt;
  int haveRank, wantRanking, haveGraphArea, haveNodeArea;
  int alreadyConcentrating;
  int dcnt[int], lvlX[], lvlY[], rnk[int];
  float h, w, area, totNodeArea, totGraphArea, ftmp;
  float MinX=0., MinY=0., MaxX=0., MaxY=0.;
  string FN, Engine, addAtt;
  graph_t Root;

  // Reset everything so results from a previous graph do not leak into this one.
  unset(dcnt);
  unset(lvlX);
  unset(lvlY);
  unset(rnk);
  ncnt=0;
  ecnt=0;
  clusterCnt=0;
  gHtmlCnt=0;
  cHtmlCnt=0;
  nHtmlCnt=0;
  eHtmlCnt=0;
  recordCnt=0;
  haveRank=0;
  wantRanking=0;
  totNodeArea=0.;
  totGraphArea=0.;
  haveGraphArea=0;
  haveNodeArea=0;
  alreadyConcentrating=0;
  printBuf="";
  addAtt="";
  // Engine is only assigned when this graph has a layout attribute, so it
  // must be cleared here or a previous graph's engine would be reported.
  Engine="";
  Root=$G;

  // Parse the words passed through gvpr's -a option.
  i=0;
  while (i<ARGC) {
    if (ARGV[i]=="R")                 // -a "R ..."
      wantRanking=1;
    else if (ARGV[i]=="F") {          // -a "F filename ..."
      if (i+1>=ARGC) {
        // F must be followed by a name; do not index past the end of ARGV
        printf(2,"argument F requires a filename\n");
        printf(2,"%s\n",help);
        exit(1);
      }
      i++;
      FN=ARGV[i];
    }
    else if (ARGV[i]=="F*")           // -a "Ffilename ..."
      FN=substr(ARGV[i],1);
    else if (ARGV[i]=="a")            // -a "a ..."
      addAtt="STATS";
    else if (ARGV[i]=="A") {          // -a "A attributeName ..."
      if (i+1<ARGC) {
        i++;
        addAtt=ARGV[i];
      } else {
        // the name is documented as optional: fall back to the default
        addAtt="STATS";
      }
    }
    else if (ARGV[i]=="A*")           // -a "Aattributename ..."
      addAtt=substr(ARGV[i],1);
    else {
      printf(2,"unknown argument >%s<\n", ARGV[i]);
      printf(2,"%s\n",help);
      exit(1);
    }
    i++;
  }

  // A real input file name takes precedence over the F argument.
  if ($F!="<stdin>")
    FN=$F;
  else if (FN=="")
    FN="unknown";

  // A bounding box is only present after a layout engine has run.
  if (hasAttr(Root,"bb") && Root.bb!="") {
    sscanf (Root.bb, "%lf,%lf,%lf,%lf", &MinX, &MinY, &MaxX, &MaxY);
    totGraphArea=((MaxX-MinX)*(MaxY-MinY));
    haveGraphArea=1;
  }

  /*************************************************************
  hmm, if rankdir not set
  and if nodes do not have "rank" attribute
  and if no subgraphs have "rank" attribute
  how do we know that the dot engine was used?
  (other dot-only attributes? non-dot attributes?
  *************************************************************/
  // LR: 1 = ranks run along X (LR/RL), 0 = along Y (TB/BT), -1 = rankdir unknown
  LR=-1;
  if (hasAttr(Root,"rankdir")) {
    if (toupper(Root.rankdir)=="@(LR|RL)") {
      LR=1;
    } else if (toupper(Root.rankdir)=="@(TB|BT)") {
      LR=0;
    }
  }

  // alreadyConcentrating was reset to 0 above; only the true case needs handling
  if (hasAttr(Root,"concentrate") && Root.concentrate=="true")
    alreadyConcentrating=1;

  // Same @(a|b) alternation form as the rankdir patterns above.
  if (hasAttr(Root, "layout") && Root.layout=="@(dot|fdp|neato|twopi|circo|sfdp|patchwork)")
    Engine=Root.layout;

  if (hasAttr($, "label") && ishtml($.label))
    gHtmlCnt++;

  graphTraverse(Root);
}
N{
  // Per-node accounting: degree histogram, node area, rank bookkeeping,
  // HTML label count and record-shape count.
  ncnt++;
  i=$.degree;
  dcnt[i]++;

  // Node area: a missing height or width defaults to the other dimension.
  h=0;
  w=0;
  if (hasAttr($, "height") && $.height!="")
    h=(float)$.height;
  if (hasAttr($, "width") && $.width!="")
    w=(float)$.width;
  if (h==0&&w!=0)h=w;
  if (w==0&&h!=0)w=h;
  if (w!=0&&h!=0) {
    area=h*w;
    totNodeArea+=area;
    haveNodeArea=1;
  }

  // Rank bookkeeping. The original left this clause with "continue" when a
  // rank was present, which also skipped the label/record counting below;
  // an if/else keeps the two rank paths exclusive without skipping them.
  if ( hasAttr($, "rank") && $.rank!="" && $.rank=="+([0-9])" ) {
    /********* rank already set *************/
    rnk[(int)$.rank]++;
    haveRank=1;
  } else if (hasAttr($, "pos") && $.pos!="") {
    /********* rank not set: bucket nodes by position instead *********/
    float x,y;
    sscanf ($.pos, "%f,%f", &x, &y);
    if (LR==1) {
      lvlX[x]++;   // $.X could not be used here (gvpr bug noted by the author)
    } else {
      lvlY[y]++;
    }
  }

  if (hasAttr($, "label") && ishtml($.label))
    nHtmlCnt++;
  if (hasAttr($, "xlabel") && ishtml($.xlabel))
    nHtmlCnt++;
  if (hasAttr($, "shape") && ($.shape=="record" || $.shape=="Mrecord"))
    recordCnt++;
}
E{
  // Count the edge, then add one to eHtmlCnt for each of its four label
  // attributes (label, xlabel, headlabel, taillabel) that is an HTML string.
  ecnt += 1;
  if (hasAttr($, "label")) {
    if (ishtml($.label))
      eHtmlCnt += 1;
  }
  if (hasAttr($, "xlabel")) {
    if (ishtml($.xlabel))
      eHtmlCnt += 1;
  }
  if (hasAttr($, "headlabel")) {
    if (ishtml($.headlabel))
      eHtmlCnt += 1;
  }
  if (hasAttr($, "taillabel")) {
    if (ishtml($.taillabel))
      eHtmlCnt += 1;
  }
}
END_G{
  // Build the report for the graph just processed and either store it as a
  // graph attribute (when an a/A argument was given) or print it to stdout
  // wrapped in a C-style comment.
  string extra;

  bufferit(sprintf(" file: \t%s\n",FN));
  if (Engine!="") {
    bufferit(sprintf(" layout: \t%s\n", Engine));
  }
  bufferit(sprintf(" nodes: \t%5d\n", ncnt));
  bufferit(sprintf(" edges: \t%5d\n", ecnt));
  bufferit(sprintf(" clusters: \t%5d\n",clusterCnt));
  bufferit(sprintf(" HTML labels: \t%5d\n", gHtmlCnt+cHtmlCnt+nHtmlCnt+eHtmlCnt));
  if (gHtmlCnt>0)
    bufferit(sprintf(" Root: \t%5d\n", gHtmlCnt));
  if (cHtmlCnt>0)
    bufferit(sprintf(" Clusters: \t%5d\n", cHtmlCnt));
  if (nHtmlCnt>0)
    bufferit(sprintf(" Nodes: \t%5d\n", nHtmlCnt));
  if (eHtmlCnt>0)
    bufferit(sprintf(" Edges: \t%5d\n", eHtmlCnt));
  bufferit(sprintf(" record nodes:\t%5d\n", recordCnt));

  // bb is divided by 72 to report inches
  if (haveGraphArea==1) {
    bufferit(sprintf(" total Graph area: %.1f sq. in. - %.1fin x %.1fin\n", totGraphArea/(72*72), (MaxX-MinX)/72., (MaxY-MinY)/72.));
  }
  // Node area has its own flag: report it only when some node supplied a size.
  if (haveNodeArea==1) {
    bufferit(sprintf(" total Node area: %.1f sq. in.\n", totNodeArea));
  }

  bufferit("\n");
  bufferit(" (degree is the count of edges connecting to a node)\n");
  extra="";
  for (dcnt[i]) {
    if (alreadyConcentrating==0 && i>14) // 14 is arbitrary
      extra=" (\"concentrate=true\" and/or coloring edges may help nodes with large degrees)";
    bufferit(sprintf(" degree:%4d node count:%6d\n", i, dcnt[i]));
  }
  if (extra!="")
    bufferit(sprintf("%s\n", extra));

  i=0;
  if (wantRanking) {
    if (haveRank)
      i=(#rnk);
    else if (ncnt>3500) { // arbitrary
      bufferit(" To get faster ranking data, run: \n");
      bufferit(" dot -Gphase=1 myfile.gv | gvpr -f gvstats.gvpr -a\"R Ffilename\" \n");
    }
    //
    // did dot set pos values??
    // check # of lvls (X & Y)
    // overlap in both directions?
    // ???
    //
    // No explicit ranks: derive them from the distinct node coordinates
    // collected in the N clause (X for LR/RL, otherwise Y in reverse order).
    if (haveRank==0 && haveGraphArea==1) {
      i=0;
      if (LR==1) {
        for (lvlX[ftmp]) {
          rnk[i++]=lvlX[ftmp];
        }
      } else { // TB
        forr (lvlY[ftmp]) {
          rnk[i++]=lvlY[ftmp];
        }
      }
    }
    bufferit("\n");
    if (i==0)
      bufferit(" ranks not established\n");
    else {
      bufferit(sprintf(" ranks: \t%5d\n",i));
      for (rnk[i])
        bufferit(sprintf(" rank: \t%5d node count: \t%5d\n",i, rnk[i]));
      // rnk is not read again before BEG_G unsets it, so nothing more to do
    }
  }

  if (addAtt!="") {
    // NOTE(review): BEG_G sets FN to "unknown" (never "stdin") for stdin
    // input, so the else branch looks reachable only via -a "F stdin".
    if (FN!="stdin") {
      Root.linelength=99999;
      printBuf="\n" + printBuf;
      aset(Root, addAtt, printBuf);
    } else
      printf(2,"Do not know name of input/output file\n");
  } else {
    print("/*****************************************************");
    print(printBuf);
    print("*****************************************************/");
    print("");
  }
  printBuf="";
}
@rebelinux
Copy link

Amazing work!

@steveroush
Copy link
Author

Thank you.
My question is how useful is it? And why?
If one has many .gv files hanging around (and I do), it may well be better than "grep".
Or maybe not. Time will tell.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment