Skip to content

Instantly share code, notes, and snippets.

@mclements
Last active August 3, 2023 13:21
Show Gist options
  • Save mclements/e2746fda77a2c8bb43607df3b37fa16d to your computer and use it in GitHub Desktop.
Save mclements/e2746fda77a2c8bb43607df3b37fa16d to your computer and use it in GitHub Desktop.
Example of using Harbour and the new SUMMARIZE command for https://rosettacode.org/wiki/Merge_and_aggregate_datasets
rosettacode example:
patient table: NUM PATIENT_ID LASTNAME
1 1001 Hopper
2 2002 Wirth
3 3003 Kemeny
4 4004 Gosling
5 5005 Kurtz
visit table: NUM PATIENT_ID SCORE VISIT_DATE
1 2002 6.8 2020-09-10
2 1001 5.5 2020-09-17
3 4004 8.4 2020-09-24
4 2002 0.0 2020-10-08
5 1001 6.6 - -
6 3003 0.0 2020-11-12
7 4004 7.0 2020-10-05
8 1001 5.3 2020-11-19
summ2 table: NUM PATIENT_ID LASTNAME N SUM_SCORE AVG_SCORE MAX_DATE
1 1001 Hopper 3 17.4 5.8 2020-11-19
2 2002 Wirth 2 6.8 3.4 2020-10-08
3 3003 Kemeny 1 0.0 0.0 2020-11-12
4 4004 Gosling 2 15.4 7.7 2020-10-05
User-defined aggregate:
summ3 table: NUM PATIENT_ID MEDIAN
1 1001 5.5
2 2002 3.0
3 3003 0.0
4 4004 7.0
.PHONY: clean
rosetta2: clean rosetta2.prg
hbmk2 -run rosetta2.prg hbnf.hbc
clean:
rm -f rosetta2
rm -f patient.dbf visit.dbf summ1.dbf summ2.dbf summ3.dbf
// Example: https://rosettacode.org/wiki/Merge_and_aggregate_datasets
// Provides a new SUMMARIZE command for aggregates in Harbour
// Licence: GPL >= 2
// Author: Mark Clements, 2023-07-21
// Adapted from src/rdd/dbtotal.prg
#include "dbstruct.ch"
#include "error.ch"
// Use ft_AMedian() in an example of a user-defined aggregate
// For compilation, add: hbnf.hbc
#require "hbnf"
#command SUMMARIZE [<func1>(<field1>) AS <name1>][, <funcN>(<fieldN>) AS <nameN>] ;
[TO <(f)>] [ON <key>] ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
[RECORD <rec>] [<rest:REST>] [ALL] [VIA <rdd>] ;
[CODEPAGE <cp>] => ;
__dbSummarize( <(f)>, <"key">, {<"field1">[, <"fieldN">]}, { <"name1">[, <"nameN">] }, ;
{ <(func1)>[, <(funcN)>] },,, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.>, <rdd>,, <cp> )
PROCEDURE Main()
local pStruct, vStruct
set date format "yyyy-mm-dd"
? "rosettacode example:"
pStruct := {{"patient_id", "n", 8, 0}, {"lastname", "c", 10, 0 }}
dbCreate( "patient", pStruct, "DBFCDX", .t., "patient" )
(dbAppend(), patient->patient_id := 1001, patient->lastname := "Hopper")
(dbAppend(), patient->patient_id := 2002, patient->lastname := "Wirth")
(dbAppend(), patient->patient_id := 3003, patient->lastname := "Kemeny")
(dbAppend(), patient->patient_id := 4004, patient->lastname := "Gosling")
(dbAppend(), patient->patient_id := 5005, patient->lastname := "Kurtz")
? "patient table:", "NUM", "PATIENT_ID", "LASTNAME"
LIST patient_id, lastname
vStruct := {{"patient_id", "n", 8, 0}, {"visit_date", "d", 10, 0}, {"score", "n", 8, 1}}
dbCreate( "visit", vStruct, "DBFCDX", .t., "visit" )
(dbAppend(), visit->patient_id := 2002, visit->visit_date := ctod("2020-09-10"), visit->score := 6.8)
(dbAppend(), visit->patient_id := 1001, visit->visit_date := ctod("2020-09-17"), visit->score := 5.5)
(dbAppend(), visit->patient_id := 4004, visit->visit_date := ctod("2020-09-24"), visit->score := 8.4)
(dbAppend(), visit->patient_id := 2002, visit->visit_date := ctod("2020-10-08"))
(dbAppend(), visit->patient_id := 1001, visit->score := 6.6)
(dbAppend(), visit->patient_id := 3003, visit->visit_date := ctod("2020-11-12"))
(dbAppend(), visit->patient_id := 4004, visit->visit_date := ctod("2020-10-05"), visit->score := 7.0)
(dbAppend(), visit->patient_id := 1001, visit->visit_date := ctod("2020-11-19"), visit->score := 5.3)
? "visit table:", "NUM", "PATIENT_ID", "SCORE", "VISIT_DATE"
LIST patient_id, score, visit_date
USE visit
INDEX ON visit->patient_id TO visit_id
SUMMARIZE COUNT(patient_id) AS n, SUM(score) as sum_score, AVG(score) AS avg_score, ;
MAX(visit_date) AS max_date TO summ1 ON patient_id
USE patient
USE summ1
JOIN WITH patient to summ2 FOR patient_id == patient->patient_id FIELDS patient_id, ;
patient->lastname, n, sum_score, avg_score, max_date
USE summ2
? "summ2 table:", "NUM", "PATIENT_ID", "LASTNAME", "N", "SUM_SCORE", "AVG_SCORE", "MAX_DATE"
LIST patient_id, lastname, n, sum_score, avg_score, max_date
?
? "User-defined aggregate:"
USE visit
INDEX ON visit->patient_id TO visit_id
__dbSummarize("summ3", "patient_id", {"score"}, {"median"}, {{|x,agg| AAdd(agg,x), agg}}, {{}}, ;
{{|x| ft_AMedian(x)}})
USE summ3
? "summ3 table:", "NUM", "PATIENT_ID", "MEDIAN"
LIST patient_id, median
RETURN
FUNCTION __dbSummarize( cFile, xKey, aFields, aNames, xUpdate, ;
aInit, xFinalize, ;
xFor, xWhile, nNext, nRec, lRest, ;
cRDD, nConnection, cCodePage )
LOCAL nOldArea
LOCAL nNewArea
LOCAL aOldDbStruct
LOCAL aNewDbStruct
LOCAL aGetField
LOCAL aPutField
LOCAL aFieldsAgg
LOCAL lDbTransRecord
LOCAL xCurKey
LOCAL bWhileBlock
LOCAL bForBlock
LOCAL bKeyBlock
LOCAL oError
LOCAL lError := .F.
IF EMPTY( aNames )
aNames := AClone( aFields )
ENDIF
IF EMPTY( xUpdate )
xUpdate := Array(Len(aFields))
AFill(xUpdate, {|x,agg| x+agg})
ENDIF
IF EMPTY( xFinalize )
xFinalize := Array(Len(aFields))
AFill(xFinalize, {|x| x})
ENDIF
IF EMPTY( aInit )
aInit := Array(Len(aFields))
AFill(aInit, 0)
ENDIF
DO CASE
CASE HB_ISEVALITEM( xWhile )
bWhileBlock := xWhile
lRest := .T.
CASE HB_ISSTRING( xWhile ) .AND. ! Empty( xWhile )
bWhileBlock := hb_macroBlock( xWhile )
lRest := .T.
OTHERWISE
bWhileBlock := {|| .T. }
ENDCASE
DO CASE
CASE HB_ISEVALITEM( xFor )
bForBlock := xFor
CASE HB_ISSTRING( xFor ) .AND. ! Empty( xFor )
bForBlock := hb_macroBlock( xFor )
OTHERWISE
bForBlock := {|| .T. }
ENDCASE
__defaultNIL( @lRest, .F. )
IF nRec != NIL
dbGoto( nRec )
nNext := 1
ELSEIF nNext == NIL
nNext := -1
IF ! lRest
dbGoTop()
ENDIF
ELSE
lRest := .T.
ENDIF
nOldArea := Select()
hOldDbStruct := hb_Hash()
AEval(dbStruct(), {|aField| iif(aField[DBS_TYPE] == "M", NIL, ;
hb_HSet(hOldDbStruct, aField[DBS_NAME], aField))})
aNewDbStruct := {}
IF ! Empty( xKey )
AAdd(aNewDbStruct, hb_HGet(hOldDbStruct, upper(xKey)))
ENDIF
AEval(aNames, {|cName, i| aNames[i] := iif(empty(aNames[i]), ;
aFields[i], aNames[i])})
AEval(aFields, {| cField, i | aField := hb_HGet(hOldDbStruct, upper(cField)), ;
AAdd(aNewDbStruct, {aNames[i], AField[2], aField[3], aField[4]})})
IF aHasDups(aNames)
? "ERROR: Duplicate names in __dbSummarize()"
RETURN .F.
ENDIF
IF Empty( aNewDbStruct )
? "ERROR: Empty new table in __dbSummarize()"
RETURN .F.
ENDIF
FOR i := 1 TO Len(xUpdate)
IF HB_ISSTRING(xUpdate[i])
DO CASE
CASE upper(xUpdate[i]) == "MAX"
xUpdate[i] := {|x,agg| max(x,agg)}
xFinalize[i] := {|x| x}
aInit[i] := iif(hb_HGet(hOldDbStruct,upper(aFields[i]))[DBS_TYPE]=="D", ;
ctod("19000101"),-9999999999999999999999999999999999999999999999999)
CASE upper(xUpdate[i]) == "MIN"
xUpdate[i] := {|x,agg| min(x,agg)}
xFinalize[i] := {|x| x}
aInit[i] := iif(hb_HGet(hOldDbStruct,upper(aFields[i]))[DBS_TYPE]=="D", ;
ctod("99991231"), 9999999999999999999999999999999999999999999999999)
CASE upper(xUpdate[i]) == "SUM"
xUpdate[i] := {|x,agg| x+agg}
xFinalize[i] := {|x| x}
aInit[i] := 0
CASE upper(xUpdate[i]) == "COUNT"
aNewDbStruct[iif(empty(xKey),i,i+1)][2] := "I"
xUpdate[i] := {|x,agg| agg+1}
xFinalize[i] := {|x| x}
aInit[i] := 0
CASE upper(xUpdate[i]) == "AVG"
xUpdate[i] := {|x,agg| {1+agg[1], x+agg[2]}}
xFinalize[i] := {|x| x[2]/x[1]}
aInit[i] := {0,0}
ENDCASE
ENDIF
NEXT
BEGIN SEQUENCE
IF HB_ISSTRING( xKey ) .AND. ! Empty( xKey )
bKeyBlock := hb_macroBlock( xKey )
ELSE
bKeyBlock := {|| NIL }
ENDIF
aGetField := {}
AEval( aFields, {| cField | AAdd( aGetField, __GetField( cField ) ) } )
/* Keep it open after creating it. */
dbCreate( cFile, aNewDbStruct, cRDD, .T., "", , cCodePage, nConnection )
nNewArea := Select()
aNewField := {}
AEval( aNames, {| cField | AAdd( aNewField, __GetField( cField ) ) } )
dbSelectArea( nOldArea )
DO WHILE ! Eof() .AND. nNext != 0 .AND. Eval( bWhileBlock )
lDbTransRecord := .F.
aFieldsAgg := AClone(aInit)
xCurKey := Eval( bKeyBlock )
DO WHILE ! Eof() .AND. nNext-- != 0 .AND. Eval( bWhileBlock ) .AND. ;
xCurKey == Eval( bKeyBlock )
IF Eval( bForBlock )
IF ! lDbTransRecord
__dbTransRec( nNewArea, aNewDbStruct )
dbSelectArea( nOldArea )
lDbTransRecord := .T.
ENDIF
AEval( aGetField, {| bFieldBlock, nFieldPos | ;
aFieldsAgg[ nFieldPos ] := Eval(xUpdate[nFieldPos], Eval( bFieldBlock ), aFieldsAgg[ nFieldPos ]) } )
ENDIF
dbSkip()
ENDDO
IF lDbTransRecord
dbSelectArea( nNewArea )
AEval( aNewField, {| bFieldBlock, nFieldPos | ;
Eval( bFieldBlock, Eval(xFinalize[nFieldPos], aFieldsAgg[ nFieldPos ]) ) } )
dbSelectArea( nOldArea )
ENDIF
ENDDO
RECOVER USING oError
lError := .T.
END SEQUENCE
IF nNewArea != NIL
dbSelectArea( nNewArea )
dbCloseArea()
ENDIF
dbSelectArea( nOldArea )
IF lError
Break( oError )
ENDIF
RETURN .T.
STATIC FUNCTION __GetField( cField )
LOCAL nCurrArea := Select()
LOCAL nPos
LOCAL oError
/* Is the field aliased? */
IF ( nPos := At( "->", cField ) ) > 0
IF Select( Left( cField, nPos - 1 ) ) != nCurrArea
oError := ErrorNew()
oError:severity := ES_ERROR
oError:genCode := EG_SYNTAX
oError:subSystem := "DBCMD"
oError:canDefault := .T.
oError:operation := cField
oError:subCode := 1101
IF hb_defaultValue( Eval( ErrorBlock(), oError ), .T. )
__errInHandler()
ENDIF
Break( oError )
ENDIF
cField := SubStr( cField, nPos + 2 )
ENDIF
RETURN FieldBlock( cField )
FUNCTION __dbTransRec( nDstArea, aFieldsStru )
RETURN __dbTrans( nDstArea, aFieldsStru, , , 1 )
FUNCTION AHasDups(anArray)
LOCAL h := hb_Hash(), x, i
hb_HSet(h, anArray[1], 1)
FOR i := 2 to Len(anArray)
x := anArray[i]
IF hb_hHasKey(h,x)
RETURN .T.
ELSE
hb_HSet(h, x, 1)
ENDIF
NEXT
RETURN .F.
PROCEDURE Test_AHasDups()
? "Expected: .F.: observed:", AHasDups({1})
? "Expected: .F.: observed:", AHasDups({1,2})
? "Expected: .F.: observed:", AHasDups({1,2,3})
? "Expected: .T.: observed:", AHasDups({2,1,1})
? "Expected: .T.: observed:", AHasDups({1,1,2})
? "Expected: .T.: observed:", AHasDups({1,2,1})
RETURN
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment