Created
April 14, 2022 23:12
-
-
Save ncalm/3e9d97f77e36f3c50e8c8bc60068dfbd to your computer and use it in GitHub Desktop.
This Excel LAMBDA function creates a table of summary statistics for an array of data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
DESCRIBE | |
Creates tables of statistics similar to the Analysis ToolPak add-in "Summary statistics" feature. | |
It additionally supports text columns and provides extra statistics for numeric columns. | |
Inputs | |
- data: a range or array of data with at least one column | |
- has_header: TRUE if data includes a header row, FALSE otherwise | |
Returns | |
An array with COLUMNS(data) columns and ROWS(_stats) rows (see below definition of _stats). | |
The first column is the row header, which is the description of the statistic (i.e. INDEX(_stats,r,1)) | |
The first row is the column header, which is either the column header of `data` if has_header=TRUE, | |
or Column1, Column2, ..., Column[COLUMNS(data)] if has_header=FALSE. | |
Columns containing numbers will show statistics for "Sum" through "Maximum". | |
Columns containing all text will show statistics for "Distinct count" through "Least common text". | |
Dependencies | |
RECURSIVEFILTER | |
https://gist.github.com/ncalm/f1c7b5bd8fe86528da88798b545053c8 | |
GROUPAGGREGATE | |
https://gist.github.com/ncalm/29c473a940997c3b112bf06e7e71cf15 | |
Note: the "pretty" formatted version will NOT paste into the Name Manager due to length limits. | |
If pasting into Name Manager, use the compact version at the bottom of this file. | |
DESCRIBE =LAMBDA(data,has_header, | |
LET( | |
rng,IF(has_header,INDEX(data,2,1):INDEX(data,ROWS(data),COLUMNS(data)),data), | |
_header,IF( | |
has_header, | |
INDEX(data,1,1):INDEX(data,1,COLUMNS(data)), | |
"Column"&SEQUENCE(1,COLUMNS(data)) | |
), | |
_stats, | |
{"Statistic"; | |
"Sum"; | |
"Mean"; | |
"Count"; | |
"Mode"; | |
"Standard Deviation"; | |
"Sample Variance"; | |
"Standard Error"; | |
"Kurtosis"; | |
"Skewness"; | |
"Confidence Level (95.0%)"; | |
"1st quartile"; | |
"Median"; | |
"3rd quartile"; | |
"Range"; | |
"Distinct count"; | |
"Rows"; | |
"Minimum"; | |
"Maximum"; | |
"Length of longest text"; | |
"Length of shortest text"; | |
"Most common text"; | |
"Least common text"}, | |
MAKEARRAY( | |
COUNTA(_stats), | |
COLUMNS(rng)+1, | |
LAMBDA(r,c, | |
IFS( | |
c=1, | |
INDEX(_stats,r,1), | |
r=1, | |
INDEX(_header,1,c-1), | |
TRUE, | |
LET( | |
_m,INDEX(rng,,c-1), | |
_istxt,ISTEXT(_m), | |
_hastxt,OR(_istxt), | |
_cnt,COUNT(_m), | |
_alltxt,SUM(N(_istxt))=ROWS(_m), | |
_mean,AVERAGE(_m), | |
_median,MEDIAN(_m), | |
_stddev,STDEV.S(_m), | |
_stderr,_stddev/SQRT(_cnt), | |
_mode,MODE.SNGL(_m), | |
_var_s,VAR.S(_m), | |
_kurt,KURT(_m), | |
_skew,SKEW(_m), | |
_max,MAX(_m), | |
_min,MIN(_m), | |
_range,_max-_min, | |
_sum,SUM(_m), | |
_conf,CONFIDENCE.T(0.05,_stddev,_cnt), | |
_q_one,QUARTILE.EXC(_m,1), | |
_q_three,QUARTILE.EXC(_m,3), | |
_rows,ROWS(_m), | |
_txtm,FILTER(_m,_istxt), | |
_tfreqs,IF(_alltxt,GROUPAGGREGATE(CHOOSE({1,2},_txtm,_txtm),{"group","counta"}),#N/A), | |
_tvals,INDEX(_tfreqs,,1), | |
_tcounts,INDEX(_tfreqs,,2), | |
_dcount,COUNTA(UNIQUE(_m)), | |
_long,IF(_hastxt,MAX(LEN(_txtm)),#N/A), | |
_short,IF(_hastxt,MIN(LEN(_txtm)),#N/A), | |
_mosttxt,TEXTJOIN( | |
",", | |
TRUE, | |
INDEX( | |
_tvals, | |
XMATCH(MAX(_tcounts),_tcounts), | |
1 | |
) | |
)&" ("&MAX(_tcounts)&")", | |
_leasttxt,TEXTJOIN( | |
",", | |
TRUE, | |
INDEX( | |
_tvals, | |
XMATCH(MIN(_tcounts),_tcounts), | |
1 | |
) | |
)&" ("&MIN(_tcounts)&")", | |
_mintxt,INDEX(SORT(_tvals),1), | |
_maxtxt,INDEX(SORT(_tvals,,-1),1), | |
IF( | |
AND(_alltxt,r<16), | |
#N/A, | |
CHOOSE( | |
r-1, | |
_sum, | |
_mean, | |
_cnt, | |
_mode, | |
_stddev, | |
_var_s, | |
_stderr, | |
_kurt, | |
_skew, | |
_conf, | |
_q_one, | |
_median, | |
_q_three, | |
_range, | |
_dcount, | |
_rows, | |
IF(_alltxt,_mintxt,_min), | |
IF(_alltxt,_maxtxt,_max), | |
_long, | |
_short, | |
_mosttxt, | |
_leasttxt | |
) | |
) | |
) | |
) | |
) | |
) | |
) | |
); | |
*/ | |
DESCRIBE =LAMBDA(data,has_header,LET(rng,IF(has_header,INDEX(data,2,1):INDEX(data,ROWS(data),COLUMNS(data)),data),_header,IF(has_header,INDEX(data,1,1):INDEX(data,1,COLUMNS(data)),"Column"&SEQUENCE(1,COLUMNS(data))),_stats,{"Statistic";"Sum";"Mean";"Count";"Mode";"Standard Deviation";"Sample Variance";"Standard Error";"Kurtosis";"Skewness";"Confidence Level (95.0%)";"1st quartile";"Median";"3rd quartile";"Range";"Distinct count";"Rows";"Minimum";"Maximum";"Length of longest text";"Length of shortest text";"Most common text";"Least common text"},MAKEARRAY(COUNTA(_stats),COLUMNS(rng)+1,LAMBDA(r,c,IFS(c=1,INDEX(_stats,r,1),r=1,INDEX(_header,1,c-1),TRUE,LET(_m,INDEX(rng,,c-1),_istxt,ISTEXT(_m),_hastxt,OR(_istxt),_cnt,COUNT(_m),_alltxt,SUM(N(_istxt))=ROWS(_m),_mean,AVERAGE(_m),_median,MEDIAN(_m),_stddev,STDEV.S(_m),_stderr,_stddev/SQRT(_cnt),_mode,MODE.SNGL(_m),_var_s,VAR.S(_m),_kurt,KURT(_m),_skew,SKEW(_m),_max,MAX(_m),_min,MIN(_m),_range,_max-_min,_sum,SUM(_m),_conf,CONFIDENCE.T(0.05,_stddev,_cnt),_q_one,QUARTILE.EXC(_m,1),_q_three,QUARTILE.EXC(_m,3),_rows,ROWS(_m),_txtm,FILTER(_m,_istxt),_tfreqs,IF(_alltxt,GROUPAGGREGATE(CHOOSE({1,2},_txtm,_txtm),{"group","counta"}),#N/A),_tvals,INDEX(_tfreqs,,1),_tcounts,INDEX(_tfreqs,,2),_dcount,COUNTA(UNIQUE(_m)),_long,IF(_hastxt,MAX(LEN(_txtm)),#N/A),_short,IF(_hastxt,MIN(LEN(_txtm)),#N/A),_mosttxt,TEXTJOIN(",",TRUE,INDEX(_tvals,XMATCH(MAX(_tcounts),_tcounts),1))&" ("&MAX(_tcounts)&")",_leasttxt,TEXTJOIN(",",TRUE,INDEX(_tvals,XMATCH(MIN(_tcounts),_tcounts),1))&" ("&MIN(_tcounts)&")",_mintxt,INDEX(SORT(_tvals),1),_maxtxt,INDEX(SORT(_tvals,,-1),1),IF(AND(_alltxt,r<16),#N/A,CHOOSE(r-1,_sum,_mean,_cnt,_mode,_stddev,_var_s,_stderr,_kurt,_skew,_conf,_q_one,_median,_q_three,_range,_dcount,_rows,IF(_alltxt,_mintxt,_min),IF(_alltxt,_maxtxt,_max),_long,_short,_mosttxt,_leasttxt)))))))); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment