Skip to content

Instantly share code, notes, and snippets.

@zarzen
Created January 11, 2020 04:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zarzen/c2dca46542dc8e6f715ff210bbfb5c11 to your computer and use it in GitHub Desktop.
Save zarzen/c2dca46542dc8e6f715ff210bbfb5c11 to your computer and use it in GitHub Desktop.
mimic distributed training results
{
"folder": "20191220-013118-40Gbit-100Gbit-8p3dn-resnet101-imagenet",
"reduced_params": 178054802.176,
"actual_model_size": 178618432,
"missed_param_percent": 0.003155496427154847,
"mimic_coll_ops_time": 129155.102,
"horovod_coll_ops_time": 142147.65263157897,
"mimic_dur * (model_size/reduced_size)": 129563.94055149835,
"horovod_overhead": 0.09712356714775058
}
{
"folder": "20191220-024540-1Gbit-100Gbit-4p3dn-resnet101-imagenet",
"reduced_params": 178049481.984,
"actual_model_size": 178618432,
"missed_param_percent": 0.0031852816623090872,
"mimic_coll_ops_time": 2839662.96,
"horovod_coll_ops_time": 2801185.9684210527,
"mimic_dur * (model_size/reduced_size)": 2848736.9896940137,
"horovod_overhead": -0.016691966104623986
}
{
"folder": "20191219-164914-25Gbit-100Gbit-8p3dn-vgg16-CIFAR10",
"reduced_params": 513848183.7714286,
"actual_model_size": 553497812,
"missed_param_percent": 0.07163466118375805,
"mimic_coll_ops_time": 339328.77857142856,
"horovod_coll_ops_time": 367171.37391304347,
"mimic_dur * (model_size/reduced_size)": 365512.1150948036,
"horovod_overhead": 0.004539545338488935
}
{
"folder": "20191219-072606-25Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177772844.8,
"actual_model_size": 178618432,
"missed_param_percent": 0.004734042229191599,
"mimic_coll_ops_time": 137473.0,
"horovod_coll_ops_time": 140149.6,
"mimic_dur * (model_size/reduced_size)": 138126.89857082153,
"horovod_overhead": 0.014643790964012537
}
{
"folder": "20191220-034144-40Gbit-100Gbit-4p3dn-ResNet50-imagenet",
"reduced_params": 102123143.936,
"actual_model_size": 102440820,
"missed_param_percent": 0.0031010691245930634,
"mimic_coll_ops_time": 60696.171,
"horovod_coll_ops_time": 82135.21684210526,
"mimic_dur * (model_size/reduced_size)": 60884.97953017251,
"horovod_overhead": 0.34902265675234173
}
{
"folder": "20191219-164534--100Gbit-8p3dn-vgg16-CIFAR10",
"reduced_params": 530454960.45714283,
"actual_model_size": 553497812,
"missed_param_percent": 0.04163133266885827,
"mimic_coll_ops_time": 165860.26428571428,
"horovod_coll_ops_time": 271983.93043478264,
"mimic_dur * (model_size/reduced_size)": 173065.1991655787,
"horovod_overhead": 0.5715691643735045
}
{
"folder": "20191220-025411-25Gbit-100Gbit-2p3dn-resnet101-imagenet",
"reduced_params": 178073714.432,
"actual_model_size": 178618432,
"missed_param_percent": 0.003049615663404708,
"mimic_coll_ops_time": 102683.5358,
"horovod_coll_ops_time": 134275.93263157894,
"mimic_dur * (model_size/reduced_size)": 102997.63901322844,
"horovod_overhead": 0.3036797145838778
}
{
"folder": "20191220-040515-40Gbit-100Gbit-2p3dn-ResNet50-imagenet",
"reduced_params": 102146874.624,
"actual_model_size": 102440820,
"missed_param_percent": 0.0028694164689427714,
"mimic_coll_ops_time": 50611.0904,
"horovod_coll_ops_time": 81384.06315789474,
"mimic_dur * (model_size/reduced_size)": 50756.732604444915,
"horovod_overhead": 0.6034141478753835
}
{
"folder": "20191219-172457-10Gbit-100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 515235125.6615385,
"actual_model_size": 553497812,
"missed_param_percent": 0.06912888453922474,
"mimic_coll_ops_time": 811357.7461538462,
"horovod_coll_ops_time": 876131.1531914894,
"mimic_dur * (model_size/reduced_size)": 871611.2603324567,
"horovod_overhead": 0.005185675156730667
}
{
"folder": "20191220-024834--100Gbit-2p3dn-resnet101-imagenet",
"reduced_params": 178068861.184,
"actual_model_size": 178618432,
"missed_param_percent": 0.0030767867002662666,
"mimic_coll_ops_time": 106282.8534,
"horovod_coll_ops_time": 140156.67368421052,
"mimic_dur * (model_size/reduced_size)": 106610.87231403962,
"horovod_overhead": 0.3146564758550732
}
Error when drawing 20191220-085205-40Gbit-100Gbit-p3dn-vgg16-CIFAR10
'model_log'
{
"folder": "20191219-045621-25Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177785109.02857143,
"actual_model_size": 178618432,
"missed_param_percent": 0.004665380622244927,
"mimic_coll_ops_time": 137156.05714285714,
"horovod_coll_ops_time": 140979.0,
"mimic_dur * (model_size/reduced_size)": 137798.94165502032,
"horovod_overhead": 0.023077523722504052
}
{
"folder": "20191220-061036-10Gbit-100Gbit-8p3dn-vgg16-imagenet",
"reduced_params": 509510824.704,
"actual_model_size": 553497812,
"missed_param_percent": 0.0794709325716359,
"mimic_coll_ops_time": 849864.102,
"horovod_coll_ops_time": 899652.6989473684,
"mimic_dur * (model_size/reduced_size)": 923234.4008149821,
"horovod_overhead": -0.025542486119231517
}
{
"folder": "20191220-020719-1Gbit-100Gbit-8p3dn-resnet101-imagenet",
"reduced_params": 178063679.744,
"actual_model_size": 178618432,
"missed_param_percent": 0.00310579512869093,
"mimic_coll_ops_time": 3053031.58,
"horovod_coll_ops_time": 2858647.227368421,
"mimic_dur * (model_size/reduced_size)": 3062543.2117885784,
"horovod_overhead": -0.06657734122258432
}
{
"folder": "20191219-134443-10Gbit-100Gbit-2p3dn-ResNet101-CIFAR10",
"reduced_params": 177632248.064,
"actual_model_size": 178618432,
"missed_param_percent": 0.005521176761869623,
"mimic_coll_ops_time": 158605.828,
"horovod_coll_ops_time": 161865.73684210525,
"mimic_dur * (model_size/reduced_size)": 159486.38049783936,
"horovod_overhead": 0.014918868538107722
}
{
"folder": "20191220-041756-1Gbit-100Gbit-2p3dn-ResNet50-imagenet",
"reduced_params": 102131559.68,
"actual_model_size": 102440820,
"missed_param_percent": 0.003018916873176072,
"mimic_coll_ops_time": 895671.456,
"horovod_coll_ops_time": 888573.2947368422,
"mimic_dur * (model_size/reduced_size)": 898383.60141289,
"horovod_overhead": -0.01091995296955461
}
{
"folder": "20191220-031218-1Gbit-100Gbit-2p3dn-resnet101-imagenet",
"reduced_params": 178011665.152,
"actual_model_size": 178618432,
"missed_param_percent": 0.0033970001931267093,
"mimic_coll_ops_time": 1793071.76,
"horovod_coll_ops_time": 1613949.3915789474,
"mimic_dur * (model_size/reduced_size)": 1799183.5869924838,
"horovod_overhead": -0.10295458270780136
}
{
"folder": "20191220-060123-25Gbit-100Gbit-8p3dn-vgg16-imagenet",
"reduced_params": 508950553.856,
"actual_model_size": 553497812,
"missed_param_percent": 0.08048316936074897,
"mimic_coll_ops_time": 333436.866,
"horovod_coll_ops_time": 370881.2947368421,
"mimic_dur * (model_size/reduced_size)": 362621.8192963294,
"horovod_overhead": 0.02277710551598986
}
{
"folder": "20191220-013905-10Gbit-100Gbit-8p3dn-resnet101-imagenet",
"reduced_params": 178005494.016,
"actual_model_size": 178618432,
"missed_param_percent": 0.0034315494606961803,
"mimic_coll_ops_time": 327455.35,
"horovod_coll_ops_time": 300077.4694736842,
"mimic_dur * (model_size/reduced_size)": 328582.8984680319,
"horovod_overhead": -0.08675262506737248
}
{
"folder": "20191220-055236--100Gbit-8p3dn-vgg16-imagenet",
"reduced_params": 513307941.632,
"actual_model_size": 553497812,
"missed_param_percent": 0.07261071226782008,
"mimic_coll_ops_time": 155205.942,
"horovod_coll_ops_time": 214117.28631578945,
"mimic_dur * (model_size/reduced_size)": 167357.9197572334,
"horovod_overhead": 0.2793973934808964
}
{
"folder": "20191220-022031-10Gbit-100Gbit-4p3dn-resnet101-imagenet",
"reduced_params": 178027628.8,
"actual_model_size": 178618432,
"missed_param_percent": 0.003307627288991027,
"mimic_coll_ops_time": 291836.452,
"horovod_coll_ops_time": 291303.80842105264,
"mimic_dur * (model_size/reduced_size)": 292804.94161523797,
"horovod_overhead": -0.005126734493975517
}
{
"folder": "20191220-080447-40Gbit-100Gbit-2p3dn-vgg16-imagenet",
"reduced_params": 507204569.856,
"actual_model_size": 553497812,
"missed_param_percent": 0.08363762446815236,
"mimic_coll_ops_time": 179785.1,
"horovod_coll_ops_time": 255238.22315789474,
"mimic_dur * (model_size/reduced_size)": 196194.3274849696,
"horovod_overhead": 0.3009459877347803
}
{
"folder": "20191219-182813--100Gbit-2p3dn-ResNet50-CIFAR10",
"reduced_params": 101585924.864,
"actual_model_size": 102440820,
"missed_param_percent": 0.008345258618585906,
"mimic_coll_ops_time": 45662.3712,
"horovod_coll_ops_time": 56766.616842105264,
"mimic_dur * (model_size/reduced_size)": 46046.64233883511,
"horovod_overhead": 0.2328068662289644
}
{
"folder": "20191220-072601-1Gbit-100Gbit-8p3dn-vgg16-imagenet_deprecated",
"reduced_params": 524425261.824,
"actual_model_size": 553497812,
"missed_param_percent": 0.05252514020055422,
"mimic_coll_ops_time": 8420971.4,
"horovod_coll_ops_time": 8822852.661052631,
"mimic_dur * (model_size/reduced_size)": 8887804.581730524,
"horovod_overhead": -0.007307982537263013
}
{
"folder": "20191219-134201-25Gbit-100Gbit-2p3dn-ResNet101-CIFAR10",
"reduced_params": 177684748.544,
"actual_model_size": 178618432,
"missed_param_percent": 0.0052272514406575925,
"mimic_coll_ops_time": 81701.5158,
"horovod_coll_ops_time": 96824.57473684211,
"mimic_dur * (model_size/reduced_size)": 82130.83432203226,
"horovod_overhead": 0.17890650370354444
}
{
"folder": "20191219-164717-40Gbit-100Gbit-8p3dn-vgg16-CIFAR10",
"reduced_params": 510389071.54285717,
"actual_model_size": 553497812,
"missed_param_percent": 0.07788421114326434,
"mimic_coll_ops_time": 221143.58571428573,
"horovod_coll_ops_time": 255301.2695652174,
"mimic_dur * (model_size/reduced_size)": 239821.92734002045,
"horovod_overhead": 0.06454514979879332
}
{
"folder": "20191219-133605-40Gbit-100Gbit-2p3dn-ResNet101-CIFAR10",
"reduced_params": 177691615.488,
"actual_model_size": 178618432,
"missed_param_percent": 0.0051888066736583745,
"mimic_coll_ops_time": 78246.224,
"horovod_coll_ops_time": 96935.87157894738,
"mimic_dur * (model_size/reduced_size)": 78654.34619645642,
"horovod_overhead": 0.23242867389462313
}
{
"folder": "20191219-073101-10Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177673849.6,
"actual_model_size": 178618432,
"missed_param_percent": 0.005288269465941824,
"mimic_coll_ops_time": 306333.55,
"horovod_coll_ops_time": 298372.93043478264,
"mimic_dur * (model_size/reduced_size)": 307962.136764518,
"horovod_overhead": -0.03113761461223947
}
{
"folder": "20191219-183129-25Gbit-100Gbit-2p3dn-ResNet50-CIFAR10",
"reduced_params": 101650496.256,
"actual_model_size": 102440820,
"missed_param_percent": 0.007714929888300413,
"mimic_coll_ops_time": 49725.175200000005,
"horovod_coll_ops_time": 59778.70947368421,
"mimic_dur * (model_size/reduced_size)": 50111.784100916215,
"horovod_overhead": 0.19290722823399242
}
{
"folder": "20191220-031915-25Gbit-100Gbit-8p3dn-ResNet50-imagenet",
"reduced_params": 102152894.208,
"actual_model_size": 102440820,
"missed_param_percent": 0.002810654893234901,
"mimic_coll_ops_time": 81772.0652,
"horovod_coll_ops_time": 91176.98105263159,
"mimic_dur * (model_size/reduced_size)": 82002.54605733376,
"horovod_overhead": 0.11187987978913884
}
{
"folder": "20191219-143126--100Gbit-2p3dn-VGG16-CIFAR10",
"reduced_params": 504209755.904,
"actual_model_size": 553497812,
"missed_param_percent": 0.08904833050360823,
"mimic_coll_ops_time": 160237.662,
"horovod_coll_ops_time": 196036.4989473684,
"mimic_dur * (model_size/reduced_size)": 175901.38683052792,
"horovod_overhead": 0.11446818288158044
}
{
"folder": "20191219-101417-1Gbit-100Gbit-4p3dn-ResNet101-CIFAR10",
"reduced_params": 177393733.41538462,
"actual_model_size": 178618432,
"missed_param_percent": 0.0068565073094773304,
"mimic_coll_ops_time": 2822042.8846153845,
"horovod_coll_ops_time": 2799990.1829787237,
"mimic_dur * (model_size/reduced_size)": 2841525.8272197857,
"horovod_overhead": -0.014617373469978778
}
{
"folder": "20191219-181250-1Gbit-100Gbit-8p3dn-ResNet50-CIFAR10",
"reduced_params": 101477927.31428571,
"actual_model_size": 102440820,
"missed_param_percent": 0.009399501934036545,
"mimic_coll_ops_time": 1644554.857142857,
"horovod_coll_ops_time": 1643910.339130435,
"mimic_dur * (model_size/reduced_size)": 1660159.5298545337,
"horovod_overhead": -0.009787728487468072
}
{
"folder": "20191219-145430-10Gbit-100Gbit-2p3dn-VGG16-CIFAR10",
"reduced_params": 506358925.056,
"actual_model_size": 553497812,
"missed_param_percent": 0.08516544405779874,
"mimic_coll_ops_time": 415073.228,
"horovod_coll_ops_time": 460868.2442105263,
"mimic_dur * (model_size/reduced_size)": 453713.98063610547,
"horovod_overhead": 0.015768223770381885
}
{
"folder": "20191219-182639-1Gbit-100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101427914.33846153,
"actual_model_size": 102440820,
"missed_param_percent": 0.009887715283209045,
"mimic_coll_ops_time": 1594383.8846153845,
"horovod_coll_ops_time": 1608110.510638298,
"mimic_dur * (model_size/reduced_size)": 1610306.1331790644,
"horovod_overhead": -0.001363481449599759
}
{
"folder": "20191219-051040-1Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177539500.8,
"actual_model_size": 178618432,
"missed_param_percent": 0.006040424764225834,
"mimic_coll_ops_time": 2859218.4285714286,
"horovod_coll_ops_time": 2858150.0086956522,
"mimic_dur * (model_size/reduced_size)": 2876594.279896345,
"horovod_overhead": -0.0064118431054369495
}
{
"folder": "20191219-160017-10Gbit-100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 533990293.16923076,
"actual_model_size": 553497812,
"missed_param_percent": 0.03524407578104255,
"mimic_coll_ops_time": 843209.6653846154,
"horovod_coll_ops_time": 876218.3531914894,
"mimic_dur * (model_size/reduced_size)": 874013.4620756614,
"horovod_overhead": 0.002522719856730398
}
{
"folder": "20191219-133233--100Gbit-2p3dn-ResNet101-CIFAR10",
"reduced_params": 177681735.936,
"actual_model_size": 178618432,
"missed_param_percent": 0.005244117605959112,
"mimic_coll_ops_time": 73769.6988,
"horovod_coll_ops_time": 94086.58105263158,
"mimic_dur * (model_size/reduced_size)": 74158.59519469368,
"horovod_overhead": 0.26872118876604356
}
{
"folder": "20191220-075449-10Gbit-100Gbit-4p3dn-vgg16-imagenet",
"reduced_params": 518591482.624,
"actual_model_size": 553497812,
"missed_param_percent": 0.06306498168415521,
"mimic_coll_ops_time": 819387.386,
"horovod_coll_ops_time": 876279.1642105263,
"mimic_dur * (model_size/reduced_size)": 874540.2509054061,
"horovod_overhead": 0.001988374238143879
}
{
"folder": "20191220-055633-40Gbit-100Gbit-8p3dn-vgg16-imagenet",
"reduced_params": 508814180.096,
"actual_model_size": 553497812,
"missed_param_percent": 0.08072955472496066,
"mimic_coll_ops_time": 218179.172,
"horovod_coll_ops_time": 265339.0042105263,
"mimic_dur * (model_size/reduced_size)": 237339.48276203123,
"horovod_overhead": 0.11797245499421953
}
{
"folder": "20191220-034348-25Gbit-100Gbit-4p3dn-ResNet50-imagenet",
"reduced_params": 102137995.008,
"actual_model_size": 102440820,
"missed_param_percent": 0.0029560969152726295,
"mimic_coll_ops_time": 78700.7112,
"horovod_coll_ops_time": 84014.12842105262,
"mimic_dur * (model_size/reduced_size)": 78934.04789549386,
"horovod_overhead": 0.06435854565934114
}
{
"folder": "20191219-144639-40Gbit-100Gbit-2p3dn-VGG16-CIFAR10",
"reduced_params": 503141678.848,
"actual_model_size": 553497812,
"missed_param_percent": 0.09097801664299987,
"mimic_coll_ops_time": 180060.816,
"horovod_coll_ops_time": 207720.4989473684,
"mimic_dur * (model_size/reduced_size)": 198081.91583556536,
"horovod_overhead": 0.048659581421881884
}
{
"folder": "20191219-171733--100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 499935523.93846154,
"actual_model_size": 553497812,
"missed_param_percent": 0.0967705506693827,
"mimic_coll_ops_time": 198153.61538461538,
"horovod_coll_ops_time": 193355.3659574468,
"mimic_dur * (model_size/reduced_size)": 219383.47507542727,
"horovod_overhead": -0.11864206777211281
}
{
"folder": "20191220-075929--100Gbit-2p3dn-vgg16-imagenet",
"reduced_params": 514334539.008,
"actual_model_size": 553497812,
"missed_param_percent": 0.07075596712927924,
"mimic_coll_ops_time": 166949.868,
"horovod_coll_ops_time": 242120.16631578948,
"mimic_dur * (model_size/reduced_size)": 179662.02858923987,
"horovod_overhead": 0.3476423939826887
}
{
"folder": "20191219-181651-25Gbit-100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101576609.96923077,
"actual_model_size": 102440820,
"missed_param_percent": 0.008436188140325594,
"mimic_coll_ops_time": 77973.92153846152,
"horovod_coll_ops_time": 75312.71489361703,
"mimic_dur * (model_size/reduced_size)": 78637.32077133967,
"horovod_overhead": -0.042277710444762966
}
{
"folder": "20191220-031653-40Gbit-100Gbit-8p3dn-ResNet50-imagenet",
"reduced_params": 102151192.32,
"actual_model_size": 102440820,
"missed_param_percent": 0.0028272682705976697,
"mimic_coll_ops_time": 65812.7598,
"horovod_coll_ops_time": 86527.35578947367,
"mimic_dur * (model_size/reduced_size)": 65999.35768987641,
"horovod_overhead": 0.3110333012035666
}
{
"folder": "20191220-011225--100Gbit-8p3dn-resnet101-imagenet",
"reduced_params": 178063852.544,
"actual_model_size": 178618432,
"missed_param_percent": 0.0031048277033357916,
"mimic_coll_ops_time": 118285.9519,
"horovod_coll_ops_time": 176645.3887179487,
"mimic_dur * (model_size/reduced_size)": 118654.35322300819,
"horovod_overhead": 0.48873921537415194
}
{
"folder": "20191219-140204-1Gbit-100Gbit-2p3dn-ResNet101-CIFAR10",
"reduced_params": 177355406.08,
"actual_model_size": 178618432,
"missed_param_percent": 0.007071083906950806,
"mimic_coll_ops_time": 2182359.68,
"horovod_coll_ops_time": 1689813.5894736843,
"mimic_dur * (model_size/reduced_size)": 2197901.22397391,
"horovod_overhead": -0.23116945791657503
}
{
"folder": "20191220-013433-25Gbit-100Gbit-8p3dn-resnet101-imagenet",
"reduced_params": 178053317.888,
"actual_model_size": 178618432,
"missed_param_percent": 0.0031638062526491584,
"mimic_coll_ops_time": 149081.4,
"horovod_coll_ops_time": 147327.04421052634,
"mimic_dur * (model_size/reduced_size)": 149554.5616572835,
"horovod_overhead": -0.014894346398217635
}
{
"folder": "20191219-085008-1Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177499572.1142857,
"actual_model_size": 178618432,
"missed_param_percent": 0.006263966563732306,
"mimic_coll_ops_time": 2866090.5714285714,
"horovod_coll_ops_time": 2857906.2521739127,
"mimic_dur * (model_size/reduced_size)": 2884156.8333975333,
"horovod_overhead": -0.009101648329122731
}
{
"folder": "20191219-155529-25Gbit-100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 523690167.63076925,
"actual_model_size": 553497812,
"missed_param_percent": 0.053853228907128445,
"mimic_coll_ops_time": 333064.1230769231,
"horovod_coll_ops_time": 357040.144680851,
"mimic_dur * (model_size/reduced_size)": 352021.62418438384,
"horovod_overhead": 0.014256284704369633
}
{
"folder": "20191220-040317--100Gbit-2p3dn-Resnet50-imagenet",
"reduced_params": 102137044.224,
"actual_model_size": 102440820,
"missed_param_percent": 0.0029653782154417845,
"mimic_coll_ops_time": 48564.813,
"horovod_coll_ops_time": 80858.78105263159,
"mimic_dur * (model_size/reduced_size)": 48709.25436177482,
"horovod_overhead": 0.6600291281832164
}
{
"folder": "20191220-040713-25Gbit-100Gbit-2p3dn-ResNet50-imagenet",
"reduced_params": 102143127.296,
"actual_model_size": 102440820,
"missed_param_percent": 0.0029059968867878663,
"mimic_coll_ops_time": 53642.3094,
"horovod_coll_ops_time": 81703.0,
"mimic_dur * (model_size/reduced_size)": 53798.64810390334,
"horovod_overhead": 0.5186812843736137
}
{
"folder": "20191219-180750-10Gbit-100Gbit-8p3dn-ResNet50-CIFAR10",
"reduced_params": 101584507.42857143,
"actual_model_size": 102440820,
"missed_param_percent": 0.008359095245709349,
"mimic_coll_ops_time": 183660.92142857143,
"horovod_coll_ops_time": 176353.42608695652,
"mimic_dur * (model_size/reduced_size)": 185209.1019521618,
"horovod_overhead": -0.04781447440683911
}
{
"folder": "20191220-040918-10Gbit-100Gbit-2p3dn-ResNet50-imagenet",
"reduced_params": 102144322.816,
"actual_model_size": 102440820,
"missed_param_percent": 0.00289432653897148,
"mimic_coll_ops_time": 95426.874,
"horovod_coll_ops_time": 101494.79157894736,
"mimic_dur * (model_size/reduced_size)": 95703.87225735681,
"horovod_overhead": 0.060508725352493804
}
{
"folder": "20191220-021621-25Gbit-100Gbit-4p3dn-resnet101-imagenet",
"reduced_params": 178051158.784,
"actual_model_size": 178618432,
"missed_param_percent": 0.0031758940533079517,
"mimic_coll_ops_time": 127240.498,
"horovod_coll_ops_time": 138672.46315789473,
"mimic_dur * (model_size/reduced_size)": 127645.88781604421,
"horovod_overhead": 0.08638410159942932
}
{
"folder": "20191219-184155-1Gbit-100Gbit-2p3dn-ResNet50-CIFAR10",
"reduced_params": 101569349.888,
"actual_model_size": 102440820,
"missed_param_percent": 0.008507059119597085,
"mimic_coll_ops_time": 1067356.34,
"horovod_coll_ops_time": 886416.4442105263,
"mimic_dur * (model_size/reduced_size)": 1076514.3108857977,
"horovod_overhead": -0.17658647428370136
}
{
"folder": "20191219-155254-40Gbit-100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101708512.0,
"actual_model_size": 102440820,
"missed_param_percent": 0.007148595647711527,
"mimic_coll_ops_time": 61392.70192307692,
"horovod_coll_ops_time": 63249.165957446814,
"mimic_dur * (model_size/reduced_size)": 61834.733429347354,
"horovod_overhead": 0.022874401645405277
}
{
"folder": "20191219-084034-100Mbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177626065.37142858,
"actual_model_size": 178618432,
"missed_param_percent": 0.0055557907292088475,
"mimic_coll_ops_time": 28581331.42857143,
"horovod_coll_ops_time": 28434766.88695652,
"mimic_dur * (model_size/reduced_size)": 28741010.468078073,
"horovod_overhead": -0.010655282334686524
}
{
"folder": "20191219-144937-25Gbit-100Gbit-2p3dn-VGG16-CIFAR10",
"reduced_params": 502650750.208,
"actual_model_size": 553497812,
"missed_param_percent": 0.09186497342829604,
"mimic_coll_ops_time": 187145.938,
"horovod_coll_ops_time": 224425.56421052633,
"mimic_dur * (model_size/reduced_size)": 206077.21596918654,
"horovod_overhead": 0.08903627776145473
}
{
"folder": "20191219-181408--100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101776059.56923077,
"actual_model_size": 102440820,
"missed_param_percent": 0.006489214267996243,
"mimic_coll_ops_time": 45917.37384615385,
"horovod_coll_ops_time": 53957.268085106385,
"mimic_dur * (model_size/reduced_size)": 46217.287729113705,
"horovod_overhead": 0.1674693764237711
}
{
"folder": "20191219-061910-100Mbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177558091.8857143,
"actual_model_size": 178618432,
"missed_param_percent": 0.005936342080786531,
"mimic_coll_ops_time": 28570182.14285714,
"horovod_coll_ops_time": 28432763.51304348,
"mimic_dur * (model_size/reduced_size)": 28740797.347586982,
"horovod_overhead": -0.010717650969045296
}
{
"folder": "20191220-021322-40Gbit-100Gbit-4p3dn-resnet101-imagenet",
"reduced_params": 178096748.288,
"actual_model_size": 178618432,
"missed_param_percent": 0.0029206600134078684,
"mimic_coll_ops_time": 107678.3936,
"horovod_coll_ops_time": 137959.36631578946,
"mimic_dur * (model_size/reduced_size)": 107993.80679319659,
"horovod_overhead": 0.277474796123963
}
{
"folder": "20191218-170239--100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177691188.1142857,
"actual_model_size": 178618432,
"missed_param_percent": 0.005191199336663601,
"mimic_coll_ops_time": 99774.35857142857,
"horovod_coll_ops_time": 125038.10434782608,
"mimic_dur * (model_size/reduced_size)": 100295.00996060674,
"horovod_overhead": 0.24670314502125068
}
{
"folder": "20191219-183323-10Gbit-100Gbit-2p3dn-ResNet50-CIFAR10",
"reduced_params": 101361246.976,
"actual_model_size": 102440820,
"missed_param_percent": 0.01053850431888386,
"mimic_coll_ops_time": 93468.64940000001,
"horovod_coll_ops_time": 93880.23789473684,
"mimic_dur * (model_size/reduced_size)": 94464.1603619542,
"horovod_overhead": -0.006181418063527675
}
{
"folder": "20191220-104807-10Gbit-100Gbit-8p3dn-vgg16-imagenet",
"reduced_params": 514520484.096,
"actual_model_size": 553497812,
"missed_param_percent": 0.07042002164951645,
"mimic_coll_ops_time": 859817.786,
"horovod_coll_ops_time": 899868.2757894737,
"mimic_dur * (model_size/reduced_size)": 924952.9960033403,
"horovod_overhead": -0.0271199945535136
}
{
"folder": "20191219-095214-40Gbit-100Gbit-4p3dn-ResNet101-CIFAR10",
"reduced_params": 177705184.0,
"actual_model_size": 178618432,
"missed_param_percent": 0.005112843001555405,
"mimic_coll_ops_time": 98111.87653846154,
"horovod_coll_ops_time": 109390.27659574468,
"mimic_dur * (model_size/reduced_size)": 98616.08509900076,
"horovod_overhead": 0.10925389591290005
}
{
"folder": "20191219-171420-1Gbit-100Gbit-8p3dn-vgg16-CIFAR10",
"reduced_params": 509700038.4,
"actual_model_size": 553497812,
"missed_param_percent": 0.07912908172435562,
"mimic_coll_ops_time": 8190279.785714285,
"horovod_coll_ops_time": 8823811.460869566,
"mimic_dur * (model_size/reduced_size)": 8894058.46483979,
"horovod_overhead": -0.007898194536040728
}
{
"folder": "20191219-092049--100Gbit-4p3dn-ResNet101-CIFAR10",
"reduced_params": 177744544.9846154,
"actual_model_size": 178618432,
"missed_param_percent": 0.004892479491616041,
"mimic_coll_ops_time": 77394.40076923076,
"horovod_coll_ops_time": 92492.77872340426,
"mimic_dur * (model_size/reduced_size)": 77774.91293573106,
"horovod_overhead": 0.18923667326809152
}
{
"folder": "20191220-031435--100Gbit-8p3dn-ResNet50-imagenet",
"reduced_params": 102128409.856,
"actual_model_size": 102440820,
"missed_param_percent": 0.003049664616116835,
"mimic_coll_ops_time": 53704.354999999996,
"horovod_coll_ops_time": 83927.69894736844,
"mimic_dur * (model_size/reduced_size)": 53868.63627396317,
"horovod_overhead": 0.558006750357146
}
{
"folder": "20191220-081858-10Gbit-100Gbit-2p3dn-vgg16-imagenet",
"reduced_params": 506617340.16,
"actual_model_size": 553497812,
"missed_param_percent": 0.08469856758891754,
"mimic_coll_ops_time": 533210.048,
"horovod_coll_ops_time": 464831.6547368421,
"mimic_dur * (model_size/reduced_size)": 582551.3094581538,
"horovod_overhead": -0.2020760279996724
}
{
"folder": "20191220-033735-1Gbit-100Gbit-8p3dn-ResNet50-imagenet",
"reduced_params": 101999431.424,
"actual_model_size": 102440820,
"missed_param_percent": 0.004308717716238556,
"mimic_coll_ops_time": 1662349.8,
"horovod_coll_ops_time": 1643011.4484210527,
"mimic_dur * (model_size/reduced_size)": 1669543.3911876392,
"horovod_overhead": -0.01589173597202101
}
{
"folder": "20191220-032219-10Gbit-100Gbit-8p3dn-ResNet50-imagenet",
"reduced_params": 102147308.8,
"actual_model_size": 102440820,
"missed_param_percent": 0.0028651781584724035,
"mimic_coll_ops_time": 177863.616,
"horovod_coll_ops_time": 178440.60842105263,
"mimic_dur * (model_size/reduced_size)": 178374.6912694495,
"horovod_overhead": 0.00036954318538131014
}
Error when drawing 20191219-132555-40Gbit-100Gbit-8p3dn-ResNet50-CIFAR10
'hook_log'
{
"folder": "20191219-171537-40Gbit-100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101685409.96923077,
"actual_model_size": 102440820,
"missed_param_percent": 0.007374111518916277,
"mimic_coll_ops_time": 61193.54538461538,
"horovod_coll_ops_time": 63504.80851063829,
"mimic_dur * (model_size/reduced_size)": 61648.14568583715,
"horovod_overhead": 0.030117091181668484
}
{
"folder": "20191219-153320-1Gbit-100Gbit-2p3dn-VGG16-CIFAR10",
"reduced_params": 503107012.352,
"actual_model_size": 553497812,
"missed_param_percent": 0.09104064831967214,
"mimic_coll_ops_time": 5176772.32,
"horovod_coll_ops_time": 4518513.650526316,
"mimic_dur * (model_size/reduced_size)": 5695273.733011353,
"horovod_overhead": -0.20662046069255915
}
{
"folder": "20191220-073846-40Gbit-100Gbit-4p3dn-vgg16-imagenet",
"reduced_params": 522592732.928,
"actual_model_size": 553497812,
"missed_param_percent": 0.05583595526841943,
"mimic_coll_ops_time": 220636.41,
"horovod_coll_ops_time": 252041.1347368421,
"mimic_dur * (model_size/reduced_size)": 233684.40180617705,
"horovod_overhead": 0.07855352256626236
}
{
"folder": "20191220-034635-10Gbit-100Gbit-4p3dn-ResNet50-imagenet",
"reduced_params": 102150848.768,
"actual_model_size": 102440820,
"missed_param_percent": 0.0028306219337173727,
"mimic_coll_ops_time": 172351.45,
"horovod_coll_ops_time": 170148.6589473684,
"mimic_dur * (model_size/reduced_size)": 172840.6966670247,
"horovod_overhead": -0.015575253812141546
}
{
"folder": "20191220-025124-40Gbit-100Gbit-2p3dn-resnet101-imagenet",
"reduced_params": 178066472.192,
"actual_model_size": 178618432,
"missed_param_percent": 0.0030901615349528894,
"mimic_coll_ops_time": 102906.58120000002,
"horovod_coll_ops_time": 135124.4294736842,
"mimic_dur * (model_size/reduced_size)": 103225.56486998504,
"horovod_overhead": 0.30902097405692575
}
{
"folder": "20191219-050038-10Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177509232.45714286,
"actual_model_size": 178618432,
"missed_param_percent": 0.0062098828795963245,
"mimic_coll_ops_time": 305644.5,
"horovod_coll_ops_time": 297570.95652173914,
"mimic_dur * (model_size/reduced_size)": 307554.37665815436,
"horovod_overhead": -0.032460666776697386
}
{
"folder": "20191219-095727-10Gbit-100Gbit-4p3dn-ResNet101-CIFAR10",
"reduced_params": 177649446.8923077,
"actual_model_size": 178618432,
"missed_param_percent": 0.0054248886682215485,
"mimic_coll_ops_time": 293430.6,
"horovod_coll_ops_time": 288562.3106382979,
"mimic_dur * (model_size/reduced_size)": 295031.11093045946,
"horovod_overhead": -0.02192582426904232
}
Error when drawing 20191219-132358--100Gbit-8p3dn-ResNet50-CIFAR10
'hook_log'
{
"folder": "20191219-163902-1Gbit-100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 511642208.0,
"actual_model_size": 553497812,
"missed_param_percent": 0.07562017968735892,
"mimic_coll_ops_time": 8037567.961538462,
"horovod_coll_ops_time": 8675022.225531913,
"mimic_dur * (model_size/reduced_size)": 8695092.412142899,
"horovod_overhead": -0.0023082200463973293
}
{
"folder": "20191220-025711-10Gbit-100Gbit-2p3dn-resnet101-imagenet",
"reduced_params": 178065960.704,
"actual_model_size": 178618432,
"missed_param_percent": 0.003093025114003933,
"mimic_coll_ops_time": 165224.22,
"horovod_coll_ops_time": 171381.01894736843,
"mimic_dur * (model_size/reduced_size)": 165736.84823390332,
"horovod_overhead": 0.034055014160155425
}
{
"folder": "20191219-143922--100Gbit-8p3dn-ResNet50-CIFAR10",
"reduced_params": 101875157.02857143,
"actual_model_size": 102440820,
"missed_param_percent": 0.005521851264257482,
"mimic_coll_ops_time": 60273.357142857145,
"horovod_coll_ops_time": 66169.78260869565,
"mimic_dur * (model_size/reduced_size)": 60608.02564589407,
"horovod_overhead": 0.09176601454230611
}
{
"folder": "20191220-103445-1Gbit-100Gbit-8p3dn-vgg16-imagenet",
"reduced_params": 513078485.248,
"actual_model_size": 553497812,
"missed_param_percent": 0.07302526925255483,
"mimic_coll_ops_time": 8249364.22,
"horovod_coll_ops_time": 8823296.983157894,
"mimic_dur * (model_size/reduced_size)": 8899233.114314035,
"horovod_overhead": -0.008532884820603354
}
{
"folder": "20191219-180341-1Gbit-100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 515519929.6,
"actual_model_size": 553497812,
"missed_param_percent": 0.06861433157752027,
"mimic_coll_ops_time": 8086768.5,
"horovod_coll_ops_time": 8674140.714893619,
"mimic_dur * (model_size/reduced_size)": 8682513.349917484,
"horovod_overhead": -0.0009643100662719302
}
{
"folder": "20191219-045102-40Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177760928.0,
"actual_model_size": 178618432,
"missed_param_percent": 0.004800758748122926,
"mimic_coll_ops_time": 114374.3607142857,
"horovod_coll_ops_time": 134073.68695652176,
"mimic_dur * (model_size/reduced_size)": 114926.09316141797,
"horovod_overhead": 0.16660788919545264
}
{
"folder": "20191219-165228-10Gbit-100Gbit-8p3dn-vgg16-CIFAR10",
"reduced_params": 531847649.82857144,
"actual_model_size": 553497812,
"missed_param_percent": 0.03911517209652233,
"mimic_coll_ops_time": 888347.1571428571,
"horovod_coll_ops_time": 902973.4869565218,
"mimic_dur * (model_size/reduced_size)": 924509.5055576139,
"horovod_overhead": -0.023294534530613403
}
{
"folder": "20191219-155137--100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 518097148.5538462,
"actual_model_size": 553497812,
"missed_param_percent": 0.06395809103244264,
"mimic_coll_ops_time": 161321.06923076924,
"horovod_coll_ops_time": 202291.94468085107,
"mimic_dur * (model_size/reduced_size)": 172343.85307459618,
"horovod_overhead": 0.17376942125862974
}
{
"folder": "20191219-182951-40Gbit-100Gbit-2p3dn-ResNet50-CIFAR10",
"reduced_params": 101540593.92,
"actual_model_size": 102440820,
"missed_param_percent": 0.008787767220137424,
"mimic_coll_ops_time": 49116.6512,
"horovod_coll_ops_time": 58405.56421052631,
"mimic_dur * (model_size/reduced_size)": 49552.10355127676,
"horovod_overhead": 0.17866972388140784
}
{
"folder": "20191220-021023--100Gbit-4p3dn-resnet101-imagenet",
"reduced_params": 178030377.216,
"actual_model_size": 178618432,
"missed_param_percent": 0.003292240209565882,
"mimic_coll_ops_time": 107237.28379999999,
"horovod_coll_ops_time": 140611.5642105263,
"mimic_dur * (model_size/reduced_size)": 107591.50086535644,
"horovod_overhead": 0.30690215379086727
}
{
"folder": "20191219-180618-25Gbit-100Gbit-8p3dn-ResNet50-CIFAR10",
"reduced_params": 101644327.31428571,
"actual_model_size": 102440820,
"missed_param_percent": 0.0077751494542340595,
"mimic_coll_ops_time": 87664.79000000001,
"horovod_coll_ops_time": 83897.67826086956,
"mimic_dur * (model_size/reduced_size)": 88351.7379672366,
"horovod_overhead": -0.05041281370173752
}
{
"folder": "20191219-180459-40Gbit-100Gbit-8p3dn-ResNet50-CIFAR10",
"reduced_params": 101767607.77142857,
"actual_model_size": 102440820,
"missed_param_percent": 0.0065717184670274,
"mimic_coll_ops_time": 71713.55357142857,
"horovod_coll_ops_time": 74020.68695652173,
"mimic_dur * (model_size/reduced_size)": 72187.95247178429,
"horovod_overhead": 0.025388370524206224
}
{
"folder": "20191219-172007-25Gbit-100Gbit-4p3dn-vgg16-CIFAR10",
"reduced_params": 525635675.0769231,
"actual_model_size": 553497812,
"missed_param_percent": 0.05033829641060429,
"mimic_coll_ops_time": 334978.3615384615,
"horovod_coll_ops_time": 357217.15744680853,
"mimic_dur * (model_size/reduced_size)": 352734.41086689936,
"horovod_overhead": 0.01270856044039517
}
{
"folder": "20191219-181839-10Gbit-100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101620083.6923077,
"actual_model_size": 102440820,
"missed_param_percent": 0.008011809234759193,
"mimic_coll_ops_time": 174388.88846153847,
"horovod_coll_ops_time": 168421.7914893617,
"mimic_dur * (model_size/reduced_size)": 175797.34323955124,
"horovod_overhead": -0.04195485332300614
}
{
"folder": "20191219-071723--100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177740709.4857143,
"actual_model_size": 178618432,
"missed_param_percent": 0.004913952633319015,
"mimic_coll_ops_time": 89367.44857142858,
"horovod_coll_ops_time": 122953.64347826086,
"mimic_dur * (model_size/reduced_size)": 89808.76458666436,
"horovod_overhead": 0.3690606261442567
}
{
"folder": "20191220-074409-25Gbit-100Gbit-4p3dn-vgg16-imagenet",
"reduced_params": 518860124.416,
"actual_model_size": 553497812,
"missed_param_percent": 0.06257962874115208,
"mimic_coll_ops_time": 330841.278,
"horovod_coll_ops_time": 361621.5305263158,
"mimic_dur * (model_size/reduced_size)": 352927.339903781,
"horovod_overhead": 0.02463450585864242
}
{
"folder": "20191219-095445-25Gbit-100Gbit-4p3dn-ResNet101-CIFAR10",
"reduced_params": 177718773.66153845,
"actual_model_size": 178618432,
"missed_param_percent": 0.005036760923203872,
"mimic_coll_ops_time": 127135.43846153846,
"horovod_coll_ops_time": 126380.24255319149,
"mimic_dur * (model_size/reduced_size)": 127779.03089113579,
"horovod_overhead": -0.010946931810243754
}
{
"folder": "20191219-181528-40Gbit-100Gbit-4p3dn-ResNet50-CIFAR10",
"reduced_params": 101665841.72307692,
"actual_model_size": 102440820,
"missed_param_percent": 0.007565131525919797,
"mimic_coll_ops_time": 61253.51576923078,
"horovod_coll_ops_time": 63833.574468085106,
"mimic_dur * (model_size/reduced_size)": 61720.439008165056,
"horovod_overhead": 0.034237207218187495
}
{
"folder": "20191219-072247-40Gbit-100Gbit-8p3dn-ResNet101-CIFAR10",
"reduced_params": 177730120.22857141,
"actual_model_size": 178618432,
"missed_param_percent": 0.004973236868570121,
"mimic_coll_ops_time": 116843.13642857144,
"horovod_coll_ops_time": 135224.52173913043,
"mimic_dur * (model_size/reduced_size)": 117427.12935766332,
"horovod_overhead": 0.15156116375168507
}
{
"folder": "20191220-033940--100Gbit-4p3dn-ResNet50-imagenet",
"reduced_params": 102077108.992,
"actual_model_size": 102440820,
"missed_param_percent": 0.0035504499866361996,
"mimic_coll_ops_time": 50806.4542,
"horovod_coll_ops_time": 81813.33894736841,
"mimic_dur * (model_size/reduced_size)": 50987.48270729673,
"horovod_overhead": 0.6045769393447666
}
{
"folder": "20191220-040121-1Gbit-100Gbit-4p3dn-ResNet50-imagenet",
"reduced_params": 102040610.56,
"actual_model_size": 102440820,
"missed_param_percent": 0.003906737958559855,
"mimic_coll_ops_time": 1604848.26,
"horovod_coll_ops_time": 1608969.0189473685,
"mimic_dur * (model_size/reduced_size)": 1611142.5718420672,
"horovod_overhead": -0.0013490754528406516
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment