Skip to content

Instantly share code, notes, and snippets.

@yifuwang
Created September 23, 2022 02:30
Show Gist options
  • Save yifuwang/6b7ea7ff90b070b7518c7b4c20d582b9 to your computer and use it in GitHub Desktop.
Save yifuwang/6b7ea7ff90b070b7518c7b4c20d582b9 to your computer and use it in GitHub Desktop.
tensor([0.0002]) tensor([0], dtype=torch.int32)
tensor([0.0002]) tensor([-1], dtype=torch.int32)
WARNING:root:Could not determine LOCAL_WORLD_SIZE from environment, falling back to WORLD_SIZE.
WARNING:root:Could not determine LOCAL_WORLD_SIZE from environment, falling back to WORLD_SIZE.
torch.float32
torch.float32
old weight tensor([[-0.0285, 0.0301, 0.0173, ..., -0.0305, -0.0288, -0.0027],
[-0.0224, -0.0263, 0.0212, ..., -0.0249, 0.0071, -0.0202],
[ 0.0125, 0.0225, 0.0154, ..., -0.0155, -0.0169, 0.0253],
...,
[ 0.0006, 0.0041, 0.0118, ..., -0.0078, 0.0023, -0.0024],
[-0.0161, -0.0222, -0.0155, ..., -0.0104, -0.0312, 0.0072],
[-0.0046, 0.0148, -0.0016, ..., 0.0116, 0.0208, -0.0281]])
new weight tensor([[-0.0284, 0.0301, 0.0172, ..., -0.0306, -0.0287, -0.0027],
[-0.0223, -0.0262, 0.0211, ..., -0.0250, 0.0071, -0.0201],
[ 0.0125, 0.0225, 0.0154, ..., -0.0154, -0.0169, 0.0252],
...,
[ 0.0007, 0.0042, 0.0118, ..., -0.0078, 0.0025, -0.0025],
[-0.0162, -0.0223, -0.0154, ..., -0.0103, -0.0311, 0.0071],
[-0.0047, 0.0147, -0.0015, ..., 0.0115, 0.0208, -0.0282]])
tensor([[-0.0284, 0.0301, 0.0172, ..., 0.0230, -0.0120, 0.0135],
[-0.0223, -0.0262, 0.0211, ..., 0.0203, 0.0265, -0.0037],
[ 0.0125, 0.0225, 0.0154, ..., -0.0243, 0.0238, -0.0051],
...,
[ 0.0120, 0.0056, 0.0135, ..., 0.0179, 0.0039, -0.0037],
[ 0.0088, -0.0150, 0.0076, ..., 0.0287, -0.0270, 0.0145],
[-0.0311, 0.0221, -0.0164, ..., 0.0252, 0.0206, 0.0199]],
grad_fn=<SplitLookupFunction_sgd_Op>>)
tensor([[-0.0285, 0.0301, 0.0173, ..., 0.0231, -0.0121, 0.0136],
[-0.0224, -0.0263, 0.0212, ..., 0.0204, 0.0266, -0.0037],
[ 0.0125, 0.0225, 0.0154, ..., -0.0244, 0.0237, -0.0052],
...,
[ 0.0121, 0.0057, 0.0134, ..., 0.0178, 0.0040, -0.0038],
[ 0.0087, -0.0150, 0.0076, ..., 0.0286, -0.0269, 0.0145],
[-0.0311, 0.0220, -0.0164, ..., 0.0252, 0.0207, 0.0199]],
grad_fn=<SplitLookupFunction_sgd_Op>>)
*******************
*******************
1049520
264696
0.25220672307340497
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment