Skip to content

Instantly share code, notes, and snippets.

@yifuwang
Last active April 9, 2024 19:47
Show Gist options
  • Save yifuwang/961b3a98aeba88e09a34a89f9702b8d7 to your computer and use it in GitHub Desktop.
Save yifuwang/961b3a98aeba88e09a34a89f9702b8d7 to your computer and use it in GitHub Desktop.
[torch.Size([32000, 8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192, 8192]), torch.Size([1024, 8192]), torch.Size([1024, 8192]), torch.Size([8192, 8192]), torch.Size([28672, 8192]), torch.Size([8192, 28672]), torch.Size([28672, 8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([8192]), torch.Size([32000, 8192])]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment