Skip to content

Instantly share code, notes, and snippets.

@tomascsantos
Created September 12, 2020 16:44
Show Gist options
  • Save tomascsantos/db3aa116d1eaab6804a804e6487fc3a6 to your computer and use it in GitHub Desktop.
Save tomascsantos/db3aa116d1eaab6804a804e6487fc3a6 to your computer and use it in GitHub Desktop.
output from mean_net.parameters()
old net: Sequential(
(0): Linear(in_features=111, out_features=64, bias=True)
(1): Tanh()
(2): Linear(in_features=64, out_features=64, bias=True)
(3): Tanh()
(4): Linear(in_features=64, out_features=8, bias=True)
(5): Identity()
)
Loading expert policy from... ../policies/experts/Ant.pkl
obs (1, 111) (1, 111)
Done restoring expert policy...
********** Iteration 0 ************
Training agent using sampled data from replay buffer...
Parameter containing:
tensor([[ 0.0489, -0.0419, -0.0184, ..., 0.0651, 0.0197, 0.0305],
[ 0.0709, 0.0900, -0.0630, ..., 0.0837, -0.0723, 0.0861],
[-0.0747, -0.0669, 0.0464, ..., 0.0485, 0.0355, 0.0808],
...,
[ 0.0628, -0.0701, -0.0452, ..., -0.0347, -0.0462, 0.0503],
[ 0.0315, -0.0301, -0.0805, ..., -0.0740, 0.0806, 0.0457],
[-0.0828, -0.0745, 0.0925, ..., 0.0152, -0.0703, -0.0798]],
requires_grad=True)
Parameter containing:
tensor([ 0.0012, -0.0011, -0.0764, -0.0612, 0.0118, 0.0229, 0.0077, 0.0277,
-0.0928, -0.0114, 0.0742, -0.0511, 0.0213, 0.0258, -0.0589, 0.0613,
0.0576, 0.0926, 0.0773, 0.0026, 0.0889, 0.0184, -0.0584, -0.0458,
-0.0429, -0.0110, -0.0931, -0.0385, -0.0939, 0.0432, -0.0905, 0.0089,
-0.0455, -0.0051, 0.0523, 0.0252, -0.0943, -0.0676, -0.0532, 0.0271,
-0.0069, -0.0035, 0.0693, -0.0563, -0.0521, 0.0642, 0.0758, 0.0723,
0.0835, -0.0474, -0.0072, 0.0919, 0.0702, -0.0511, -0.0730, 0.0587,
0.0535, -0.0754, -0.0631, 0.0603, -0.0896, 0.0146, 0.0507, 0.0056],
requires_grad=True)
Parameter containing:
tensor([[ 0.0023, 0.0633, 0.0503, ..., 0.0337, -0.0714, -0.1225],
[ 0.0224, 0.0936, 0.0635, ..., -0.0119, -0.0706, 0.0969],
[-0.0493, -0.0511, 0.1219, ..., 0.0405, -0.0202, 0.0153],
...,
[-0.0079, -0.1173, -0.1148, ..., 0.0436, -0.0729, -0.0609],
[-0.0411, -0.0963, -0.0684, ..., -0.0489, -0.1013, -0.0404],
[ 0.1210, 0.0777, -0.0764, ..., -0.0821, 0.0384, 0.0098]],
requires_grad=True)
Parameter containing:
tensor([-0.1125, 0.0173, -0.0869, -0.0684, 0.0695, 0.0205, -0.1249, -0.0402,
-0.0730, 0.0619, -0.0885, 0.1072, -0.0707, 0.0797, -0.0599, -0.0723,
0.1246, -0.0768, 0.1146, -0.0596, -0.0563, -0.0852, -0.0823, 0.1125,
-0.1096, 0.0196, 0.0593, -0.1130, 0.0807, -0.0505, 0.0478, 0.1042,
-0.0910, 0.0588, 0.0065, -0.0162, 0.0330, 0.0699, 0.1205, 0.0502,
0.0360, -0.0892, 0.1052, -0.0248, -0.0281, 0.0111, 0.0770, 0.1215,
-0.1190, 0.0492, -0.0563, -0.0847, -0.0243, 0.0198, -0.0250, -0.0120,
-0.0641, 0.0413, 0.0320, -0.0621, 0.1149, 0.1228, 0.0666, -0.0425],
requires_grad=True)
Parameter containing:
tensor([[-0.0157, -0.0098, 0.0472, -0.1061, 0.1087, -0.0678, -0.1176, -0.0313,
-0.0454, 0.0703, 0.0844, 0.0151, 0.0224, -0.0337, -0.0541, 0.1127,
0.1239, 0.0554, 0.0737, -0.1214, 0.0280, 0.0187, -0.1214, 0.0461,
-0.0924, 0.1055, 0.0257, 0.1009, -0.0043, -0.0848, 0.0051, -0.0951,
-0.1138, -0.0095, 0.0175, 0.0033, 0.0858, -0.0482, -0.0723, 0.0114,
0.0132, -0.0029, -0.0978, -0.1104, -0.0800, 0.0304, 0.0219, 0.1157,
-0.0326, -0.0653, -0.0211, 0.1199, -0.1178, 0.0641, -0.0400, 0.1094,
0.0880, 0.0471, 0.0184, -0.0616, -0.0937, 0.0578, -0.0628, -0.0876],
[-0.0695, 0.0011, 0.0596, 0.0547, -0.0079, -0.0625, -0.0639, -0.1068,
0.0171, 0.0505, -0.0325, -0.0125, 0.1173, -0.0509, 0.0066, 0.0854,
-0.0526, 0.0907, -0.0481, 0.0656, -0.1195, 0.0714, -0.0513, 0.1238,
-0.1099, -0.0373, 0.0463, 0.1147, 0.0118, 0.0936, -0.0532, -0.1064,
0.0769, 0.0450, -0.0169, 0.0538, 0.0903, 0.0083, 0.0871, 0.0408,
-0.0611, -0.0063, 0.0835, -0.1096, 0.0228, 0.1081, -0.0949, 0.1219,
0.1142, -0.0844, 0.0464, 0.0476, 0.0611, 0.0650, -0.0595, -0.0388,
0.0382, -0.0963, -0.0090, -0.1050, -0.1216, -0.0498, -0.1231, -0.0339],
[ 0.0724, 0.0117, -0.0076, 0.0545, 0.0614, -0.1221, 0.0968, 0.0903,
0.0241, -0.0701, -0.0415, -0.0444, -0.0523, 0.0731, -0.1228, -0.0084,
0.1056, 0.0760, -0.0949, 0.0581, -0.1096, -0.1083, 0.1244, -0.0159,
-0.1216, -0.1031, 0.0009, 0.0030, 0.0851, -0.0383, 0.0591, 0.0358,
0.0665, -0.1229, -0.0622, -0.0988, 0.0277, -0.0895, -0.0781, 0.0260,
0.0490, 0.0427, 0.0885, -0.1240, -0.0211, 0.1168, 0.0301, 0.0429,
0.1169, 0.0783, -0.1142, -0.0539, -0.0060, -0.0452, 0.1138, -0.0786,
0.0061, 0.0922, -0.1228, -0.0810, 0.1152, -0.1174, 0.1028, 0.0471],
[-0.1140, 0.1229, -0.0042, 0.0122, 0.1073, 0.0169, 0.0390, -0.0224,
-0.1182, 0.0585, 0.0646, -0.0226, 0.0945, -0.0391, 0.0948, -0.0073,
-0.0778, 0.0094, 0.0832, -0.0660, 0.0367, 0.1196, 0.0693, 0.0478,
-0.0875, 0.0589, 0.0952, 0.1084, -0.0768, -0.0853, -0.0869, -0.0723,
-0.0685, -0.0135, 0.0714, -0.1085, 0.0856, 0.0172, 0.0172, 0.0803,
0.1024, 0.0737, -0.1083, -0.0273, 0.0420, -0.1166, -0.0973, 0.0765,
-0.0504, 0.0422, 0.0472, 0.1123, -0.0219, -0.1218, 0.0215, -0.0731,
0.0192, -0.0455, -0.1179, 0.0137, 0.0614, -0.0671, 0.0469, -0.0620],
[-0.0443, -0.0004, -0.0331, 0.0160, -0.1180, 0.0805, -0.1208, 0.0243,
0.0975, -0.0633, -0.1240, 0.0990, 0.0737, -0.1051, 0.0054, 0.0663,
-0.0843, -0.0117, 0.0751, 0.0589, -0.0035, -0.0035, 0.0938, 0.0715,
-0.1220, -0.1130, -0.0338, -0.0681, -0.0581, 0.0100, 0.0156, 0.0956,
0.0601, 0.0341, 0.0742, -0.1160, -0.0267, 0.0961, -0.0168, 0.0154,
-0.0432, 0.0107, -0.0333, 0.1199, 0.0825, -0.0317, 0.0930, -0.0487,
0.0599, 0.1172, -0.0537, 0.0697, -0.0827, -0.1174, 0.1099, 0.0213,
-0.0774, 0.1127, 0.0605, 0.0276, -0.0135, 0.0380, -0.1243, -0.1175],
[-0.1153, -0.0092, -0.0035, 0.1057, -0.1096, 0.0350, 0.0185, 0.1198,
0.0942, -0.1151, -0.0823, 0.0070, 0.1169, 0.0498, 0.0946, -0.0213,
-0.0735, 0.0804, 0.0821, 0.0563, -0.0536, 0.1009, 0.1085, -0.0569,
-0.0279, 0.0981, 0.0568, 0.1032, 0.0255, -0.1074, 0.0275, -0.0083,
0.1212, 0.0048, 0.0163, 0.0473, -0.1081, -0.0311, -0.1032, -0.1059,
0.0470, -0.0479, -0.0738, -0.1093, 0.1231, -0.0999, -0.0969, -0.0910,
-0.0485, 0.0210, -0.0249, -0.0901, 0.0848, -0.0823, -0.0767, 0.0129,
-0.0403, -0.0808, -0.1005, 0.0637, 0.0175, -0.1158, -0.0445, -0.0949],
[ 0.0479, 0.0260, 0.0919, 0.0983, -0.0055, 0.0147, -0.0849, -0.1035,
-0.0127, -0.0266, 0.0567, 0.0276, -0.0768, 0.0092, -0.0853, -0.0035,
0.1218, -0.0329, -0.1151, -0.0275, -0.0755, 0.0966, 0.0597, -0.1162,
-0.0026, -0.0158, 0.0703, 0.0467, 0.0670, 0.0612, -0.0655, 0.0134,
-0.0203, -0.0412, -0.1236, 0.0186, 0.0714, -0.0294, 0.0045, 0.0966,
-0.0933, -0.0905, -0.0557, 0.1030, -0.0289, 0.0870, 0.0136, -0.0880,
-0.1075, 0.0673, -0.0061, 0.0272, 0.0747, -0.0060, 0.0905, 0.1099,
-0.0065, 0.1005, 0.0907, 0.0197, -0.0542, -0.0419, 0.0619, 0.1079],
[-0.0555, -0.1013, -0.1104, -0.0078, -0.0227, 0.0311, 0.0429, 0.0488,
0.0214, 0.0299, -0.0609, -0.1104, -0.0477, -0.1113, 0.0255, 0.0091,
0.1040, 0.1161, 0.0156, 0.0995, 0.0253, 0.0503, 0.0612, 0.0830,
-0.0848, -0.0833, 0.1180, 0.0893, 0.0124, -0.0573, -0.0994, 0.0082,
0.0368, -0.0035, -0.0542, 0.0791, 0.0162, 0.0134, 0.0680, 0.1042,
-0.0964, 0.0884, 0.0792, -0.0345, -0.0473, 0.0470, -0.0727, 0.0596,
-0.0999, 0.0426, 0.0802, -0.0841, -0.0582, 0.0475, -0.0572, 0.0726,
-0.0028, -0.0313, 0.1166, -0.0380, -0.0058, 0.0125, -0.1160, 0.1250]],
requires_grad=True)
Parameter containing:
tensor([ 0.1131, 0.0020, 0.0964, -0.0887, -0.0480, 0.0505, -0.0603, 0.0085],
requires_grad=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment