-
-
Save Shibaken28/db7f1f7c0e992bfdfab7d8c52d5bf5b8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> // cout, endl, cin | |
#include <string> // string, to_string, stoi | |
#include <vector> // vector | |
#include <algorithm> // min, max, swap, sort, reverse, lower_bound, upper_bound | |
#include <utility> // pair, make_pair | |
#include <tuple> // tuple, make_tuple | |
#include <map> // map | |
#include <queue> // queue, priority_queue | |
#include <set> // set | |
#include <stack> // stack | |
#include <deque> // deque | |
#include <unordered_map> // unordered_map | |
#include <unordered_set> // unordered_set | |
#include <bitset> // bitset | |
#include <cctype> // isupper, islower, isdigit, toupper, tolower | |
#include <iomanip> | |
#include <climits> | |
#include <cmath> | |
#include <functional> | |
#include <numeric> | |
#include <regex> | |
#include <array> | |
#include <fstream> | |
#include <sstream> | |
using namespace std; | |
/* | |
三目並べを価値反復法で強化学習 | |
*/ | |
/* | |
盤面はvector<int> で表す | |
0: 空白 | |
1: o | |
2: x | |
*/ | |
using Board = vector<int>; | |
constexpr int width = 3; | |
constexpr int height = 3; | |
constexpr int cells = width * height; | |
// 判定 | |
int judge(const Board &board) | |
{ | |
static const vector<vector<int>> win = { | |
{0, 1, 2}, | |
{3, 4, 5}, | |
{6, 7, 8}, | |
{0, 3, 6}, | |
{1, 4, 7}, | |
{2, 5, 8}, | |
{0, 4, 8}, | |
{2, 4, 6}, | |
}; | |
// 0: 続行 | |
// 1: oの勝ち | |
// 2: xの勝ち | |
// 3: 引き分け | |
for (const auto &w : win) | |
{ | |
if (board[w[0]] == board[w[1]] && board[w[1]] == board[w[2]]) | |
{ | |
if (board[w[0]] == 1) | |
{ | |
return 1; | |
} | |
else if (board[w[0]] == 2) | |
{ | |
return 2; | |
} | |
} | |
} | |
for (const int &b : board) | |
{ | |
if (b == 0) | |
{ | |
return 0; // 続行 | |
} | |
} | |
return 1; // 引き分け | |
} | |
// 報酬 | |
double reward(const Board &board) | |
{ | |
int j = judge(board); | |
static const vector<double> r = {0.0, 1.0, -1.0, 0.3}; | |
// 0: 続行 | |
// 1: oの勝ち | |
// 2: xの勝ち | |
// 3: 引き分け | |
return r[j]; | |
} | |
// 盤面を表示 | |
void print_board(const Board &board) | |
{ | |
for (int i = 0; i < cells; i++) | |
{ | |
if (board[i] == 0) | |
{ | |
cout << " "; | |
} | |
else if (board[i] == 1) | |
{ | |
cout << "o"; | |
} | |
else if (board[i] == 2) | |
{ | |
cout << "x"; | |
} | |
if (i % width == width - 1) | |
{ | |
cout << endl; | |
} | |
else | |
{ | |
cout << "|"; | |
} | |
} | |
} | |
// 盤面が有効かどうか | |
bool isVaild(const Board &board) | |
{ | |
int cnt = 0; | |
for (auto &b : board) | |
{ | |
if (b == 1) | |
{ | |
cnt++; | |
} | |
if (b == 2) | |
{ | |
cnt--; | |
} | |
} | |
return cnt == 0; | |
} | |
int main(void) | |
{ | |
// コンピュータは常にoとする | |
// V[s] := 状態sにおける価値 | |
map<Board, double> V; | |
map<Board, int> count; | |
// モンテカルロ法 | |
const int N = 100000; | |
double gamma = 0.9; | |
for (int i = 0; i < N; i++) | |
{ | |
Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
vector<Board> history; | |
int turn = 0; | |
while (true) | |
{ | |
history.push_back(board); | |
int act; | |
act = rand() % cells; | |
if (board[act] != 0) | |
continue; | |
board[act] = turn + 1; | |
if (judge(board) != 0) | |
break; | |
turn = 1 - turn; | |
} | |
history.push_back(board); | |
double r = reward(board); | |
reverse(history.begin(), history.end()); | |
for (const auto &h : history) | |
{ | |
V[h] += r; | |
count[h]++; | |
r *= gamma; | |
} | |
} | |
for (auto &[prevS, prevV] : V) | |
{ | |
prevV /= count[prevS]; | |
} | |
// 最適方策mu | |
map<Board, int> mu; | |
for (auto &[prevS, prevV] : V) | |
{ | |
if (!isVaild(prevS)) | |
continue; | |
int act; | |
double nextV = -1e9; | |
for (int i = 0; i < cells; i++) | |
{ | |
if (prevS[i] != 0) | |
continue; | |
Board nextS = prevS; | |
nextS[i] = 1; | |
cout << i << " : " << V[nextS] << endl; | |
if (V[nextS] > nextV) | |
{ | |
nextV = V[nextS]; | |
act = i; | |
} | |
} | |
mu[prevS] = act; | |
cout << "mu [" << endl; | |
print_board(prevS); | |
cout << "] = " << mu[prevS] << endl | |
<< endl; | |
} | |
// 対戦開始 | |
Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
while (true) | |
{ | |
// 先手はコンピュータ | |
// 盤面を表示 | |
cout << endl; | |
print_board(board); | |
// 勝敗が決まっている場合 | |
if (judge(board) != 0) | |
{ | |
break; | |
} | |
// コンピュータの手を決める | |
int put = mu[board]; | |
board[put] = 1; | |
// 勝敗が決まっている場合 | |
if (judge(board) != 0) | |
{ | |
break; | |
} | |
// 盤面を表示 | |
cout << endl; | |
print_board(board); | |
// 人間の手を決める | |
while (true) | |
{ | |
cout << "put:"; | |
int put; | |
cin >> put; | |
if (board[put] == 0) | |
{ | |
board[put] = 2; | |
break; | |
} | |
} | |
} | |
// 勝敗を表示 | |
print_board(board); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment