Skip to content

Instantly share code, notes, and snippets.

@Shibaken28
Created January 19, 2023 06:48
Show Gist options
  • Save Shibaken28/db7f1f7c0e992bfdfab7d8c52d5bf5b8 to your computer and use it in GitHub Desktop.
Save Shibaken28/db7f1f7c0e992bfdfab7d8c52d5bf5b8 to your computer and use it in GitHub Desktop.
#include <iostream> // cout, endl, cin
#include <string> // string, to_string, stoi
#include <vector> // vector
#include <algorithm> // min, max, swap, sort, reverse, lower_bound, upper_bound
#include <utility> // pair, make_pair
#include <tuple> // tuple, make_tuple
#include <map> // map
#include <queue> // queue, priority_queue
#include <set> // set
#include <stack> // stack
#include <deque> // deque
#include <unordered_map> // unordered_map
#include <unordered_set> // unordered_set
#include <bitset> // bitset
#include <cctype> // isupper, islower, isdigit, toupper, tolower
#include <iomanip>
#include <climits>
#include <cmath>
#include <functional>
#include <numeric>
#include <regex>
#include <array>
#include <fstream>
#include <sstream>
using namespace std;
/*
三目並べを価値反復法で強化学習
*/
/*
盤面はvector<int> で表す
0: 空白
1: o
2: x
*/
using Board = vector<int>;
constexpr int width = 3;
constexpr int height = 3;
constexpr int cells = width * height;
// 判定
int judge(const Board &board)
{
static const vector<vector<int>> win = {
{0, 1, 2},
{3, 4, 5},
{6, 7, 8},
{0, 3, 6},
{1, 4, 7},
{2, 5, 8},
{0, 4, 8},
{2, 4, 6},
};
// 0: 続行
// 1: oの勝ち
// 2: xの勝ち
// 3: 引き分け
for (const auto &w : win)
{
if (board[w[0]] == board[w[1]] && board[w[1]] == board[w[2]])
{
if (board[w[0]] == 1)
{
return 1;
}
else if (board[w[0]] == 2)
{
return 2;
}
}
}
for (const int &b : board)
{
if (b == 0)
{
return 0; // 続行
}
}
return 1; // 引き分け
}
// 報酬
double reward(const Board &board)
{
int j = judge(board);
static const vector<double> r = {0.0, 1.0, -1.0, 0.3};
// 0: 続行
// 1: oの勝ち
// 2: xの勝ち
// 3: 引き分け
return r[j];
}
// 盤面を表示
void print_board(const Board &board)
{
for (int i = 0; i < cells; i++)
{
if (board[i] == 0)
{
cout << " ";
}
else if (board[i] == 1)
{
cout << "o";
}
else if (board[i] == 2)
{
cout << "x";
}
if (i % width == width - 1)
{
cout << endl;
}
else
{
cout << "|";
}
}
}
// 盤面が有効かどうか
bool isVaild(const Board &board)
{
int cnt = 0;
for (auto &b : board)
{
if (b == 1)
{
cnt++;
}
if (b == 2)
{
cnt--;
}
}
return cnt == 0;
}
int main(void)
{
// コンピュータは常にoとする
// V[s] := 状態sにおける価値
map<Board, double> V;
map<Board, int> count;
// モンテカルロ法
const int N = 100000;
double gamma = 0.9;
for (int i = 0; i < N; i++)
{
Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0};
vector<Board> history;
int turn = 0;
while (true)
{
history.push_back(board);
int act;
act = rand() % cells;
if (board[act] != 0)
continue;
board[act] = turn + 1;
if (judge(board) != 0)
break;
turn = 1 - turn;
}
history.push_back(board);
double r = reward(board);
reverse(history.begin(), history.end());
for (const auto &h : history)
{
V[h] += r;
count[h]++;
r *= gamma;
}
}
for (auto &[prevS, prevV] : V)
{
prevV /= count[prevS];
}
// 最適方策mu
map<Board, int> mu;
for (auto &[prevS, prevV] : V)
{
if (!isVaild(prevS))
continue;
int act;
double nextV = -1e9;
for (int i = 0; i < cells; i++)
{
if (prevS[i] != 0)
continue;
Board nextS = prevS;
nextS[i] = 1;
cout << i << " : " << V[nextS] << endl;
if (V[nextS] > nextV)
{
nextV = V[nextS];
act = i;
}
}
mu[prevS] = act;
cout << "mu [" << endl;
print_board(prevS);
cout << "] = " << mu[prevS] << endl
<< endl;
}
// 対戦開始
Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0};
while (true)
{
// 先手はコンピュータ
// 盤面を表示
cout << endl;
print_board(board);
// 勝敗が決まっている場合
if (judge(board) != 0)
{
break;
}
// コンピュータの手を決める
int put = mu[board];
board[put] = 1;
// 勝敗が決まっている場合
if (judge(board) != 0)
{
break;
}
// 盤面を表示
cout << endl;
print_board(board);
// 人間の手を決める
while (true)
{
cout << "put:";
int put;
cin >> put;
if (board[put] == 0)
{
board[put] = 2;
break;
}
}
}
// 勝敗を表示
print_board(board);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment