Shibaken28/MonteCarlo.cpp Secret

## MonteCarlo.cpp
#include <iostream>      // cout, endl, cin
#include <string>        // string, to_string, stoi
#include <vector>        // vector
#include <algorithm>     // min, max, swap, sort, reverse, lower_bound, upper_bound
#include <utility>       // pair, make_pair
#include <tuple>         // tuple, make_tuple
#include <map>           // map
#include <queue>         // queue, priority_queue
#include <set>           // set
#include <stack>         // stack
#include <deque>         // deque
#include <unordered_map> // unordered_map
#include <unordered_set> // unordered_set
#include <bitset>        // bitset
#include <cctype>        // isupper, islower, isdigit, toupper, tolower
#include <iomanip>
#include <climits>
#include <cmath>
#include <functional>
#include <numeric>
#include <regex>
#include <array>
#include <fstream>
#include <sstream>

using namespace std;

/*
三目並べを価値反復法で強化学習
*/

/*
盤面はvector<int> で表す
0: 空白
1: o
2: x
*/
using Board = vector<int>;
constexpr int width = 3;
constexpr int height = 3;
constexpr int cells = width * height;

// 判定
int judge(const Board &board)
{
    static const vector<vector<int>> win = {
        {0, 1, 2},
        {3, 4, 5},
        {6, 7, 8},
        {0, 3, 6},
        {1, 4, 7},
        {2, 5, 8},
        {0, 4, 8},
        {2, 4, 6},
    };
    // 0: 続行
    // 1: oの勝ち
    // 2: xの勝ち
    // 3: 引き分け
    for (const auto &w : win)
    {
        if (board[w[0]] == board[w[1]] && board[w[1]] == board[w[2]])
        {
            if (board[w[0]] == 1)
            {
                return 1;
            }
            else if (board[w[0]] == 2)
            {
                return 2;
            }
        }
    }
    for (const int &b : board)
    {
        if (b == 0)
        {
            return 0; // 続行
        }
    }
    return 1; // 引き分け
}

// 報酬
double reward(const Board &board)
{
    int j = judge(board);
    static const vector<double> r = {0.0, 1.0, -1.0, 0.3};
    // 0: 続行
    // 1: oの勝ち
    // 2: xの勝ち
    // 3: 引き分け
    return r[j];
}

// 盤面を表示
void print_board(const Board &board)
{
    for (int i = 0; i < cells; i++)
    {
        if (board[i] == 0)
        {
            cout << " ";
        }
        else if (board[i] == 1)
        {
            cout << "o";
        }
        else if (board[i] == 2)
        {
            cout << "x";
        }
        if (i % width == width - 1)
        {
            cout << endl;
        }
        else
        {
            cout << "|";
        }
    }
}

// 盤面が有効かどうか
bool isVaild(const Board &board)
{
    int cnt = 0;
    for (auto &b : board)
    {
        if (b == 1)
        {
            cnt++;
        }
        if (b == 2)
        {
            cnt--;
        }
    }
    return cnt == 0;
}

int main(void)
{
    // コンピュータは常にoとする
    //  V[s] := 状態sにおける価値
    map<Board, double> V;
    map<Board, int> count;

    // モンテカルロ法
    const int N = 100000;
    double gamma = 0.9;
    for (int i = 0; i < N; i++)
    {
        Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0};
        vector<Board> history;
        int turn = 0;
        while (true)
        {
            history.push_back(board);
            int act;
            act = rand() % cells;
            if (board[act] != 0)
                continue;
            board[act] = turn + 1;
            if (judge(board) != 0)
                break;
            turn = 1 - turn;
        }
        history.push_back(board);
        double r = reward(board);
        reverse(history.begin(), history.end());
        for (const auto &h : history)
        {
            V[h] += r;
            count[h]++;
            r *= gamma;
        }
    }

    for (auto &[prevS, prevV] : V)
    {
        prevV /= count[prevS];
    }

    // 最適方策mu
    map<Board, int> mu;
    for (auto &[prevS, prevV] : V)
    {
        if (!isVaild(prevS))
            continue;
        int act;
        double nextV = -1e9;
        for (int i = 0; i < cells; i++)
        {
            if (prevS[i] != 0)
                continue;
            Board nextS = prevS;
            nextS[i] = 1;
            cout << i << " : " << V[nextS] << endl;
            if (V[nextS] > nextV)
            {
                nextV = V[nextS];
                act = i;
            }
        }
        mu[prevS] = act;
        cout << "mu [" << endl;
        print_board(prevS);
        cout << "] = " << mu[prevS] << endl
             << endl;
    }

    // 対戦開始
    Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0};
    while (true)
    {
        // 先手はコンピュータ
        // 盤面を表示
        cout << endl;
        print_board(board);
        // 勝敗が決まっている場合
        if (judge(board) != 0)
        {
            break;
        }
        // コンピュータの手を決める
        int put = mu[board];
        board[put] = 1;
        // 勝敗が決まっている場合
        if (judge(board) != 0)
        {
            break;
        }
        // 盤面を表示
        cout << endl;
        print_board(board);
        // 人間の手を決める
        while (true)
        {
            cout << "put:";
            int put;
            cin >> put;
            if (board[put] == 0)
            {
                board[put] = 2;
                break;
            }
        }
    }
    // 勝敗を表示
    print_board(board);
}
	#include <iostream> // cout, endl, cin
	#include <string> // string, to_string, stoi
	#include <vector> // vector
	#include <algorithm> // min, max, swap, sort, reverse, lower_bound, upper_bound
	#include <utility> // pair, make_pair
	#include <tuple> // tuple, make_tuple
	#include <map> // map
	#include <queue> // queue, priority_queue
	#include <set> // set
	#include <stack> // stack
	#include <deque> // deque
	#include <unordered_map> // unordered_map
	#include <unordered_set> // unordered_set
	#include <bitset> // bitset
	#include <cctype> // isupper, islower, isdigit, toupper, tolower
	#include <iomanip>
	#include <climits>
	#include <cmath>
	#include <functional>
	#include <numeric>
	#include <regex>
	#include <array>
	#include <fstream>
	#include <sstream>

	using namespace std;

	/*
	三目並べを価値反復法で強化学習
	*/

	/*
	盤面はvector<int> で表す
	0: 空白
	1: o
	2: x
	*/
	using Board = vector<int>;
	constexpr int width = 3;
	constexpr int height = 3;
	constexpr int cells = width * height;

	// 判定
	int judge(const Board &board)
	{
	static const vector<vector<int>> win = {
	{0, 1, 2},
	{3, 4, 5},
	{6, 7, 8},
	{0, 3, 6},
	{1, 4, 7},
	{2, 5, 8},
	{0, 4, 8},
	{2, 4, 6},
	};
	// 0: 続行
	// 1: oの勝ち
	// 2: xの勝ち
	// 3: 引き分け
	for (const auto &w : win)
	{
	if (board[w[0]] == board[w[1]] && board[w[1]] == board[w[2]])
	{
	if (board[w[0]] == 1)
	{
	return 1;
	}
	else if (board[w[0]] == 2)
	{
	return 2;
	}
	}
	}
	for (const int &b : board)
	{
	if (b == 0)
	{
	return 0; // 続行
	}
	}
	return 1; // 引き分け
	}

	// 報酬
	double reward(const Board &board)
	{
	int j = judge(board);
	static const vector<double> r = {0.0, 1.0, -1.0, 0.3};
	// 0: 続行
	// 1: oの勝ち
	// 2: xの勝ち
	// 3: 引き分け
	return r[j];
	}

	// 盤面を表示
	void print_board(const Board &board)
	{
	for (int i = 0; i < cells; i++)
	{
	if (board[i] == 0)
	{
	cout << " ";
	}
	else if (board[i] == 1)
	{
	cout << "o";
	}
	else if (board[i] == 2)
	{
	cout << "x";
	}
	if (i % width == width - 1)
	{
	cout << endl;
	}
	else
	{
	cout << "\|";
	}
	}
	}

	// 盤面が有効かどうか
	bool isVaild(const Board &board)
	{
	int cnt = 0;
	for (auto &b : board)
	{
	if (b == 1)
	{
	cnt++;
	}
	if (b == 2)
	{
	cnt--;
	}
	}
	return cnt == 0;
	}

	int main(void)
	{
	// コンピュータは常にoとする
	// V[s] := 状態sにおける価値
	map<Board, double> V;
	map<Board, int> count;

	// モンテカルロ法
	const int N = 100000;
	double gamma = 0.9;
	for (int i = 0; i < N; i++)
	{
	Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0};
	vector<Board> history;
	int turn = 0;
	while (true)
	{
	history.push_back(board);
	int act;
	act = rand() % cells;
	if (board[act] != 0)
	continue;
	board[act] = turn + 1;
	if (judge(board) != 0)
	break;
	turn = 1 - turn;
	}
	history.push_back(board);
	double r = reward(board);
	reverse(history.begin(), history.end());
	for (const auto &h : history)
	{
	V[h] += r;
	count[h]++;
	r *= gamma;
	}
	}

	for (auto &[prevS, prevV] : V)
	{
	prevV /= count[prevS];
	}

	// 最適方策mu
	map<Board, int> mu;
	for (auto &[prevS, prevV] : V)
	{
	if (!isVaild(prevS))
	continue;
	int act;
	double nextV = -1e9;
	for (int i = 0; i < cells; i++)
	{
	if (prevS[i] != 0)
	continue;
	Board nextS = prevS;
	nextS[i] = 1;
	cout << i << " : " << V[nextS] << endl;
	if (V[nextS] > nextV)
	{
	nextV = V[nextS];
	act = i;
	}
	}
	mu[prevS] = act;
	cout << "mu [" << endl;
	print_board(prevS);
	cout << "] = " << mu[prevS] << endl
	<< endl;
	}

	// 対戦開始
	Board board = {0, 0, 0, 0, 0, 0, 0, 0, 0};
	while (true)
	{
	// 先手はコンピュータ
	// 盤面を表示
	cout << endl;
	print_board(board);
	// 勝敗が決まっている場合
	if (judge(board) != 0)
	{
	break;
	}
	// コンピュータの手を決める
	int put = mu[board];
	board[put] = 1;
	// 勝敗が決まっている場合
	if (judge(board) != 0)
	{
	break;
	}
	// 盤面を表示
	cout << endl;
	print_board(board);
	// 人間の手を決める
	while (true)
	{
	cout << "put:";
	int put;
	cin >> put;
	if (board[put] == 0)
	{
	board[put] = 2;
	break;
	}
	}
	}
	// 勝敗を表示
	print_board(board);
	}