Metaxal/prisoner-dilemma-read-code.rkt

## prisoner-dilemma-read-code.rkt
#lang racket

(require racket/sandbox)

#|
Self-link: http://gist.github.com/Metaxal/5723825

Original idea by Eliezer Yudkowski and Alex Mennen:
http://lesswrong.com/lw/7f2/prisoners_dilemma_tournament_results/4ru9
http://lesswrong.com/lw/hmx/prisoners_dilemma_with_visible_source_code/

Other resources:
http://intelligence.org/files/RobustCooperation.pdf
http://lesswrong.com/lw/hmw/robust_cooperation_in_the_prisoners_dilemma/

|#

;=========================;
;=== Sandbox evaluator ===;
;=========================;

(define time-limit 2 #;10) ; seconds
(define memory-limit 100) ; MB

(define (make-my-eval)
  (parameterize ([sandbox-path-permissions '(#;(read "/"))] ; no access to
                 [sandbox-eval-limits (list time-limit memory-limit)] ; per eval call
                 ;[sandbox-output current-output-port] ; Display prints, for debugging
                 [sandbox-output 'string]
                 )
    (make-evaluator 'racket ; or 'racket/base for smaller libs
                    '(define x 3))))

(define main-eval (make-my-eval))
(define in-game-eval (make-my-eval))

#;(module+ test
  (require rackunit)

  ; no read access
  (check-exn exn:fail? (λ()(my-eval '(file->string "/tmp/test.txt"))))
  ; time limit
  (check-exn exn:fail? (λ()(my-eval `(sleep ,(* 2 time-limit)))))
  )

;==========================;
;=== Prisoner's dilemma ===;
;==========================;

(define cooperate 0)
(define defect    1)
(define other     2)

(define (number->choice n)
  (vector-ref #(C D Other) n))

(define score
  ; Original matrix:
  ;    C     D     Other
#;#(#[(2 2) (0 3) (0 2)] ; C
    #[(3 0) (1 1) (1 0)] ; D
    #[(2 0) (0 1) (0 0)]); Other
  ; Better matrix:
  ;    C     D     Other
#;#(#[(2 2) (0 3) (2 0)] ; C
    #[(3 0) (1 1) (3 0)] ; D
    #[(0 2) (0 3) (0 0)] ; Other
    )
#;#(#[(4 4) (0 7) (4 0)] ; C
    #[(7 0) (1 1) (7 0)] ; D
    #[(0 4) (0 7) (0 0)] ; Other
    )
  ; Normal game, years served (cost matrix, lower is better)
  #(#[(1 1) (3 0) (1 1)] ; C
    #[(0 3) (2 2) (0 3)] ; D
    #[(1 1) (0 3) (3 3)] ; Other
    )
  )

(define (matrix-ref mat row col)
  (vector-ref (vector-ref mat row) col))

;; hist is the game history of choices made,
;; from the point of view of p1.
;; The last game played is the first in the list.
(define (one-game-player p1 p2 hist)
  #;(collect-garbage) ; WARNING: Takes a lot of time!
  ; Or maybe collect garbage every N runs, and give one eval space to each player?
  (define res
    (with-handlers ([values (λ(e)(printf "Error: ~a\n" (exn-message e))
                              other)])
      (let ([res (main-eval `(,p1 ',p1 ',p2 ',hist ,in-game-eval))])
        (if (or (eq? res cooperate) (eq? res defect))
            res
            other))))
  (define str (get-output main-eval))
  (unless (equal? str "")
    (display "Player says: ")
    (displayln str))
  res)

(define (one-game p1 p2 hist)
  (define p1-res (one-game-player p1 p2 hist))
  (define p2-res (one-game-player p2 p1 (map reverse hist)))
  ;(displayln (list 'choices: (map number->choice (list p1-res p2-res))))
  (values (list p1-res p2-res)
          (matrix-ref score p1-res p2-res)))

(define (n-games n p1 p2)
  (define-values (hist p1-score p2-score)
    (for/fold ([hist '()] [p1-score 0] [p2-score 0])
      ([i n])
      (define-values (choices scores)
        (one-game p1 p2 hist))
      (values (cons choices hist)
              (+ p1-score (first scores))
              (+ p2-score (second scores)))))
  (values hist p1-score p2-score))

(define (vector-update! vec i updater)
  (vector-set! vec i (updater (vector-ref vec i))))

(define (all-games n players)
  (define nplayers (vector-length players))
  (define scores (make-vector nplayers))
  (define triangle
    (for/list ([i nplayers #;(sub1 nplayers)])
      (for/list ([j (in-range i #;(add1 i) nplayers)])
        (define-values (h p1-sc p2-sc)
          (n-games n
                   (vector-ref players i)
                   (vector-ref players j)))
        (vector-update! scores i (λ(s)(+ s p1-sc)))
        (vector-update! scores j (λ(s)(+ s p2-sc)))
        (list p1-sc p2-sc))))
  (values scores triangle))

(define-syntax-rule (all-games/disp n (players ...))
  (begin
    (define vplayers (vector players ...))
    (define splayers '(players ...))
    (define nplayers (vector-length vplayers))
    (define-values (scores triangle)
      (all-games n vplayers))
    (define left-width 15)
    (define cell-width 9)
    (display (~a "" #:width left-width))
    (for ([p splayers #;(rest splayers)])
      (display (string-append
                (~s p #:width cell-width))))
    (newline)
    #;(for ([i (sub1 nplayers)])
      (display (~s (list-ref splayers i) #:width left-width))
      (display (~a ""  #:width (* cell-width i)))
      (for ([j (in-range (add1 i) nplayers)])
        (display (~s (list-ref (list-ref triangle i) (- j i 1)) #:width 9)))
      (newline))
    (for ([i nplayers])
      (display (~s (list-ref splayers i) #:width left-width))
      (display (~a ""  #:width (* cell-width i)))
      (for ([j (in-range i nplayers)])
        (display (~s (list-ref (list-ref triangle i) (- j i)) #:width 9)))
      (newline))
    (newline)
    (for ([i nplayers] [s splayers])
      (printf "~a:~a\n"
              (~a s #:width left-width)
              (~s (vector-ref scores i))))))

;======================;
;=== Some prisoners ===;
;======================;

; Warning: When reading some prisoner's code, we should ensure
; that it begins with (lambda... , and not (let ...

(define prisC `(λ _ ,cooperate))
(define prisD `(λ _ ,defect))
(define prisRan `(λ _ (random 2)))
(define pris1/2 `(λ(me opp hist eval)
                   (if (even? (length hist))
                       0
                       1)))
(define prisTitForTat `(λ(me opp hist eval)
                         ;(displayln hist)
                         (cond [(empty? hist) 0]
                               [(= 0 (cadar hist)) 0]
                               [else 1])))
; Warning: Nested calls!
; Maybe we should turn prevent using eval in the code?
; Or call (make-eval) ?
(define (prisEval C) `(λ(me opp hist eval)
                        (if (equal? me opp)
                            ,C ; cooperate with myself
                            (case (eval `(,opp ',opp ',me ',hist ,eval))
                              [(0) ,C]
                              [(1) 1]
                              [else 1]))))
(define prisEvalC (prisEval cooperate))
(define prisEvalD (prisEval defect)) ; useless, is equivalent to prisD

; The better opponent should test all future histories and take the action
; that maximizes expectation.

;(all-games 10 (vector prisC prisD prisRan pris1/2 prisTitForTat prisEvalC prisEvalD))
(all-games/disp 40 (prisC prisD prisRan pris1/2 prisTitForTat #;prisTitForTat prisEvalC #;prisEvalD))
#;(n-games 10 prisTitForTat prisEvalD)
#;(n-games 10 prisTitForTat prisEvalC)
	#lang racket

	(require racket/sandbox)

	#\|
	Self-link: http://gist.github.com/Metaxal/5723825

	Original idea by Eliezer Yudkowski and Alex Mennen:
	http://lesswrong.com/lw/7f2/prisoners_dilemma_tournament_results/4ru9
	http://lesswrong.com/lw/hmx/prisoners_dilemma_with_visible_source_code/

	Other resources:
	http://intelligence.org/files/RobustCooperation.pdf
	http://lesswrong.com/lw/hmw/robust_cooperation_in_the_prisoners_dilemma/

	\|#

	;=========================;
	;=== Sandbox evaluator ===;
	;=========================;

	(define time-limit 2 #;10) ; seconds
	(define memory-limit 100) ; MB

	(define (make-my-eval)
	(parameterize ([sandbox-path-permissions '(#;(read "/"))] ; no access to
	[sandbox-eval-limits (list time-limit memory-limit)] ; per eval call
	;[sandbox-output current-output-port] ; Display prints, for debugging
	[sandbox-output 'string]
	)
	(make-evaluator 'racket ; or 'racket/base for smaller libs
	'(define x 3))))

	(define main-eval (make-my-eval))
	(define in-game-eval (make-my-eval))

	#;(module+ test
	(require rackunit)

	; no read access
	(check-exn exn:fail? (λ()(my-eval '(file->string "/tmp/test.txt"))))
	; time limit
	(check-exn exn:fail? (λ()(my-eval `(sleep ,(* 2 time-limit)))))
	)

	;==========================;
	;=== Prisoner's dilemma ===;
	;==========================;

	(define cooperate 0)
	(define defect 1)
	(define other 2)

	(define (number->choice n)
	(vector-ref #(C D Other) n))

	(define score
	; Original matrix:
	; C D Other
	#;#(#[(2 2) (0 3) (0 2)] ; C
	#[(3 0) (1 1) (1 0)] ; D
	#[(2 0) (0 1) (0 0)]); Other
	; Better matrix:
	; C D Other
	#;#(#[(2 2) (0 3) (2 0)] ; C
	#[(3 0) (1 1) (3 0)] ; D
	#[(0 2) (0 3) (0 0)] ; Other
	)
	#;#(#[(4 4) (0 7) (4 0)] ; C
	#[(7 0) (1 1) (7 0)] ; D
	#[(0 4) (0 7) (0 0)] ; Other
	)
	; Normal game, years served (cost matrix, lower is better)
	#(#[(1 1) (3 0) (1 1)] ; C
	#[(0 3) (2 2) (0 3)] ; D
	#[(1 1) (0 3) (3 3)] ; Other
	)
	)

	(define (matrix-ref mat row col)
	(vector-ref (vector-ref mat row) col))

	;; hist is the game history of choices made,
	;; from the point of view of p1.
	;; The last game played is the first in the list.
	(define (one-game-player p1 p2 hist)
	#;(collect-garbage) ; WARNING: Takes a lot of time!
	; Or maybe collect garbage every N runs, and give one eval space to each player?
	(define res
	(with-handlers ([values (λ(e)(printf "Error: ~a\n" (exn-message e))
	other)])
	(let ([res (main-eval `(,p1 ',p1 ',p2 ',hist ,in-game-eval))])
	(if (or (eq? res cooperate) (eq? res defect))
	res
	other))))
	(define str (get-output main-eval))
	(unless (equal? str "")
	(display "Player says: ")
	(displayln str))
	res)

	(define (one-game p1 p2 hist)
	(define p1-res (one-game-player p1 p2 hist))
	(define p2-res (one-game-player p2 p1 (map reverse hist)))
	;(displayln (list 'choices: (map number->choice (list p1-res p2-res))))
	(values (list p1-res p2-res)
	(matrix-ref score p1-res p2-res)))

	(define (n-games n p1 p2)
	(define-values (hist p1-score p2-score)
	(for/fold ([hist '()] [p1-score 0] [p2-score 0])
	([i n])
	(define-values (choices scores)
	(one-game p1 p2 hist))
	(values (cons choices hist)
	(+ p1-score (first scores))
	(+ p2-score (second scores)))))
	(values hist p1-score p2-score))

	(define (vector-update! vec i updater)
	(vector-set! vec i (updater (vector-ref vec i))))

	(define (all-games n players)
	(define nplayers (vector-length players))
	(define scores (make-vector nplayers))
	(define triangle
	(for/list ([i nplayers #;(sub1 nplayers)])
	(for/list ([j (in-range i #;(add1 i) nplayers)])
	(define-values (h p1-sc p2-sc)
	(n-games n
	(vector-ref players i)
	(vector-ref players j)))
	(vector-update! scores i (λ(s)(+ s p1-sc)))
	(vector-update! scores j (λ(s)(+ s p2-sc)))
	(list p1-sc p2-sc))))
	(values scores triangle))

	(define-syntax-rule (all-games/disp n (players ...))
	(begin
	(define vplayers (vector players ...))
	(define splayers '(players ...))
	(define nplayers (vector-length vplayers))
	(define-values (scores triangle)
	(all-games n vplayers))
	(define left-width 15)
	(define cell-width 9)
	(display (~a "" #:width left-width))
	(for ([p splayers #;(rest splayers)])
	(display (string-append
	(~s p #:width cell-width))))
	(newline)
	#;(for ([i (sub1 nplayers)])
	(display (~s (list-ref splayers i) #:width left-width))
	(display (~a "" #:width (* cell-width i)))
	(for ([j (in-range (add1 i) nplayers)])
	(display (~s (list-ref (list-ref triangle i) (- j i 1)) #:width 9)))
	(newline))
	(for ([i nplayers])
	(display (~s (list-ref splayers i) #:width left-width))
	(display (~a "" #:width (* cell-width i)))
	(for ([j (in-range i nplayers)])
	(display (~s (list-ref (list-ref triangle i) (- j i)) #:width 9)))
	(newline))
	(newline)
	(for ([i nplayers] [s splayers])
	(printf "~a:~a\n"
	(~a s #:width left-width)
	(~s (vector-ref scores i))))))

	;======================;
	;=== Some prisoners ===;
	;======================;

	; Warning: When reading some prisoner's code, we should ensure
	; that it begins with (lambda... , and not (let ...

	(define prisC `(λ _ ,cooperate))
	(define prisD `(λ _ ,defect))
	(define prisRan `(λ _ (random 2)))
	(define pris1/2 `(λ(me opp hist eval)
	(if (even? (length hist))
	0
	1)))
	(define prisTitForTat `(λ(me opp hist eval)
	;(displayln hist)
	(cond [(empty? hist) 0]
	[(= 0 (cadar hist)) 0]
	[else 1])))
	; Warning: Nested calls!
	; Maybe we should turn prevent using eval in the code?
	; Or call (make-eval) ?
	(define (prisEval C) `(λ(me opp hist eval)
	(if (equal? me opp)
	,C ; cooperate with myself
	(case (eval `(,opp ',opp ',me ',hist ,eval))
	[(0) ,C]
	[(1) 1]
	[else 1]))))
	(define prisEvalC (prisEval cooperate))
	(define prisEvalD (prisEval defect)) ; useless, is equivalent to prisD

	; The better opponent should test all future histories and take the action
	; that maximizes expectation.

	;(all-games 10 (vector prisC prisD prisRan pris1/2 prisTitForTat prisEvalC prisEvalD))
	(all-games/disp 40 (prisC prisD prisRan pris1/2 prisTitForTat #;prisTitForTat prisEvalC #;prisEvalD))
	#;(n-games 10 prisTitForTat prisEvalD)
	#;(n-games 10 prisTitForTat prisEvalC)