;;
;; octi-strategies.lisp
;;
;;----------------------------------------------
;;   This software is licensed; see the file
;;   LICENSE for details.
;;----------------------------------------------


(in-package "USER")

#+Release
(eval-when (compile)
  (declaim (optimize (speed 3) (safety 1) (space 0) (debug 0))))

#-Release
(eval-when (compile)
  (declaim (optimize safety debug)))

;; final-value utilities

(defconstant losing-value most-negative-fixnum)
(defconstant winning-value most-positive-fixnum)

(defun final-state-p (board)
  (some
     #'(lambda (player)
         (player-won-p board player))
   (board-players board)))

(defun final-value (board player)
  "Returns WINNING-VALUE if PLAYER has won on BOARD,
   LOSING-VALUE if player has lost, 0 otherwise."
  (cond
   ((player-won-p board player)
    winning-value)
   ((player-won-p board (opponent player board))
    losing-value)
   (t 0)))

(defun num-octi-squares-occupied (board player)
  (loop
    for square in (octi-squares-for-player
                   (opponent player board))
    counting
    (square-occupied board square player)))

(defun difference-fn (eval-fn)
  #'(lambda (board player)
      (- (funcall eval-fn board player)
         (funcall eval-fn board (opponent player board)))))

(defun material-eval-fn (board player)
  (prongs-for-player board player))

(defun pods-eval-fn (board player)
  (pods-for-player board player))

(defun captured-pods-eval-fn (board player)
  (declare (ignore board))
  (- (player-max-pods player)
     (player-captured-pods player)))
  
(defun mobility-eval-fn (board player)
  (length (legal-moves player board)))

(defun jumps-eval-fn (board player)
  (length (legal-player-jump-moves player board)))

(defun leaps-eval-fn (board player)
  (length (legal-player-leap-moves player board)))

(defun octi-square-distance (board player)
  (let ((total-distance 0)
        (num-pieces 0))
    (dolist (square all-squares)
      (when (square-occupied board square player)
        (let ((num (stack-num-pieces (bref board square))))
          (incf num-pieces num)
          (incf total-distance
                (* num (min-distance
                        square
                        (octi-squares-for-player 
                         (opponent player board))))))))
    (if (= num-pieces 0)
        most-positive-fixnum
      (/ total-distance num-pieces))))

(defun neg-octi-square-distance (board player)
  (- (octi-square-distance board player)))

(defun bounding-box-size (board player)
  "Returns the area of the smallest rectangle that will enclose
   all of PLAYER's pieces on BOARD."
  (loop
    with occupied-squares = (player-squares-occupied board player)
    for square in occupied-squares
    minimize (square-x square) into min-x of-type fixnum
    minimize (square-y square) into min-y of-type fixnum
    maximize (square-x square) into max-x of-type fixnum
    maximize (square-y square) into max-y of-type fixnum
    finally (return (* (1+ (- max-x min-x))
                       (1+ (- max-y min-y))))))

          
(defun average-prong-direction (board player)
  (let ((dir-accum 0)
        (num-prongs 0))
    (dolist (square all-squares)
      (when (square-occupied board square player)
        (do-stack (piece (bref board square))
           (dotimes (i (length (piece-prongs piece)))
             (when (piece-has-prong-p piece i)
               (incf dir-accum i)
               (incf num-prongs))))))
    (/ dir-accum num-prongs)))

;; alpha-beta

(defparameter *ab-trace-level* 1)

(defparameter *ab-new-moves-hook* (new-hook)
  "Hook that is run when the alpha-beta searcher has
   generated new moves to visit.  Each function is called
   with three arguments: the board (board-p), the player
   (player-p), and the list of moves to expand (listp).")

(defparameter *ab-pre-visit-hook* (new-hook)
  "Hook that is run when the alpha-beta searcher is
   about to visit a move.  Each function is called with
   three arguments: the board (board-p), the player (player-p),
   and the move the expand (move-p).")

(defparameter *ab-new-best-move-hook* (new-hook)
  "Hook that is run when the alpha-beta searcher has 
   just chosen a new move as best.  Each function is
   called with four arguments: the board (board-p),
   the player (player-p), the move (move-p), and the
   move's value (numberp).")

(defparameter *ab-search-done-hook* (new-hook)
  "Hook to call when the alpha-beta searcher has completely finished.
   Called with no arguments.")

(defstruct ab-statistics
  (num-moves 0)
  (num-visited 0)
  (num-cutoffs 0)
  (time 0)
  (best-val 0))
  
(defparameter *ab-last-stats* (make-ab-statistics))

(defun ab-statistics-new-moves (board player moves)
  (declare (ignore board player))
  (incf (ab-statistics-num-moves *ab-last-stats*)
        (length moves)))
          
(defun ab-statistics-expand-move (board player)
  (declare (ignore board player))
  (incf (ab-statistics-num-visited *ab-last-stats*)))
  
(defun ab-statistics-new-best-move (board player move val)
  (declare (ignore board player move))
  (setf (ab-statistics-best-val *ab-last-stats*) val))
  
(add-hook *ab-new-moves-hook* 'ab-statistics-new-moves)
(add-hook *ab-pre-visit-hook* 'ab-statistics-expand-move)
(add-hook *ab-new-best-move-hook* 'ab-statistics-new-best-move)

(defun print-ab-statistics ()
  (print *ab-last-stats*))

(defmacro showing-ab-statistics (&body body)
  `(progn
     (setq *ab-last-stats* (make-ab-statistics
                            :time (get-universal-time)))
     (multiple-value-prog1 
         (progn ,@body)
       (setf 
        (ab-statistics-time *ab-last-stats*)
        (- (get-universal-time)
           (ab-statistics-time *ab-last-stats*))))))

(defvar *ab-tree* nil
  "The search tree from the last alpha-beta run.")

(defstruct ab-node 
  parent
  children
  move
  player
  value
  board)

(defun ab-explorer-console (player board achievable cutoff ply eval-fn)
  (do
      ((done nil))
      (done)
    (print-board board)
    (format t "~&~A to move.  ~A plies left." (player-name player) ply)
    (format t "~&Achievable: ~A   Cuttoff: ~A" achievable cutoff)
    (format t "~&Possible moves: ")
    (let ((node-list (ab-explorer player board achievable cutoff ply eval-fn)))
      (loop
        for node in node-list
        for num = 1 then (1+ num)
        do
        (progn
          (format t "~&~a. " num)
          (write-octi-move (ab-node-move node) board t)
          (format t " V: ~a" (ab-node-value node))))
      (format t "~&Choice (:QUIT to quit): ")
      (let ((choice (read)))
        (cond 
         ((eq choice :quit) (throw :explorer-quit nil))
         ((eq choice :eval) (print (eval (read))))
         ((eq choice :up) (setq done t))
         ((numberp choice)
          (let* ((ach (apply #'max
                             (mapcar #'ab-node-value
                               (subseq node-list 0 choice))))
                 (move (ab-node-move (nth (1- choice) node-list)))
                 (new-board (do-move move player (copy-board board))))
            (ab-explorer-console (opponent player board)
                                 new-board 
                                 (- cutoff) (- ach)
                                 (1- ply) eval-fn))))))))
                   
(defun ab-explorer (player board achievable cutoff ply eval-fn)
  "Dynamically returns all the immediate child nodes of the
   given position, returned by an alpha-beta search."
  (when (> ply 0)
    (sort 
     (loop for move in (legal-moves player board)
         collect
           (let* ((new-player (copy-player player))
                  (new-board (do-move move
                                      new-player
                                      (copy-board board)))
                  (val (- (alpha-beta (opponent new-player new-board)
                                      new-board
                                      (- cutoff) (- achievable)
                                      (- ply 1) eval-fn))))
             (when (> val achievable)
               (setf achievable val))
             (make-ab-node :player new-player 
                           :board new-board
                           :move move 
                           :value val))
         until
           (>= achievable cutoff))
     #'> :key #'ab-node-value)))


#+no-need
(defmethod alpha-beta :after (player board achievable cutoff ply eval-fn)
    (let ((bad-player (some 
                     #'(lambda (player)
                         (> (+ (abs (player-captured-pods player))
                               (abs (player-reserve-pods player))
                               (pods-for-player board player))
                            7))
                     (board-players board))))
    (when bad-player
      (break "Player ~a exceeds max pods!" bad-player))))

(defun alpha-beta (player board achievable cutoff ply eval-fn)
  "Find the best move, for PLAYER, according to EVAL-FN,
   searching PLY levels deep and backing up values, using
   cutoffs whenever possible."
  (run-hook *ab-pre-visit-hook* board player)
  (cond
   ((= ply 0) (funcall eval-fn board player))
   ((final-state-p board) (final-value board player))
   (t
    (let* ((moves (legal-moves player board))
           (best-move (first moves)))
      (run-hook *ab-new-moves-hook* board player moves)
      (loop for rest-moves on moves
          for move = (car rest-moves) do
            #|
            (let* ((board2 (copy-board board))
                   (player2 (find-player board2
                                         (player-name player)))
                   val)
              (setq board2 (do-move move player2 board2)
                    val
                    (- (alpha-beta
                        (opponent player2 board2) board2
                        (- cutoff) (- achievable) 
                        (- ply 1) eval-fn)))
              (when (> val achievable)
                (setf achievable val)
                (setf best-move move))
              (undo-move move player2 board2))
|#
            (let (#+debug (test (copy-board board)) val)
              #+debug (progn
                        (write-octi-move move board *standard-output*)
                        (terpri))
              (setq board (do-move move player board))
              (setq val
                    (- (alpha-beta
                        (opponent player board) board
                        (- cutoff) (- achievable) 
                        (- ply 1) eval-fn)))
              (when (> val achievable)
                (setq achievable val)
                (setq best-move move))
              (undo-move move player board)
              #+debug (progn
                        (princ "undo ")
                        (write-octi-move move board *standard-output*)
                        (terpri)
                        (unless (board-equal board test)
                          (error "!!!"))))
            until (>= achievable cutoff)
            finally (unless rest-moves
                      (incf (ab-statistics-num-cutoffs *ab-last-stats*))))
      (values achievable best-move)))))

(defun alpha-beta-searcher (depth eval-fn)
  #'(lambda (player board)
      (multiple-value-bind 
            (score move)
          (showing-ab-statistics
             (alpha-beta player board losing-value winning-value
                         depth eval-fn))
        (declare (ignore score))
        (run-hook *ab-search-done-hook*)
        move)))

(defun alpha-beta-explorer (depth eval-fn)
  #'(lambda (player board)
      (ab-explorer-console player board losing-value winning-value
                           depth eval-fn)))

	
;; strategies

(defvar *boards* ()
  "A place to save boards for debugging.")

(defun human (player board)
  (format t "~&~c Move (type 'h' for help): " (player-name player))
  (handler-case 
      (let ((ch (peek-char)))
        (case ch 
          (#\h (help-text)
           (human player board))
          (#\q (read-line)
               (throw :octi-quit nil))
          (#\e (read-char) ;; discard 'e'
           (print (eval (read)))
           (human player board))
          (#\d (read-line)
           (octi-debug player board)
           (human player board))
          (#\s (read-line) ;; discard 's'
           (push board *boards*)
           (human player board))
          (#\v (read-char) ;; discard 'v'
           (view-square board (read))
           (human player board))
          (#\u (read-line)
               (signal
                (make-condition 'undo)))
          (t
           (let* ((string (read-line))
                  (stream (make-string-input-stream string)))
             (read-octi-move stream board player)))))
    (error (c) 
      (format t "~&Error: ~a" c)
      (human player board))))
                                   

(defun get-piece (board square number)
  (and (integerp number)
       (squarep square)
       (nth number
	    (stack-all-pieces
	     (bref board square)))))

(defun help-text ()
  (format t "~2&Enter a move in Octi notation.")
  (format t "~&Or enter:")
  (format t "~&  q to quit")
  (format t "~&  s to save the current board to the global *BOARDS* list")
  (format t "~&  e <form> to read-eval-print <form> for debugging")
  (format t "~&  d to invoke the AI debugger")
  (format t "~&  v <square> to view all pieces on square")
  (format t "~&  h for this help screen~2&"))

(defun view-square (board square &optional (stream *standard-output*))
  (cond
   ((and (squarep square)
         (square-valid board square))
    (format stream "~&Pods on square ~A" square)
    (dolist (piece (stack-all-pieces
                    (bref board square)))
      (print piece)))
   (t
    (format stream "~&Invalid square ~a" square))))
	 

(defun random-strategy (player board)
  (random-elt (legal-moves player board)))

#|

;;  Below is a version of alpha-beta that explicitly 
;;  maintains a search tree (may be needed for
;;  certain learning algorithms).

(defvar *ab-tree* nil
  "The search tree from the last alpha-beta run.")

(defstruct ab-node 
  parent
  children
  move
  player
  value
  board)

  
(defun alpha-beta (player board achievable cutoff ply eval-fn
                   parent)
  "Find the best move, for PLAYER, according to EVAL-FN,
   searching PLY levels deep and backing up values, using
   cutoffs whenever possible."
  (run-hook *ab-pre-visit-hook* board player)
  (cond
   ((= ply 0) (funcall eval-fn board player))
   ((final-state-p board) (final-value board player))
   (t
    (let* ((moves (legal-moves player board))
           (best-move (first moves)))
      (run-hook *ab-new-moves-hook* board player moves)
      (loop for rest-moves on moves
            for move = (car rest-moves) do
            (let* ((new-player (copy-player player))
                   (board2 (do-move move 
                                    new-player
                                    (copy-board board)))
                   (node (make-ab-node :parent parent
                                       :player new-player
                                       :board board2
                                       :move move)))
              (push node (ab-node-children parent))
              (let ((val
                     (- (alpha-beta
                         (opponent new-player board2) board2
                         (- cutoff) (- achievable) 
                         (- ply 1) eval-fn node))))
                (when (> val achievable)
                  (setf (ab-node-value parent) val)
                  (setf achievable val)
                  (setf best-move move))))
          until (>= achievable cutoff)
          finally (unless rest-moves
                    (incf (ab-statistics-num-cutoffs *ab-last-stats*))))
      (values achievable best-move)))))


;; And this was the version before I added the undo.  Putting this down here because
;;  I haven't put htese sources undo cvs yet.

(defun alpha-beta (player board achievable cutoff ply eval-fn)
  "Find the best move, for PLAYER, according to EVAL-FN,
   searching PLY levels deep and backing up values, using
   cutoffs whenever possible."
  (run-hook *ab-pre-visit-hook* board player)
  (cond
   ((= ply 0) (funcall eval-fn board player))
   ((final-state-p board) (final-value board player))
   (t
    (let* ((moves (legal-moves player board))
           (best-move (first moves)))
      (run-hook *ab-new-moves-hook* board player moves)
      (loop for rest-moves on moves
            for move = (car rest-moves) do
            (let* (( (copy-board board))
                   (new-player (find-player new-board
                                            (player-name player)))
                   (board2 (do-move move 
                                    new-player
                                    new-board))
                   (val
                    (- (alpha-beta
                        (opponent new-player board2) board2
                        (- cutoff) (- achievable) 
                        (- ply 1) eval-fn))))
                (when (> val achievable)
                  (setf achievable val)
                  (setf best-move move)))
          until (>= achievable cutoff)
          finally (unless rest-moves
                    (incf (ab-statistics-num-cutoffs *ab-last-stats*))))
      (values achievable best-move)))))
|#