After being inspired by this fantastic genetic algorithm demo, I decided to get learning.

For my first attempt at anything like this, I found an excellent guide [1] and programmed the example. The algorithm had the task of making an equation that equalled a goal number. So, for example, I might tell it “Make me an equation nine long that creates 13”, and it could come back with (1 * 9 + 2 + 6 - 4).

It took quite a while to get it working since the algorithm isn’t deterministic I found the code quite hard to debug. It all works well now though, and I’m quite pleased with the code, it’s clean, and I managed to optimise it to run a lot faster.

(defvar *genes* '((0 (0 0 0 0))
                  (1 (0 0 0 1))
                  (2 (0 0 1 0))
                  (3 (0 0 1 1))
                  (4 (0 1 0 0))
                  (5 (0 1 0 1))
                  (6 (0 1 1 0))
                  (7 (0 1 1 1))
                  (8 (1 0 0 0))
                  (9 (1 0 0 1))
                  (+ (1 0 1 0))
                  (- (1 0 1 1))
                  (* (1 1 1 1))
                  (/ (1 1 0 1))))

(defvar *mutation-rate* 0.001)
(defvar *crossover-rate* 0.7)

(defun random-item (list)
  "Take a list and return one item from it at random"
  (nth (random (length list)) list))

(defun generate-random-chromosome (size)
  (loop for i from 1 to size
     append (cadr (random-item *genes*))))

(defun decode-gene (gene)
  (car (rassoc gene *genes* :key #'car :test #'equal)))

(defun decode-chromosome (chromosome)
  (remove nil (loop for (a b c d) on chromosome by #'cddddr
                collect (decode-gene (list a b c d)))))

; FIXME x / 0 is silently dropped and operator precedence isn't correct
(defun find-answer (dc)
  "Find the answer given by a decoded-chromosome"
  (cond ((not (consp dc)) dc)
        ((< (list-length dc) 3) (car (remove-if-not 'numberp dc)))
        ((not (numberp (car dc))) (find-answer (cdr dc)))
        ((numberp (cadr dc)) (find-answer (cons (car dc) (cddr dc))))
        ((not (numberp (caddr dc)))
         (find-answer (append (subseq dc 0 2) (cdddr dc))))
        ((and (eql '/ (cadr dc))
              (eql 0 (caddr dc)))
         (find-answer (cons (car dc) (cddr dc))))
        (t
         (let ((simplified (eval (list (cadr dc) (car dc) (caddr dc)))))
           (if (consp (cdddr dc))
               (find-answer (cons simplified (cdddr dc)))
               simplified)))))

(defun count-flaws (dc)
  "Return the number of items in a chromosome which are semantically wrong e.g. '(* / +) would be 3"
  (let ((should-be-number t)
        (flaw-count 0))
    (loop for gene in dc
       if (not (eql (numberp gene) should-be-number))
       do (incf flaw-count)
       do (setf should-be-number (if should-be-number nil t)))
    flaw-count))

; FIXME -100 is given to all symbol chromosomes, probably a bad idea?
(defun fitness (chromosome goal)
  "Return a fitness based on the distance from the answer and the number of flaws"
  (let ((answer (find-answer (decode-chromosome chromosome))))
    (if (numberp answer)
        (let ((distance (abs (- goal answer)))
              (flaws (count-flaws (decode-chromosome chromosome))))
          (+ (if (eql distance 0) 0 (/ 1 distance))
             (if (eql flaws 0) 0 (/ 1 flaws))))
        -100)))

(defun pool-fitness (pool goal)
  (loop for chromosome in pool
     collect (fitness chromosome goal)))

(defun mutate-bit (bit)
  "Take a 1 or 0 and mutate it."
  (if (< (random 1.0) *mutation-rate*)
      (if (eql bit 0) 1 0)
      bit))

(defun mutate (chromosome)
  "Returns a possibly mutated version of chromosome"
  (loop for bit in chromosome
     collect (mutate-bit bit)))

(defun crossover (first second)
  "Returns a mix of two chromosomes (might be the same)"
  (if (< (random 1.0) *crossover-rate*)
      (let ((point (+ (random (- (length first) 1)) 1)))
        (append (subseq first 0 point)
                (subseq second point)))
      (random-item (list first second))))

(defun make-roulette-wheel (fitness)
  (let* ((total-fitness (reduce #'+ fitness))
         (total-probability 0.0))
    (append (loop for x in fitness
               collect total-probability
               do (incf total-probability (/ x total-fitness))) '(1.0))))

(defun spin-the-wheel (pool roulette-wheel)
  (let ((ball (random 1.0)))
    (declare (type float ball))
    (loop for chromosome in pool
       for (position next-pos) of-type (float float) on roulette-wheel
       if (<= ball next-pos)
       do (return chromosome))))

(defun re-populate (pool fitness)
  (let ((roulette-wheel (make-roulette-wheel fitness)))
    (loop for i from 1 to (length pool)
       collect (mutate (crossover (spin-the-wheel pool roulette-wheel)
                                  (spin-the-wheel pool roulette-wheel))))))

(defun create-initial-pool (pool-size chromosome-size)
  (loop for i from 1 to pool-size
     collect (generate-random-chromosome chromosome-size)))

(defun find-best-chromosome (pool fitness)
  "Returns the fittest chromosome in the pool."
  (let ((best-score) (best-chromosome))
    (loop for chromosome in pool
       for score in fitness
       do (when (or (equalp score 0) (not best-score) (> score best-score))
            (setf best-score score)
            (setf best-chromosome chromosome)))
    (values best-chromosome best-score)))

(defun there-is-a-winner (pool fitness)
  "If any of the chromosomes in the pool have the answer return the first one that does."
  (let ((winner (position 0 fitness)))
    (if winner (nth winner pool))))

(defun display-turn (pool fitness turn)
  (multiple-value-bind (chromosome score) (find-best-chromosome pool fitness)
    (let ((avg-fitness (/ (reduce #'+ fitness) (+ 1 (length pool))))
          (*print-pretty* nil))
      (format t "~a - Average Fitness: ~F Best: ~w (fitness ~F)~%" turn avg-fitness (decode-chromosome chromosome) score))))

(defun genetic-algorithm (goal pop-size chromosome-size tries)
  (let ((pool (create-initial-pool pop-size chromosome-size))
        (fitness))
    (loop for i from 1 to tries
       do (setf fitness (pool-fitness pool goal))
       if (there-is-a-winner pool fitness) return it
       do (display-turn pool fitness i)
       do (setf pool (re-populate pool fitness))
       finally (return (find-best-chromosome pool fitness)))))

(defun skynet (&key goal pop-size chromosome-size tries mutation-rate crossover-rate)
  (let ((best (genetic-algorithm goal pop-size chromosome-size tries))
        (*print-pretty* nil)
        (*mutation-rate* mutation-rate)
        (*crossover-rate* crossover-rate))
    (format t "~w gave ~F (Aim was ~A)" (decode-chromosome best) (find-answer (decode-chromosome best)) goal)))

; Sensible parameter values according to t'internet:
;   - 0.1 to 0.001 for mutation rate
;   - 0.7 to 0.9 for crossover rate

(skynet :goal 13
        :pop-size 100
        :chromosome-size 9
        :tries 200
        :mutation-rate 0.001
        :crossover-rate 0.7)

As always feel free to use the code, any feedback would be useful. Next up I’m going to apply GA to a more interesting problem.

Cheers, Dave.

Edit: I optimised the code by using the SBCL profiler to see what was slowing things down. It turned out that the spin-the-wheel function was taking about 92% of the time with the functions it called. Firstly I examined the function and realised I could reduce the comparisons, that reduced it to about 87%. Next, the guys in #lisp explained that I should declare the variables in that function as floats so that SBCL could optimise things for me. When I tried that it reduced the time spent in spin-the-wheel down to around 36%!

I found it surprising that the ‘roulette wheel spinning’ function that we use to select chromosomes for crossover was the most computationally intensive part of the code. I’m curious to know if that’s normal. Also if you have any idea how I could optimise more I would be grateful, here is the profiler output.

[1] - According to the comments the guide is teaching very out of date methods, possibly consider a different source of information!


Edit: Before I replaced WordPress there were some excellent comments:

Foo said

That tutorial is really quite astonishingly crappy. So what I'm going to say is obviously exaggeration but is close to the truth:

(1) NO ONE has done roulette selection in the last ten years. Tournament selection, truncation selection, etc., yes. Roulette selection? Laughable.

(2) NO ONE has done binary encodings of non-binary problems in the last ten years. You use an encoding which matches your actual problem.

(3) NO ONE does one-point crossover any more. A parametrisation uniform crossover. Or at LEAST, heaven forbid, a two-point crossover.

(4) NO ONE does genetic programming (the little encoding of math the guy’s showing) in a fixed-length string. It borders on the hilariously ignorant.

Hey, it’s all fine if you’re doing this for the first time, and you go pick up a 1970 book on genetic algorithms and publish a web page on how you coded it up. But to claim that you’re doing a TUTORIAL? This guy should have been taken out behind the shed and shot.

Dominic Said

You might find this tutorial useful: http://www.obitko.com/tutorials/genetic-algorithms/index.php

Also, the “field guide to genetic programming” by Poli, Langdon and McPhee is very up-to-date, and can be downloaded for free: http://www.lulu.com/content/2167025