Forth 字句解析

http://eliza.newhaven.edu/lang/lectures.html "CS 636 / 536 Spring 2009 The Structure of Programming Languages Lecture Notes, Resources, and Homework" の Structure of Programming Languages – Lecture 2 (http://eliza.newhaven.edu/lang/attach/L2.pdf) が分かりやすかったので FORTH の字句解析を書いて見ている。

書いていて気づいたが境界で微妙に違っているっぽい。手動 Lex の限界か。しかし lex を書くにはオートマトンを実装し正規表現を実装し、、、と先が長いのだった。

項目 定義
Comment \ WS [ ∧\n ]* \n
Comment (WS [∧)]*)WS
StringLiteral .” WS [ ∧” ]* ” WS
Integer −? [0−9 ∧ WS]* WS
Word [∧WS]* W
(defun parse-token (in state buf)
  (flet ((mkstr (lst)
           (concatenate 'string (reverse lst))))
    (let ((c (read-char in nil 'eof nil)))
      (cond
        ((eql c 'eof)
         (case state
           (:in-word
            (cons :word (mkstr buf)))
           (:in-string
            (error "in-string expect quote, but found eof."))
           (:in-integer
            (cons :integer (parse-integer (mkstr buf))))
           (:in-comment
            (error "in-comment expect right paren, but found eof."))
           (t
            (cons state nil))))
        (t
         (case state
           (:in-word
            (case c
              (#\space
               (cons :word (mkstr buf)))
              (t
               (parse-token in :in-word (cons c buf)))))
           (:in-string
            (case c
              (#\"
               (cons :string (mkstr buf)))
              (t
               (parse-token in :in-string (cons c buf)))))
           (:in-integer
            (case c
              ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
               (parse-token in :in-integer (cons c buf)))
              (#\space
               (cons :integer (parse-integer (mkstr buf))))
              (t
               (error "in-integer found invalid letter: ~a" c))))
           (:in-comment
            (case c
              (#\)
               (cons :comment (mkstr buf)))
              (t
               (parse-token in :in-comment (cons c buf)))))
           (t
            (case c
              (#\space
               (parse-token in nil buf))
              (#\(
               (parse-token in :in-comment buf))
              (#\.
               (if (char= (peek-char t in nil nil nil) #\")
                   (progn
                     (read-char in) ;; skip char
                     (parse-token in :in-string buf))
                   (parse-token in :in-word (cons c buf))))
              ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
               (parse-token in :in-integer (cons c buf)))
              (t
               (parse-token in :in-word (cons c buf)))))))))))

(defun read-forth (string)
  "return a function of no arguments that returns (type . value)."
  (let ((in (make-string-input-stream string)))
    #'(lambda ()
        (parse-token in nil ()))))
FORTH> (setq *lex* (read-forth "-3 5 + ."))
#<CLOSURE (LAMBDA ()) {1002D2EE49}>
FORTH> (funcall *lex*)
(:INTEGER . -3)
FORTH> (funcall *lex*)
(:INTEGER . 5)
FORTH> (funcall *lex*)
(:WORD . "+")
FORTH> (funcall *lex*)
(:WORD . ".")
FORTH> (funcall *lex*)
(NIL)
;; 別の例
FORTH> (setq *lex* (read-forth ": hello ( -- ) .\" Hi, I'm here.\" ;"))
#<CLOSURE (LAMBDA ()) {1002D8A2D9}>
FORTH> (funcall *lex*)
(:WORD . ":")
FORTH> (funcall *lex*)
(:WORD . "hello")
FORTH> (funcall *lex*)
(:COMMENT . " -- ")
FORTH> (funcall *lex*)
(:STRING . " Hi, I'm here.")
FORTH> (funcall *lex*)
(:WORD . ";")
FORTH> (funcall *lex*)
(NIL)
FORTH>