Forth 字句解析
http://eliza.newhaven.edu/lang/lectures.html "CS 636 / 536 Spring 2009 The Structure of Programming Languages Lecture Notes, Resources, and Homework" の Structure of Programming Languages – Lecture 2 (http://eliza.newhaven.edu/lang/attach/L2.pdf) が分かりやすかったので FORTH の字句解析を書いて見ている。
書いていて気づいたが境界で微妙に違っているっぽい。手動 Lex の限界か。しかし lex を書くにはオートマトンを実装し正規表現を実装し、、、と先が長いのだった。
項目 | 定義 |
---|---|
Comment | \ WS [ ∧\n ]* \n |
Comment | (WS [∧)]*)WS |
StringLiteral | .” WS [ ∧” ]* ” WS |
Integer | −? [0−9 ∧ WS]* WS |
Word | [∧WS]* W |
(defun parse-token (in state buf) (flet ((mkstr (lst) (concatenate 'string (reverse lst)))) (let ((c (read-char in nil 'eof nil))) (cond ((eql c 'eof) (case state (:in-word (cons :word (mkstr buf))) (:in-string (error "in-string expect quote, but found eof.")) (:in-integer (cons :integer (parse-integer (mkstr buf)))) (:in-comment (error "in-comment expect right paren, but found eof.")) (t (cons state nil)))) (t (case state (:in-word (case c (#\space (cons :word (mkstr buf))) (t (parse-token in :in-word (cons c buf))))) (:in-string (case c (#\" (cons :string (mkstr buf))) (t (parse-token in :in-string (cons c buf))))) (:in-integer (case c ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (parse-token in :in-integer (cons c buf))) (#\space (cons :integer (parse-integer (mkstr buf)))) (t (error "in-integer found invalid letter: ~a" c)))) (:in-comment (case c (#\) (cons :comment (mkstr buf))) (t (parse-token in :in-comment (cons c buf))))) (t (case c (#\space (parse-token in nil buf)) (#\( (parse-token in :in-comment buf)) (#\. (if (char= (peek-char t in nil nil nil) #\") (progn (read-char in) ;; skip char (parse-token in :in-string buf)) (parse-token in :in-word (cons c buf)))) ((#\- #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (parse-token in :in-integer (cons c buf))) (t (parse-token in :in-word (cons c buf))))))))))) (defun read-forth (string) "return a function of no arguments that returns (type . value)." (let ((in (make-string-input-stream string))) #'(lambda () (parse-token in nil ()))))
FORTH> (setq *lex* (read-forth "-3 5 + .")) #<CLOSURE (LAMBDA ()) {1002D2EE49}> FORTH> (funcall *lex*) (:INTEGER . -3) FORTH> (funcall *lex*) (:INTEGER . 5) FORTH> (funcall *lex*) (:WORD . "+") FORTH> (funcall *lex*) (:WORD . ".") FORTH> (funcall *lex*) (NIL) ;; 別の例 FORTH> (setq *lex* (read-forth ": hello ( -- ) .\" Hi, I'm here.\" ;")) #<CLOSURE (LAMBDA ()) {1002D8A2D9}> FORTH> (funcall *lex*) (:WORD . ":") FORTH> (funcall *lex*) (:WORD . "hello") FORTH> (funcall *lex*) (:COMMENT . " -- ") FORTH> (funcall *lex*) (:STRING . " Hi, I'm here.") FORTH> (funcall *lex*) (:WORD . ";") FORTH> (funcall *lex*) (NIL) FORTH>