quickCheck 入門 - 英文生成

英文生成の簡単な版は、QuickCheck を使うことで以下のように書けた(PAIP 2.2 A Straightforward Solution)。

module English where
import Test.QuickCheck
import Control.Monad

verb = elements ["hit", "took", "saw", "liked"] :: Gen String
noun = elements ["man", "ball", "woman", "table"] :: Gen String
article = elements ["the", "a"] :: Gen String
-- 補助関数
cat2 x y = x ++ " " ++ y

nounPhrase = liftM2 cat2 article noun :: Gen String
verbPhrase = liftM2 cat2 verb nounPhrase :: Gen String
sentence = liftM2 cat2 nounPhrase verbPhrase :: Gen String

これを改良して、ルールベースにすることを考える(PAIP 2.3 A Rule-Based Solution)。テキストでは、英文法をデータとして持ち(*simple-grammar*, *big-grammar*)、同じプログラムが文法データを変えれば違う英文を生成するようになっている。Lisp 版では、シンボルがルール名とそのルールの参照に使われている。
Haskell にはシンボルが無いので、シンボル用のデータ型 Sym String を用いた第一版を以下に示す。

module EnglishRule where
import Test.QuickCheck
import Control.Monad
import Data.List

data Sym = Sym String deriving (Eq)
instance Show Sym where
    show (Sym x) = x

data Rule = Rule [[Sym]] | Alt [String] deriving (Eq)

instance Show Rule where
    show (Rule rs) = show $ map (intercalate " + " . map show) rs
    show (Alt ws) = "(" ++ intercalate "," ws ++ ")"

type Grammar = [(Sym, Rule)]
simpleGrammar =
    [ (Sym "noun", Alt ["man", "ball", "woman", "table"]),
      (Sym "verb", Alt ["hit", "took", "saw", "liked"]),
      (Sym "article", Alt ["the", "a"]),
      (Sym "noun-phrase", Rule [[Sym "article", Sym "noun"]]),
      (Sym "verb-phrase", Rule [[Sym "verb", Sym "noun-phrase"]]),
      (Sym "sentence", Rule [[Sym "noun-phrase", Sym "verb-phrase"]])
    ] :: Grammar

generate :: Grammar -> Sym -> Gen String
generate g x = case lookup x g of
                 Just (Alt ws) -> elements ws
                 Just (Rule rs) -> oneof (map (generate' g x) rs)
                 Nothing -> error ("Unknown symbol: " ++ show x)

generate' g x [] = return ""
generate' g x (r:rs) = do
  a <- generate g r
  rest <- generate' g x rs
  if a /= "" && rest /= ""
  then
      return $ a ++ " " ++ rest
  else
      if a /= "" && rest == ""
      then
          return a
      else
          if a == "" && rest /= ""
          then
              return rest
          else
              return "" -- a == "" && rest == ""

bigGrammar =
    [ (Sym "sentence", Rule [[Sym "noun-phrase", Sym "verb-phrase"]]),
      (Sym "noun-phrase", Rule [[Sym "article", Sym "adj*", Sym "noun", Sym "pp*"],
                                [Sym "name"],
                                [Sym "pronoun"]]),
      (Sym "verb-phrase", Rule [[Sym "verb", Sym "noun-phrase", Sym "pp*"]]),
      (Sym "pp*", Rule [[], [Sym "pp", Sym "pp*"]]),
      (Sym "adj*", Rule [[], [Sym "adj", Sym "adj*"]]),
      (Sym "pp", Rule [[Sym "prep", Sym "noun-phrase"]]),
      (Sym "prep", Alt ["to", "in", "by", "with", "on"]),
      (Sym "adj", Alt ["big", "little", "blue", "green", "adiabatic"]),
      (Sym "article", Alt ["the", "a"]),
      (Sym "name", Alt ["Pat", "Kim", "Lee", "Terry", "Robin"]),
      (Sym "noun", Alt ["man", "ball", "woman", "table"]),
      (Sym "verb", Alt ["hit", "took", "saw", "liked"]),
      (Sym "pronoun", Alt ["he", "she", "it", "these", "those", "that"])
    ] :: Grammar

何カ所かかっこう悪いところがあり、リファクタリングが必要である。

  • Common Lisp 版に比べて長い。Haskell の言語要素、ライブラリの大きさを考えると、もっと簡潔にできるのではないか。
  • シンボルの代替 Sym String が如何にも冗長。もっと奇麗な、Haskell らしい構成は無いか。
  • generate' で空文字判定をしているところが不格好。Maybe モナドが使えるのでは。
  • QuickCheck の Gen String 型で妥当か。

続く。