2013-05-21

quickCheck - Lisp のコンス

Lisp のコンスを用いたバージョン。ほぼ PAIP 2.3 を直訳したつもり。
まず実例。ex 、ex' 関数は共に Object -> Object -> IO Object で、QuickCheck の sample' 関数を使っている。ex は文を、ex' は解析木を吐く。

*EnglishRule13> ex simpleGrammar (S "noun")
(ball)
*EnglishRule13> ex simpleGrammar (S "noun-phrase")
(a ball)
*EnglishRule13> ex simpleGrammar (S "sentence")
(a table linked a man)
*EnglishRule13> ex simpleGrammar (S "sentence")
(a ball took a woman)
*EnglishRule13> ex' bigGrammar (S "noun")
(noun woman)
*EnglishRule13> ex' bigGrammar (S "sentence")
(sentence (noun-phrase (name Lee)) (verb-phrase (verb saw) (noun-phrase (name Kim)) (pp*)))
*EnglishRule13> ex' bigGrammar (S "sentence")
(sentence (noun-phrase (pronoun these)) (verb-phrase (verb hit) (noun-phrase (article the) (adj* (adj adiabatic) (adj* (adj big) (adj*))) (noun ball) (pp* (pp (prep to) (noun-phrase (article the) (adj* (adj adiabatic) (adj*)) (noun woman) (pp* (pp (prep in) (noun-phrase (name Kim))) (pp* (pp (prep to) (noun-phrase (article a) (adj* (adj adiabatic) (adj* (adj big) (adj*))) (noun ball) (pp* (pp (prep on) (noun-phrase (pronoun he))) (pp* (pp (prep in) (noun-phrase (name Pat))) (pp*))))) (pp* (pp (prep on) (noun-phrase (article the) (adj* (adj little) (adj*)) (noun table) (pp*))) (pp*)))))) (pp* (pp (prep with) (noun-phrase (article the) (adj* (adj adiabatic) (adj* (adj adiabatic) (adj*))) (noun man) (pp*))) (pp*)))) (pp* (pp (prep with) (noun-phrase (pronoun it))) (pp* (pp (prep on) (noun-phrase (name Terry))) (pp* (pp (prep by) (noun-phrase (name Pat))) (pp* (pp (prep on) (noun-phrase (name Pat))) (pp* (pp (prep on) (noun-phrase (name Lee))) (pp* (pp (prep in) (noun-phrase (article a) (adj* (adj blue) (adj* (adj green) (adj* (adj big) (adj*)))) (noun table) (pp* (pp (prep in) (noun-phrase (article a) (adj* (adj little) (adj*)) (noun woman) (pp*))) (pp* (pp (prep with) (noun-phrase (name Kim))) (pp* (pp (prep to) (noun-phrase (name Terry))) (pp* (pp (prep with) (noun-phrase (name Kim))) (pp*))))))) (pp* (pp (prep on) (noun-phrase (pronoun she))) (pp*))))))))))
*EnglishRule13>

リスト操作関数を再実装する手間と型検査の安心感とを犠牲に、Lisp っぽいコードになっている。犠牲にしたものが大きすぎる気もするけど。
モナド版の mapcar', mappend' はもうちょっと共通化できるかもしれない。以下コード。

module EnglishRule13 where
import qualified Test.QuickCheck as Q
import Control.Monad
import Data.List

-- data
data Object = Nil | S String | I Int | Object :. Object deriving (Eq)
-- infixr 0 :.

instance Show Object where
    show Nil = "()"
    show (I x) = show x
    show (S x) = x
    show (x :. y) = showCons (x :. y)

-- HyperSpec "22.1.3.5 Printing Lists And Conses"
showCons x = step1 x ""
    where
      step1 x res = step2 x (res ++ "(")
      step2 x res = step3 x (res ++ show (car x))
      step3 x res = if consp (cdr x)
                    then
                        step2 (cdr x) (res ++ " ")
                    else
                        step4 x res
      step4 x res = if cdr x /= Nil
                    then
                        step5 x (res ++ " . " ++ show (cdr x))
                    else
                        step5 x res
      step5 x res = res ++ ")"

-- list operator
cons = (:.)
list1 x = x :. Nil
list2 x y = x :. (y :. Nil)
list3 x y z = x :. (y :. (z :. Nil))
list4 w x y z = w :. (x :. (y :. (z :. Nil)))

consp Nil = False
consp (S _) = False
consp (I _) = False
consp (_ :. _) = True

atom Nil = True
atom (S _) = True
atom (I _) = True
atom (_ :. _) = False

listp x = consp x || x == Nil

car (x :. _) = x
car x | not (consp x) = error ("car expect cons but got: " ++ show x)

cdr (_ :. y) = y
cdr Nil = Nil
cdr x | not (consp x) = error ("cdr expect cons but got: " ++ show x)

caar x = car (car x)
cadr x = car (cdr x)
cddr x = cdr (cdr x)

assoc :: Object -> Object -> Object
assoc item alist
    | alist == Nil = Nil
    | not (consp alist) = error ("assoc expect list but got: " ++ show alist)
    | not (consp (car alist)) = error ("assoc expect cons but got: " ++ show (car alist))
    | caar alist == item = car alist
    | caar alist /= item = assoc item (cdr alist)

-- | Utility. Generate Lisp List from Haskell Array
array2list = foldr cons Nil

list2array Nil = []
list2array (x :. y) 
    | listp y = x : list2array y
    | otherwise = error ("list2array expect list, but got: " ++ show y)

append x y
    | x == Nil = y
    | otherwise = cons (car x) (append (cdr x) y)

mappend :: (Object -> Object) -> Object -> Object
mappend f list
    | list == Nil = Nil
    | otherwise = append (f (car list)) (mappend f (cdr list))

-- | Apply fn to each element of list and append the results.
mappend' f list
    | list == Nil = return Nil
    | otherwise = liftM2 append (f (car list)) (mappend' f (cdr list)) 

mapcar f list
    | list == Nil = Nil
    | otherwise = cons (f (car list)) (mapcar f (cdr list))

mapcar' f list
    | list == Nil = return Nil
    | otherwise = liftM2 cons (f (car list)) (mapcar' f (cdr list))

empty = return Nil :: Q.Gen Object

-- | Generate a random sentence or phrase.
generate :: Object -> Object -> Q.Gen Object
generate grm phrase
    | listp phrase = mappend' (generate grm) phrase
    | rewrites grm phrase /= Nil =
        do
          y <- randomElt (rewrites grm phrase)
          generate grm y
    | otherwise = return $ list1 phrase

-- | Generate a random sentence or phrase wiht a complete parse tree.
generateTree :: Object -> Object -> Q.Gen Object
generateTree grm phrase
    | listp phrase = mapcar' (generateTree grm) phrase
    | rewrites grm phrase /= Nil =
        do
          y <- randomElt (rewrites grm phrase)
          t <- generateTree grm y
          return $ cons phrase t
    | otherwise = return $ list1 phrase

-- | sample data
ex grm phrase = do
  xs <- Q.sample' $ generate grm phrase
  return $ head xs

ex' grm phrase = do
  xs <- Q.sample' $ generateTree grm phrase
  return $ head xs


-- | The left-hand side of a rule.
ruleLhs = car
-- | The right-hand side of a rule.
ruleRhs rule = cdr (cdr rule)
-- | Return a list of the possible rewrites for this category.
rewrites grm category
    | atom category = ruleRhs (assoc category grm)
    | otherwise = error ("rewrite got: " ++ show category)

-- | Pick an element from a list at random.
oneOf set = do
  x <- randomElt set
  return $ list1 x

-- | Choose an element from a list at random.
randomElt :: Object -> Q.Gen Object
randomElt choices
    | listp choices = Q.oneof [Q.elements (list2array choices)]
    | otherwise = error ("randomElt expect choices but got: "  ++ show choices)

bigGrammar = array2list [
              d "sentence" [["noun-phrase", "verb-phrase"]],
              d "noun-phrase" [["article", "adj*", "noun", "pp*"],
                               ["name"],
                               ["pronoun"]],
              d "verb-phrase" [["verb", "noun-phrase", "pp*"]],
              d "pp*" [[], ["pp", "pp*"]],
              d "adj*" [[], ["adj", "adj*"]],
              d "pp" [["prep", "noun-phrase"]],
              w "prep" ["to", "in", "by", "with", "on"],
              w "adj" ["big", "little", "blue", "green", "adiabatic"],
              w "article" ["the", "a"],
              w "name" ["Pat", "Kim", "Lee", "Terry", "Robin"],
              w "noun" ["man", "ball", "woman", "table"],
              w "verb" ["hit", "took", "saw", "liked"],
              w "pronoun" ["he", "she", "it", "these", "those", "that"]
             ]
    where
      d x xs = S x :. (S "->" :. array2list (map (array2list . map S) xs))
      w x xs = S x :. (S "->" :. array2list (map S xs))

simpleGrammar = array2list [
                 d "sentence" [["noun-phrase", "verb-phrase"]],
                 d "noun-phrase" [["article", "noun"]],
                 d "verb-phrase" [["verb", "noun-phrase"]],
                 w "article" ["the", "a"],
                 w "noun" ["man", "ball", "woman", "table"],
                 w "verb" ["hit", "took", "saw", "linked"]
                ]
    where
      d x xs = S x :. (S "->" :. array2list (map (array2list . map S) xs))
      w x xs = S x :. (S "->" :. array2list (map S xs))

2013-05-20

quickCheck - Cons 版

コンスセルを使った版。途中。

module EnglishRule12 where
import qualified Test.QuickCheck as Q
import Control.Monad
import Data.List
import Data.Tree

-- data
data Object = Nil | S String | I Int | Object :. Object deriving (Eq)
-- infixr 0 :.

instance Show Object where
    show Nil = "()"
    show (I x) = show x
    show (S x) = x
    show (x :. y) = showCons (x :. y)

-- HyperSpec "22.1.3.5 Printing Lists And Conses"
showCons x = step1 x ""
    where
      step1 x res = step2 x (res ++ "(")
      step2 x res = step3 x (res ++ show (car x))
      step3 x res = if consp (cdr x)
                    then
                        step2 (cdr x) (res ++ " ")
                    else
                        step4 x res
      step4 x res = if cdr x /= Nil
                    then
                        step5 x (res ++ " . " ++ show (cdr x))
                    else
                        step5 x res
      step5 x res = res ++ ")"

-- list operator
cons = (:.)
list1 x = x :. Nil
list2 x y = x :. (y :. Nil)
list3 x y z = x :. (y :. (z :. Nil))
list4 w x y z = w :. (x :. (y :. (z :. Nil)))

consp Nil = False
consp (S _) = False
consp (I _) = False
consp (_ :. _) = True

atom Nil = True
atom (S _) = True
atom (I _) = True
atom (_ :. _) = False

listp x = consp x || x == Nil

car (x :. _) = x
car x | not (consp x) = error ("car expect cons but got: " ++ show x)

cdr (_ :. y) = y
cdr Nil = Nil
cdr x | not (consp x) = error ("cdr expect cons but got: " ++ show x)

caar x = car (car x)
cadr x = car (cdr x)
cddr x = cdr (cdr x)

assoc :: Object -> Object -> Object
assoc item alist
    | alist == Nil = Nil
    | not (consp alist) = error ("assoc expect list but got: " ++ show alist)
    | not (consp (car alist)) = error ("assoc expect cons but got: " ++ show (car alist))
    | caar alist == item = car alist
    | caar alist /= item = assoc item (cdr alist)

-- | Utility. Generate Lisp List from Haskell Array
array2list = foldr cons Nil

list2array Nil = []
list2array (x :. y) 
    | listp y = x : list2array y
    | otherwise = error ("list2array expect list, but got: " ++ show y)

append x y
    | x == Nil = y
    | otherwise = cons (car x) (append (cdr x) y)

mappend :: (Object -> Object) -> Object -> Object
mappend f list
    | list == Nil = Nil
    | otherwise = append (f (car list)) (mappend f (cdr list))

type Grammar = Object

append' :: Q.Gen Object -> Q.Gen Object -> Q.Gen Object
append' x y = do
  a <- x
  b <- y
  return $ append a b

empty = return Nil :: Q.Gen Object

-- | Generate a random sentence or phrase.
generate :: Object -> Object -> Q.Gen Object
generate grm phrase
    | listp phrase = foldr (append' . generate grm) empty (list2array phrase)
    | rewrites grm phrase /= Nil =
        do
          y <- randomElt (rewrites grm phrase)
          generate grm y
    | otherwise = return $ list1 phrase

generate' :: Object -> Object -> Q.Gen [Object]
generate' grm phrase
    | listp phrase = 
        do
          ts <- mapM (generate' grm) (list2array phrase)
          return $ concat ts
    | rewrites grm phrase /= Nil =
        do
          y <- randomElt (rewrites grm phrase)
          generate' grm y
    | otherwise = return [list1 phrase]

-- | The left-hand side of a rule.
ruleLhs = car
-- | The right-hand side of a rule.
ruleRhs rule = cdr (cdr rule)
-- | Return a list of the possible rewrites for this category.
rewrites grm category
    | atom category = ruleRhs (assoc category grm)
    | otherwise = error ("rewrite got: " ++ show category)

-- | Pick an element from a list at random.
oneOf set = do
  x <- randomElt set
  return $ list1 x

-- | Choose an element from a list at random.
randomElt :: Object -> Q.Gen Object
randomElt choices
    | listp choices = Q.oneof [Q.elements (list2array choices)]
    | otherwise = error ("randomElt expect choices but got: "  ++ show choices)

2013-05-14

quickCheck 英文生成

まだバグがあるが改良版。

module EnglishRule7 where
import Test.QuickCheck
import Control.Monad
import Data.List

-- data
type Name = String
data Rule = Word [String] | Rule [[String]] deriving (Eq, Show)
type Grammar = [(String, Rule)]
simpleGrammar = [ ("sentence", Rule [["noun-phrase", "verb-phrase"]]),
                  ("article", Word ["the", "a"]),
                  ("noun", Word ["man", "ball", "woman", "table"]),
                  ("verb", Word ["hit", "took", "saw", "liked"] ),
                  ("noun-phrase", Rule [["article", "noun"]]),
                  ("verb-phrase", Rule [["verb", "noun-phrase"]])
                ]  :: Grammar
bigGrammar = [ ("sentence", Rule [["noun-phrase", "verb-phrase"]]),
               ("noun-phrase", Rule [["article", "adj*", "noun", "pp*"],
                                     ["name"],
                                     ["pronoun"]]),
               ("verb-phrase", Rule [["verb", "noun-phrase", "pp*"]]),
               ("pp*", Rule [[], ["pp", "pp*"]]),
               ("adj*", Rule [[], ["adj", "adj*"]]),
               ("pp", Rule [["prep", "noun-phrase"]]),
               ("prep", Word ["to", "in", "by", "with", "on"]),
               ("adj", Word ["big", "little", "blue", "green", "adiabatic"]),
               ("article", Word ["the", "a"]),
               ("name", Word ["Pat", "Kim", "Lee", "Terry", "Robin"]),
               ("noun", Word ["man", "ball", "woman", "table"]),
               ("verb", Word ["hit", "took", "saw", "liked"]),
               ("pronoun", Word ["he", "she", "it", "these", "those", "that"]) ] :: Grammar


mkgen :: Grammar -> String -> Gen String
mkgen grm k = case lookup k grm of
                Just r -> mkgen' grm k r
                Nothing -> error $ "key not found: " ++ k

mkgen' grm x (Word ws) = elements ws
mkgen' grm x (Rule rs) = oneof $ map (rule2gen grm x) rs

rule2gen :: Grammar -> String -> [String] -> Gen String
rule2gen grm x [] = return ""
rule2gen grm x (r:rs) = mkgen grm r `catgen` rule2gen grm x rs

catgen :: Gen String -> Gen String -> Gen String
catgen g1 g2 = do
  s1 <- g1
  s2 <- g2
  return $ catWords s1 s2

catWords "" "" = ""
catWords x "" = x
catWords "" y = y
catWords x y = x ++ " " ++ y

--
data Tree = EmptyNode | Node String String | Tree String Tree Tree deriving (Eq)
instance Show Tree where
    show (Node n x) = "(" ++ n ++ " " ++ x ++ ")"
    show EmptyNode = "()"
    show (Tree "" t1 EmptyNode) = "(" ++ show t1 ++ ")"
    show (Tree "" EmptyNode t2) = "(" ++ show t2 ++ ")"
    show (Tree "" t1 t2) = "(" ++ show t1 ++ " " ++ show t2 ++ ")"
    show (Tree n t1 EmptyNode) = "(" ++ n ++ " " ++ show t1 ++ ")"
    show (Tree n EmptyNode t2) = "(" ++ n ++ " " ++ show t2 ++ ")"
    show (Tree n t1 t2) = "(" ++ n ++ " " ++ show t1 ++ " " ++ show t2 ++ ")"


mkgenTree :: Grammar -> String -> Gen Tree
mkgenTree grm k = case lookup k grm of
                Just r -> mkgenTree' grm k r
                Nothing -> error $ "key not found: " ++ k

mkgenTree' grm x (Word ws) = do
  s <- elements ws
  return $ Node x s
mkgenTree' grm x (Rule rs) = oneof $ map (rule2gentree grm x) rs

rule2gentree :: Grammar -> String -> [String] -> Gen Tree
rule2gentree grm x [] = return EmptyNode
rule2gentree grm x (r:rs) = catgentree x (mkgenTree grm r) (rule2gentree grm x rs)

catgentree :: String -> Gen Tree -> Gen Tree -> Gen Tree

catgentree x g1 g2 = do
  s1 <- g1
  s2 <- g2
  if (s1 == EmptyNode && s2 == EmptyNode)
  then
      return EmptyNode
  else
      if (s1 /= EmptyNode && s2 == EmptyNode) 
      then
          return s1
      else
          if (s1 == EmptyNode && s2 /= EmptyNode)
          then
              return s2
          else
              return $ Tree x s1 s2

2013-05-12

quickCheck - 英文生成(4)

PAIP では同じ英文生成のルールデータを用いて、解析木(Parse Tree)を生成する。わたしの作ったデータ構造だとちょっと上手くいかなかったため、Rule データ型を改良して名前を持てるようにした。

module EnglishRule6 where
import Test.QuickCheck
import Control.Monad
import Data.List

import System

type Name = String
data Rule = Empty Name | Cat Name Rule Rule | Or Name Rule Rule | Alt Name [String] deriving (Eq)

type Grammar = [Rule]

instance Show Rule where
    -- nameless
    show (Empty "") = "()"
    show (Cat "" r1 r2) = show r1 ++ " + " ++ show r2
    show (Or "" r1 r2) = show r1 ++ " | " ++ show r2
    show (Alt "" ws) = "(" ++ intercalate "," ws ++ ")"
    -- named
    show (Empty x) = x ++ " -> " ++ "()"
    show (Cat x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " + " ++ nameOf r2
    show (Or x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " | " ++ nameOf r2
    show (Alt x ws) = x ++ " -> " ++ "(" ++ intercalate "," ws ++ ")"

nameOf :: Rule -> Name
nameOf (Empty x) = x
nameOf (Cat x _ _) = x
nameOf (Or x _ _) = x
nameOf (Alt x _) = x

-- 補助関数
catWords :: String -> String -> String
catWords "" "" = ""
catWords x "" = x
catWords "" y = y
catWords x y = x ++ " " ++ y

rule2gen :: Rule -> Gen String
rule2gen (Empty _) = return ""
rule2gen (Alt _ ws) = elements ws
rule2gen (Cat _ r1 r2) = do
  s1 <- rule2gen r1
  s2 <- rule2gen r2
  return $ catWords s1 s2
rule2gen (Or _ r1 r2) = oneof [rule2gen r1, rule2gen r2]

lookupRule :: String -> [Rule] -> Maybe Rule
lookupRule k rs = lookup k rs'
    where rs' = map (\r -> (nameOf r, r)) rs

generate :: Grammar -> String -> Gen String
generate g x = case lookupRule x g of
                 Just r -> rule2gen r
                 Nothing -> error ("unknown: " ++ x)

解析木のために、PAIP では Lisp のリストを使っているが、Haskell ではデータ型を用意して、文字列を生成する替わりに Tree を生成するようにする。Tree の名前を多相的にしているのは、あとでユニークなインデックスを振った (Int, String) を名前にするため。

-- Parse Tree
data Tree a = EmptyNode | Node a String | Tree a (Tree a) (Tree a) deriving (Eq)
instance Show a => Show (Tree a) where
    show (Node n x) = "(" ++ show n ++ " " ++ x ++ ")"
    show EmptyNode = ""
    show (Tree n t1 EmptyNode) = "(" ++ show n ++ " " ++ show t1 ++ ")"
    show (Tree n EmptyNode t2) = "(" ++ show n ++ " " ++ show t2 ++ ")"
    show (Tree n t1 t2) = "(" ++ show n ++ " " ++ show t1 ++ " " ++ show t2 ++ ")"

上記を生成するためには、 Gen String ではなく Gen Tree 型の関数を生成すればよい。文法ルール自体は変更しなくてよい。

rule2genTree :: Rule -> Gen (Tree String)
rule2genTree (Empty n) = return EmptyNode
rule2genTree (Alt n ws) = do
  s <- elements ws
  return $ Node n s
rule2genTree (Cat n r1 r2) = do
  t1 <- rule2genTree r1
  t2 <- rule2genTree r2
  return $ Tree n t1 t2
rule2genTree (Or n r1 r2) = do
  t <- oneof [ rule2genTree r1, rule2genTree r2 ]
  case t of
    EmptyNode -> return EmptyNode
    x -> return $ Tree n t EmptyNode

-- 一般化
generate' :: (Rule -> Gen a) -> [Rule] -> String -> Gen a
generate' f rs x = case lookupRule x rs of
                     Just r -> f r
                     Nothing -> error ("Fail to lookup rule for : " ++ x)
generateStr :: [Rule] -> String -> Gen String
generateStr = generate' rule2gen
generateTree :: [Rule] -> String -> Gen (Tree String)
generateTree = generate' rule2genTree

rule2genTree と rule2gen はほとんど同じ構造をしているので、もう一段まとめられるような気もする。

2013-05-07

quickCheck 入門 - 英文生成(3)

少し考え方を変え、定義をそのままデータ構造にした。Grammar データを単なる Rule の配列でなくタプルのリストにしているのは、あとからルールを参照するため。

module EnglishRule3 where
import Test.QuickCheck
import Control.Monad
import Data.List

data Rule = Empty | Rule :+ Rule | Rule :| Rule | Alt [String] deriving (Eq)
type Grammar = [(String, Rule)]

-- 再帰的データ構造のため Show しないこと
-- instance Show Rule where
--     show Empty = "()"
--     show (r1 :+ r2) = (show r1) ++ " + " ++ (show r2)
--     show (r1 :| r2) = show r1 ++ " | " ++ show r2
--     show (Alt ws) = "(" ++ intercalate "," ws ++ ")"

-- 補助関数
catWords :: String -> String -> String
catWords "" "" = ""
catWords x "" = x
catWords "" y = y
catWords x y = x ++ " " ++ y

rule2gen :: Rule -> Gen String
rule2gen Empty = return ""
rule2gen (Alt ws) = elements ws
rule2gen (r1 :+ r2) = do
  s1 <- rule2gen r1
  s2 <- rule2gen r2
  return $ catWords s1 s2
rule2gen (r1 :| r2) = oneof [rule2gen r1, rule2gen r2]

generate :: Grammar -> String -> Gen String
generate g x = case lookup x g of
                 Just r -> rule2gen r
                 Nothing -> error ("unknown :" ++ x)

ルールの定義は以下の通り。

simpleGrammar = [ ("noun", noun),
                  ("verb", verb),
                  ("article", article),
                  ("noun-phrase", noun_phrase),
                  ("verb-phrase", verb_phrase),
                  ("sentence", sentence) ]
    where
      noun = Alt ["man", "ball", "woman", "table"]
      verb = Alt ["hit", "took", "saw", "liked"]
      article = Alt ["the", "a"]
      noun_phrase = article :+ noun
      verb_phrase = verb :+ noun_phrase
      sentence = noun_phrase :+ verb_phrase

bigGrammar = [ ("sentence", sentence),
               ("noun-phrase", noun_phrase),
               ("verb-phrase", verb_phrase),
               ("pp*", ppStar),
               ("adj*" , adjStar),
               ("pp", pp),
               ("prep", prep),
               ("adj", adj),
               ("article", article),
               ("name", name),
               ("noun", noun),
               ("verb", verb),
               ("pronoun", pronoun) ]
    where
      sentence = noun_phrase :+ verb_phrase
      noun_phrase = (article :+ (adjStar :+ (noun :+ ppStar))) :| (name :| pronoun)
      verb_phrase = verb :+ (noun_phrase :+ ppStar)
      ppStar = Empty :| (pp :+ ppStar)
      adjStar = Empty :| (adj :+ adjStar)
      pp = prep :+ noun_phrase
      prep = Alt ["to", "in", "by", "with", "on"]
      adj = Alt ["big", "little", "blue", "green", "adiabatic"]
      article = Alt ["the", "a"]
      name = Alt ["Pat", "Kim", "Lee", "Terry", "Robin"]
      noun = Alt ["man", "ball", "woman", "table"]
      verb = Alt ["hit", "took", "saw", "liked"]
      pronoun = Alt ["he", "she", "it", "these", "those", "that"]

実行例。

*EnglishRule3> sample' $ generate simpleGrammar "verb"
["liked","hit","hit","saw","hit","hit","hit","liked","took","saw","saw"]
*EnglishRule3> sample' $ generate simpleGrammar "sentence"
["a woman saw the man","the woman saw a woman","the ball liked a table","the woman took a woman","the table saw a man","the table liked the table","the table took a ball","a woman hit a woman","the woman hit a woman","the man liked the table","a man hit a table"]
*EnglishRule3> sample' $ generate bigGrammar "sentence"
["the big woman to Terry saw the man by Lee with a table with she with Kim","Lee saw these by the woman in he to a little big woman by a man to the ball by the ball in Terry to a big table in the table by a man to those by the big big blue adiabatic green adiabatic adiabatic table to that in Terry on the ball on he on a man by Terry by the ball on these in a green green big adiabatic ball in the woman in Terry in Lee with that with the blue table by a adiabatic little man with a woman with a man by Kim on the man on the blue green table on he in the blue blue table to the little ball to a man with the ball","the ball liked the table","Terry hit Robin","Pat saw Lee","a ball saw he","a blue little blue big man in Robin in the ball hit a big blue table on the little green big woman to a ball with Lee to it by Robin in the table by a little big green man on the green woman by it in Lee to a green ball on the woman by a blue woman on a green ball to she to a green ball","a ball hit a blue little green big table on a little little man on Kim to a blue adiabatic man to the adiabatic little woman to a man by Lee by the man in a man with that to these with a table in a man to a big man to she in Lee with he in she in he in Terry in a big man by the woman in Pat","a ball took the woman to the table in she on she with a big blue green table in the adiabatic little table on a woman on the adiabatic table in these to the green adiabatic woman by Lee on a big adiabatic man with Robin in the green big green man with Terry","Terry took these","those took Terry in Robin"]

2013-05-07

quickCheck 入門 - 英文生成(2)

Maybe 型を使ってみた途中版。無駄なスペースが生成されるバグあり。あまり整頓された感じはしない。

module EnglishRule2 where
import Test.QuickCheck
import Control.Monad
import Data.List
import Control.Applicative 

data Sym = Sym String deriving (Eq)
instance Show Sym where
    show (Sym x) = x

data Rule = Rule [Maybe [Sym]] | Alt [String] deriving (Eq)

instance Show Rule where
    show (Rule rs) = show $ map (\r -> case r of
                                          Just x -> intercalate " + " (map show x)
                                          Nothing -> "()") rs
    show (Alt ws) = "(" ++ intercalate "," ws ++ ")"

type Grammar = [(Sym, Rule)]

generate :: Grammar -> Sym -> Gen String
generate g x = case lookup x g of
                 Just r -> rule2gen g r
                 Nothing -> error ("Unknown symbol: " ++ show x)

rule2gen g (Alt ws) = elements ws
rule2gen g (Rule rs) = oneof $ map (rule2gen' g) rs

rule2gen' g (Just (r:rs)) = do
  a <- generate g r
  rest <- rule2gen' g (Just rs)
  return (a ++ " " ++ rest)
rule2gen' g (Just []) = return ""
rule2gen' g Nothing = return ""

simpleGrammar =
    [ (Sym "noun", Alt ["man", "ball", "woman", "table"]),
      (Sym "verb", Alt ["hit", "took", "saw", "liked"]),
      (Sym "article", Alt ["the", "a"]),
      (Sym "noun-phrase", Rule [Just [Sym "article", Sym "noun"]]),
      (Sym "verb-phrase", Rule [Just [Sym "verb", Sym "noun-phrase"]]),
      (Sym "sentence", Rule [Just [Sym "noun-phrase", Sym "verb-phrase"]])
    ] :: Grammar

bigGrammar =
    [ (Sym "sentence", Rule [ Just [Sym "noun-phrase", Sym "verb-phrase"]]),
      (Sym "noun-phrase", Rule [ Just [Sym "article", Sym "adj*", Sym "noun", Sym "pp*"],
                                 Just [Sym "name"],
                                 Just [Sym "pronoun"]]),
      (Sym "verb-phrase", Rule [ Just [Sym "verb", Sym "noun-phrase", Sym "pp*"]]),
      (Sym "pp*", Rule [ Nothing , Just [Sym "pp", Sym "pp*"]]),
      (Sym "adj*", Rule [ Nothing, Just [Sym "adj", Sym "adj*"]]),
      (Sym "pp", Rule [ Just [Sym "prep", Sym "noun-phrase"]]),
      (Sym "prep", Alt ["to", "in", "by", "with", "on"]),
      (Sym "adj", Alt ["big", "little", "blue", "green", "adiabatic"]),
      (Sym "article", Alt ["the", "a"]),
      (Sym "name", Alt ["Pat", "Kim", "Lee", "Terry", "Robin"]),
      (Sym "noun", Alt ["man", "ball", "woman", "table"]),
      (Sym "verb", Alt ["hit", "took", "saw", "liked"]),
      (Sym "pronoun", Alt ["he", "she", "it", "these", "those", "that"])
    ] :: Grammar

2013-05-05

quickCheck 入門 - 英文生成

英文生成の簡単な版は、QuickCheck を使うことで以下のように書けた(PAIP 2.2 A Straightforward Solution)。

module English where
import Test.QuickCheck
import Control.Monad

verb = elements ["hit", "took", "saw", "liked"] :: Gen String
noun = elements ["man", "ball", "woman", "table"] :: Gen String
article = elements ["the", "a"] :: Gen String
-- 補助関数
cat2 x y = x ++ " " ++ y

nounPhrase = liftM2 cat2 article noun :: Gen String
verbPhrase = liftM2 cat2 verb nounPhrase :: Gen String
sentence = liftM2 cat2 nounPhrase verbPhrase :: Gen String

これを改良して、ルールベースにすることを考える(PAIP 2.3 A Rule-Based Solution)。テキストでは、英文法をデータとして持ち(*simple-grammar*, *big-grammar*)、同じプログラムが文法データを変えれば違う英文を生成するようになっている。Lisp 版では、シンボルがルール名とそのルールの参照に使われている。
Haskell にはシンボルが無いので、シンボル用のデータ型 Sym String を用いた第一版を以下に示す。

module EnglishRule where
import Test.QuickCheck
import Control.Monad
import Data.List

data Sym = Sym String deriving (Eq)
instance Show Sym where
    show (Sym x) = x

data Rule = Rule [[Sym]] | Alt [String] deriving (Eq)

instance Show Rule where
    show (Rule rs) = show $ map (intercalate " + " . map show) rs
    show (Alt ws) = "(" ++ intercalate "," ws ++ ")"

type Grammar = [(Sym, Rule)]
simpleGrammar =
    [ (Sym "noun", Alt ["man", "ball", "woman", "table"]),
      (Sym "verb", Alt ["hit", "took", "saw", "liked"]),
      (Sym "article", Alt ["the", "a"]),
      (Sym "noun-phrase", Rule [[Sym "article", Sym "noun"]]),
      (Sym "verb-phrase", Rule [[Sym "verb", Sym "noun-phrase"]]),
      (Sym "sentence", Rule [[Sym "noun-phrase", Sym "verb-phrase"]])
    ] :: Grammar

generate :: Grammar -> Sym -> Gen String
generate g x = case lookup x g of
                 Just (Alt ws) -> elements ws
                 Just (Rule rs) -> oneof (map (generate' g x) rs)
                 Nothing -> error ("Unknown symbol: " ++ show x)

generate' g x [] = return ""
generate' g x (r:rs) = do
  a <- generate g r
  rest <- generate' g x rs
  if a /= "" && rest /= ""
  then
      return $ a ++ " " ++ rest
  else
      if a /= "" && rest == ""
      then
          return a
      else
          if a == "" && rest /= ""
          then
              return rest
          else
              return "" -- a == "" && rest == ""

bigGrammar =
    [ (Sym "sentence", Rule [[Sym "noun-phrase", Sym "verb-phrase"]]),
      (Sym "noun-phrase", Rule [[Sym "article", Sym "adj*", Sym "noun", Sym "pp*"],
                                [Sym "name"],
                                [Sym "pronoun"]]),
      (Sym "verb-phrase", Rule [[Sym "verb", Sym "noun-phrase", Sym "pp*"]]),
      (Sym "pp*", Rule [[], [Sym "pp", Sym "pp*"]]),
      (Sym "adj*", Rule [[], [Sym "adj", Sym "adj*"]]),
      (Sym "pp", Rule [[Sym "prep", Sym "noun-phrase"]]),
      (Sym "prep", Alt ["to", "in", "by", "with", "on"]),
      (Sym "adj", Alt ["big", "little", "blue", "green", "adiabatic"]),
      (Sym "article", Alt ["the", "a"]),
      (Sym "name", Alt ["Pat", "Kim", "Lee", "Terry", "Robin"]),
      (Sym "noun", Alt ["man", "ball", "woman", "table"]),
      (Sym "verb", Alt ["hit", "took", "saw", "liked"]),
      (Sym "pronoun", Alt ["he", "she", "it", "these", "those", "that"])
    ] :: Grammar

何カ所かかっこう悪いところがあり、リファクタリングが必要である。

Common Lisp 版に比べて長い。Haskell の言語要素、ライブラリの大きさを考えると、もっと簡潔にできるのではないか。
シンボルの代替 Sym String が如何にも冗長。もっと奇麗な、Haskell らしい構成は無いか。
generate' で空文字判定をしているところが不格好。Maybe モナドが使えるのでは。
QuickCheck の Gen String 型で妥当か。

続く。