quickCheck - Lisp のコンス
Lisp のコンスを用いたバージョン。ほぼ PAIP 2.3 を直訳したつもり。
まず実例。ex 、ex' 関数は共に Object -> Object -> IO Object で、QuickCheck の sample' 関数を使っている。ex は文を、ex' は解析木を吐く。
*EnglishRule13> ex simpleGrammar (S "noun") (ball) *EnglishRule13> ex simpleGrammar (S "noun-phrase") (a ball) *EnglishRule13> ex simpleGrammar (S "sentence") (a table linked a man) *EnglishRule13> ex simpleGrammar (S "sentence") (a ball took a woman) *EnglishRule13> ex' bigGrammar (S "noun") (noun woman) *EnglishRule13> ex' bigGrammar (S "sentence") (sentence (noun-phrase (name Lee)) (verb-phrase (verb saw) (noun-phrase (name Kim)) (pp*))) *EnglishRule13> ex' bigGrammar (S "sentence") (sentence (noun-phrase (pronoun these)) (verb-phrase (verb hit) (noun-phrase (article the) (adj* (adj adiabatic) (adj* (adj big) (adj*))) (noun ball) (pp* (pp (prep to) (noun-phrase (article the) (adj* (adj adiabatic) (adj*)) (noun woman) (pp* (pp (prep in) (noun-phrase (name Kim))) (pp* (pp (prep to) (noun-phrase (article a) (adj* (adj adiabatic) (adj* (adj big) (adj*))) (noun ball) (pp* (pp (prep on) (noun-phrase (pronoun he))) (pp* (pp (prep in) (noun-phrase (name Pat))) (pp*))))) (pp* (pp (prep on) (noun-phrase (article the) (adj* (adj little) (adj*)) (noun table) (pp*))) (pp*)))))) (pp* (pp (prep with) (noun-phrase (article the) (adj* (adj adiabatic) (adj* (adj adiabatic) (adj*))) (noun man) (pp*))) (pp*)))) (pp* (pp (prep with) (noun-phrase (pronoun it))) (pp* (pp (prep on) (noun-phrase (name Terry))) (pp* (pp (prep by) (noun-phrase (name Pat))) (pp* (pp (prep on) (noun-phrase (name Pat))) (pp* (pp (prep on) (noun-phrase (name Lee))) (pp* (pp (prep in) (noun-phrase (article a) (adj* (adj blue) (adj* (adj green) (adj* (adj big) (adj*)))) (noun table) (pp* (pp (prep in) (noun-phrase (article a) (adj* (adj little) (adj*)) (noun woman) (pp*))) (pp* (pp (prep with) (noun-phrase (name Kim))) (pp* (pp (prep to) (noun-phrase (name Terry))) (pp* (pp (prep with) (noun-phrase (name Kim))) (pp*))))))) (pp* (pp (prep on) (noun-phrase (pronoun she))) (pp*)))))))))) *EnglishRule13>
リスト操作関数を再実装する手間と型検査の安心感とを犠牲に、Lisp っぽいコードになっている。犠牲にしたものが大きすぎる気もするけど。
モナド版の mapcar', mappend' はもうちょっと共通化できるかもしれない。以下コード。
module EnglishRule13 where import qualified Test.QuickCheck as Q import Control.Monad import Data.List -- data data Object = Nil | S String | I Int | Object :. Object deriving (Eq) -- infixr 0 :. instance Show Object where show Nil = "()" show (I x) = show x show (S x) = x show (x :. y) = showCons (x :. y) -- HyperSpec "22.1.3.5 Printing Lists And Conses" showCons x = step1 x "" where step1 x res = step2 x (res ++ "(") step2 x res = step3 x (res ++ show (car x)) step3 x res = if consp (cdr x) then step2 (cdr x) (res ++ " ") else step4 x res step4 x res = if cdr x /= Nil then step5 x (res ++ " . " ++ show (cdr x)) else step5 x res step5 x res = res ++ ")" -- list operator cons = (:.) list1 x = x :. Nil list2 x y = x :. (y :. Nil) list3 x y z = x :. (y :. (z :. Nil)) list4 w x y z = w :. (x :. (y :. (z :. Nil))) consp Nil = False consp (S _) = False consp (I _) = False consp (_ :. _) = True atom Nil = True atom (S _) = True atom (I _) = True atom (_ :. _) = False listp x = consp x || x == Nil car (x :. _) = x car x | not (consp x) = error ("car expect cons but got: " ++ show x) cdr (_ :. y) = y cdr Nil = Nil cdr x | not (consp x) = error ("cdr expect cons but got: " ++ show x) caar x = car (car x) cadr x = car (cdr x) cddr x = cdr (cdr x) assoc :: Object -> Object -> Object assoc item alist | alist == Nil = Nil | not (consp alist) = error ("assoc expect list but got: " ++ show alist) | not (consp (car alist)) = error ("assoc expect cons but got: " ++ show (car alist)) | caar alist == item = car alist | caar alist /= item = assoc item (cdr alist) -- | Utility. Generate Lisp List from Haskell Array array2list = foldr cons Nil list2array Nil = [] list2array (x :. y) | listp y = x : list2array y | otherwise = error ("list2array expect list, but got: " ++ show y) append x y | x == Nil = y | otherwise = cons (car x) (append (cdr x) y) mappend :: (Object -> Object) -> Object -> Object mappend f list | list == Nil = Nil | otherwise = append (f (car list)) (mappend f (cdr list)) -- | Apply fn to each element of list and append the results. mappend' f list | list == Nil = return Nil | otherwise = liftM2 append (f (car list)) (mappend' f (cdr list)) mapcar f list | list == Nil = Nil | otherwise = cons (f (car list)) (mapcar f (cdr list)) mapcar' f list | list == Nil = return Nil | otherwise = liftM2 cons (f (car list)) (mapcar' f (cdr list)) empty = return Nil :: Q.Gen Object -- | Generate a random sentence or phrase. generate :: Object -> Object -> Q.Gen Object generate grm phrase | listp phrase = mappend' (generate grm) phrase | rewrites grm phrase /= Nil = do y <- randomElt (rewrites grm phrase) generate grm y | otherwise = return $ list1 phrase -- | Generate a random sentence or phrase wiht a complete parse tree. generateTree :: Object -> Object -> Q.Gen Object generateTree grm phrase | listp phrase = mapcar' (generateTree grm) phrase | rewrites grm phrase /= Nil = do y <- randomElt (rewrites grm phrase) t <- generateTree grm y return $ cons phrase t | otherwise = return $ list1 phrase -- | sample data ex grm phrase = do xs <- Q.sample' $ generate grm phrase return $ head xs ex' grm phrase = do xs <- Q.sample' $ generateTree grm phrase return $ head xs -- | The left-hand side of a rule. ruleLhs = car -- | The right-hand side of a rule. ruleRhs rule = cdr (cdr rule) -- | Return a list of the possible rewrites for this category. rewrites grm category | atom category = ruleRhs (assoc category grm) | otherwise = error ("rewrite got: " ++ show category) -- | Pick an element from a list at random. oneOf set = do x <- randomElt set return $ list1 x -- | Choose an element from a list at random. randomElt :: Object -> Q.Gen Object randomElt choices | listp choices = Q.oneof [Q.elements (list2array choices)] | otherwise = error ("randomElt expect choices but got: " ++ show choices) bigGrammar = array2list [ d "sentence" [["noun-phrase", "verb-phrase"]], d "noun-phrase" [["article", "adj*", "noun", "pp*"], ["name"], ["pronoun"]], d "verb-phrase" [["verb", "noun-phrase", "pp*"]], d "pp*" [[], ["pp", "pp*"]], d "adj*" [[], ["adj", "adj*"]], d "pp" [["prep", "noun-phrase"]], w "prep" ["to", "in", "by", "with", "on"], w "adj" ["big", "little", "blue", "green", "adiabatic"], w "article" ["the", "a"], w "name" ["Pat", "Kim", "Lee", "Terry", "Robin"], w "noun" ["man", "ball", "woman", "table"], w "verb" ["hit", "took", "saw", "liked"], w "pronoun" ["he", "she", "it", "these", "those", "that"] ] where d x xs = S x :. (S "->" :. array2list (map (array2list . map S) xs)) w x xs = S x :. (S "->" :. array2list (map S xs)) simpleGrammar = array2list [ d "sentence" [["noun-phrase", "verb-phrase"]], d "noun-phrase" [["article", "noun"]], d "verb-phrase" [["verb", "noun-phrase"]], w "article" ["the", "a"], w "noun" ["man", "ball", "woman", "table"], w "verb" ["hit", "took", "saw", "linked"] ] where d x xs = S x :. (S "->" :. array2list (map (array2list . map S) xs)) w x xs = S x :. (S "->" :. array2list (map S xs))
quickCheck - Cons 版
コンスセルを使った版。途中。
module EnglishRule12 where import qualified Test.QuickCheck as Q import Control.Monad import Data.List import Data.Tree -- data data Object = Nil | S String | I Int | Object :. Object deriving (Eq) -- infixr 0 :. instance Show Object where show Nil = "()" show (I x) = show x show (S x) = x show (x :. y) = showCons (x :. y) -- HyperSpec "22.1.3.5 Printing Lists And Conses" showCons x = step1 x "" where step1 x res = step2 x (res ++ "(") step2 x res = step3 x (res ++ show (car x)) step3 x res = if consp (cdr x) then step2 (cdr x) (res ++ " ") else step4 x res step4 x res = if cdr x /= Nil then step5 x (res ++ " . " ++ show (cdr x)) else step5 x res step5 x res = res ++ ")" -- list operator cons = (:.) list1 x = x :. Nil list2 x y = x :. (y :. Nil) list3 x y z = x :. (y :. (z :. Nil)) list4 w x y z = w :. (x :. (y :. (z :. Nil))) consp Nil = False consp (S _) = False consp (I _) = False consp (_ :. _) = True atom Nil = True atom (S _) = True atom (I _) = True atom (_ :. _) = False listp x = consp x || x == Nil car (x :. _) = x car x | not (consp x) = error ("car expect cons but got: " ++ show x) cdr (_ :. y) = y cdr Nil = Nil cdr x | not (consp x) = error ("cdr expect cons but got: " ++ show x) caar x = car (car x) cadr x = car (cdr x) cddr x = cdr (cdr x) assoc :: Object -> Object -> Object assoc item alist | alist == Nil = Nil | not (consp alist) = error ("assoc expect list but got: " ++ show alist) | not (consp (car alist)) = error ("assoc expect cons but got: " ++ show (car alist)) | caar alist == item = car alist | caar alist /= item = assoc item (cdr alist) -- | Utility. Generate Lisp List from Haskell Array array2list = foldr cons Nil list2array Nil = [] list2array (x :. y) | listp y = x : list2array y | otherwise = error ("list2array expect list, but got: " ++ show y) append x y | x == Nil = y | otherwise = cons (car x) (append (cdr x) y) mappend :: (Object -> Object) -> Object -> Object mappend f list | list == Nil = Nil | otherwise = append (f (car list)) (mappend f (cdr list)) type Grammar = Object append' :: Q.Gen Object -> Q.Gen Object -> Q.Gen Object append' x y = do a <- x b <- y return $ append a b empty = return Nil :: Q.Gen Object -- | Generate a random sentence or phrase. generate :: Object -> Object -> Q.Gen Object generate grm phrase | listp phrase = foldr (append' . generate grm) empty (list2array phrase) | rewrites grm phrase /= Nil = do y <- randomElt (rewrites grm phrase) generate grm y | otherwise = return $ list1 phrase generate' :: Object -> Object -> Q.Gen [Object] generate' grm phrase | listp phrase = do ts <- mapM (generate' grm) (list2array phrase) return $ concat ts | rewrites grm phrase /= Nil = do y <- randomElt (rewrites grm phrase) generate' grm y | otherwise = return [list1 phrase] -- | The left-hand side of a rule. ruleLhs = car -- | The right-hand side of a rule. ruleRhs rule = cdr (cdr rule) -- | Return a list of the possible rewrites for this category. rewrites grm category | atom category = ruleRhs (assoc category grm) | otherwise = error ("rewrite got: " ++ show category) -- | Pick an element from a list at random. oneOf set = do x <- randomElt set return $ list1 x -- | Choose an element from a list at random. randomElt :: Object -> Q.Gen Object randomElt choices | listp choices = Q.oneof [Q.elements (list2array choices)] | otherwise = error ("randomElt expect choices but got: " ++ show choices)
quickCheck 英文生成
まだバグがあるが改良版。
module EnglishRule7 where import Test.QuickCheck import Control.Monad import Data.List -- data type Name = String data Rule = Word [String] | Rule [[String]] deriving (Eq, Show) type Grammar = [(String, Rule)] simpleGrammar = [ ("sentence", Rule [["noun-phrase", "verb-phrase"]]), ("article", Word ["the", "a"]), ("noun", Word ["man", "ball", "woman", "table"]), ("verb", Word ["hit", "took", "saw", "liked"] ), ("noun-phrase", Rule [["article", "noun"]]), ("verb-phrase", Rule [["verb", "noun-phrase"]]) ] :: Grammar bigGrammar = [ ("sentence", Rule [["noun-phrase", "verb-phrase"]]), ("noun-phrase", Rule [["article", "adj*", "noun", "pp*"], ["name"], ["pronoun"]]), ("verb-phrase", Rule [["verb", "noun-phrase", "pp*"]]), ("pp*", Rule [[], ["pp", "pp*"]]), ("adj*", Rule [[], ["adj", "adj*"]]), ("pp", Rule [["prep", "noun-phrase"]]), ("prep", Word ["to", "in", "by", "with", "on"]), ("adj", Word ["big", "little", "blue", "green", "adiabatic"]), ("article", Word ["the", "a"]), ("name", Word ["Pat", "Kim", "Lee", "Terry", "Robin"]), ("noun", Word ["man", "ball", "woman", "table"]), ("verb", Word ["hit", "took", "saw", "liked"]), ("pronoun", Word ["he", "she", "it", "these", "those", "that"]) ] :: Grammar mkgen :: Grammar -> String -> Gen String mkgen grm k = case lookup k grm of Just r -> mkgen' grm k r Nothing -> error $ "key not found: " ++ k mkgen' grm x (Word ws) = elements ws mkgen' grm x (Rule rs) = oneof $ map (rule2gen grm x) rs rule2gen :: Grammar -> String -> [String] -> Gen String rule2gen grm x [] = return "" rule2gen grm x (r:rs) = mkgen grm r `catgen` rule2gen grm x rs catgen :: Gen String -> Gen String -> Gen String catgen g1 g2 = do s1 <- g1 s2 <- g2 return $ catWords s1 s2 catWords "" "" = "" catWords x "" = x catWords "" y = y catWords x y = x ++ " " ++ y -- data Tree = EmptyNode | Node String String | Tree String Tree Tree deriving (Eq) instance Show Tree where show (Node n x) = "(" ++ n ++ " " ++ x ++ ")" show EmptyNode = "()" show (Tree "" t1 EmptyNode) = "(" ++ show t1 ++ ")" show (Tree "" EmptyNode t2) = "(" ++ show t2 ++ ")" show (Tree "" t1 t2) = "(" ++ show t1 ++ " " ++ show t2 ++ ")" show (Tree n t1 EmptyNode) = "(" ++ n ++ " " ++ show t1 ++ ")" show (Tree n EmptyNode t2) = "(" ++ n ++ " " ++ show t2 ++ ")" show (Tree n t1 t2) = "(" ++ n ++ " " ++ show t1 ++ " " ++ show t2 ++ ")" mkgenTree :: Grammar -> String -> Gen Tree mkgenTree grm k = case lookup k grm of Just r -> mkgenTree' grm k r Nothing -> error $ "key not found: " ++ k mkgenTree' grm x (Word ws) = do s <- elements ws return $ Node x s mkgenTree' grm x (Rule rs) = oneof $ map (rule2gentree grm x) rs rule2gentree :: Grammar -> String -> [String] -> Gen Tree rule2gentree grm x [] = return EmptyNode rule2gentree grm x (r:rs) = catgentree x (mkgenTree grm r) (rule2gentree grm x rs) catgentree :: String -> Gen Tree -> Gen Tree -> Gen Tree catgentree x g1 g2 = do s1 <- g1 s2 <- g2 if (s1 == EmptyNode && s2 == EmptyNode) then return EmptyNode else if (s1 /= EmptyNode && s2 == EmptyNode) then return s1 else if (s1 == EmptyNode && s2 /= EmptyNode) then return s2 else return $ Tree x s1 s2
quickCheck - 英文生成(4)
PAIP では同じ英文生成のルールデータを用いて、解析木(Parse Tree)を生成する。わたしの作ったデータ構造だとちょっと上手くいかなかったため、Rule データ型を改良して名前を持てるようにした。
module EnglishRule6 where import Test.QuickCheck import Control.Monad import Data.List import System type Name = String data Rule = Empty Name | Cat Name Rule Rule | Or Name Rule Rule | Alt Name [String] deriving (Eq) type Grammar = [Rule] instance Show Rule where -- nameless show (Empty "") = "()" show (Cat "" r1 r2) = show r1 ++ " + " ++ show r2 show (Or "" r1 r2) = show r1 ++ " | " ++ show r2 show (Alt "" ws) = "(" ++ intercalate "," ws ++ ")" -- named show (Empty x) = x ++ " -> " ++ "()" show (Cat x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " + " ++ nameOf r2 show (Or x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " | " ++ nameOf r2 show (Alt x ws) = x ++ " -> " ++ "(" ++ intercalate "," ws ++ ")" nameOf :: Rule -> Name nameOf (Empty x) = x nameOf (Cat x _ _) = x nameOf (Or x _ _) = x nameOf (Alt x _) = x -- 補助関数 catWords :: String -> String -> String catWords "" "" = "" catWords x "" = x catWords "" y = y catWords x y = x ++ " " ++ y rule2gen :: Rule -> Gen String rule2gen (Empty _) = return "" rule2gen (Alt _ ws) = elements ws rule2gen (Cat _ r1 r2) = do s1 <- rule2gen r1 s2 <- rule2gen r2 return $ catWords s1 s2 rule2gen (Or _ r1 r2) = oneof [rule2gen r1, rule2gen r2] lookupRule :: String -> [Rule] -> Maybe Rule lookupRule k rs = lookup k rs' where rs' = map (\r -> (nameOf r, r)) rs generate :: Grammar -> String -> Gen String generate g x = case lookupRule x g of Just r -> rule2gen r Nothing -> error ("unknown: " ++ x)
解析木のために、PAIP では Lisp のリストを使っているが、Haskell ではデータ型を用意して、文字列を生成する替わりに Tree を生成するようにする。Tree の名前を多相的にしているのは、あとでユニークなインデックスを振った (Int, String) を名前にするため。
-- Parse Tree data Tree a = EmptyNode | Node a String | Tree a (Tree a) (Tree a) deriving (Eq) instance Show a => Show (Tree a) where show (Node n x) = "(" ++ show n ++ " " ++ x ++ ")" show EmptyNode = "" show (Tree n t1 EmptyNode) = "(" ++ show n ++ " " ++ show t1 ++ ")" show (Tree n EmptyNode t2) = "(" ++ show n ++ " " ++ show t2 ++ ")" show (Tree n t1 t2) = "(" ++ show n ++ " " ++ show t1 ++ " " ++ show t2 ++ ")"
上記を生成するためには、 Gen String ではなく Gen Tree 型の関数を生成すればよい。文法ルール自体は変更しなくてよい。
rule2genTree :: Rule -> Gen (Tree String) rule2genTree (Empty n) = return EmptyNode rule2genTree (Alt n ws) = do s <- elements ws return $ Node n s rule2genTree (Cat n r1 r2) = do t1 <- rule2genTree r1 t2 <- rule2genTree r2 return $ Tree n t1 t2 rule2genTree (Or n r1 r2) = do t <- oneof [ rule2genTree r1, rule2genTree r2 ] case t of EmptyNode -> return EmptyNode x -> return $ Tree n t EmptyNode -- 一般化 generate' :: (Rule -> Gen a) -> [Rule] -> String -> Gen a generate' f rs x = case lookupRule x rs of Just r -> f r Nothing -> error ("Fail to lookup rule for : " ++ x) generateStr :: [Rule] -> String -> Gen String generateStr = generate' rule2gen generateTree :: [Rule] -> String -> Gen (Tree String) generateTree = generate' rule2genTree
- rule2genTree と rule2gen はほとんど同じ構造をしているので、もう一段まとめられるような気もする。
quickCheck 入門 - 英文生成(3)
少し考え方を変え、定義をそのままデータ構造にした。Grammar データを単なる Rule の配列でなくタプルのリストにしているのは、あとからルールを参照するため。
module EnglishRule3 where import Test.QuickCheck import Control.Monad import Data.List data Rule = Empty | Rule :+ Rule | Rule :| Rule | Alt [String] deriving (Eq) type Grammar = [(String, Rule)] -- 再帰的データ構造のため Show しないこと -- instance Show Rule where -- show Empty = "()" -- show (r1 :+ r2) = (show r1) ++ " + " ++ (show r2) -- show (r1 :| r2) = show r1 ++ " | " ++ show r2 -- show (Alt ws) = "(" ++ intercalate "," ws ++ ")" -- 補助関数 catWords :: String -> String -> String catWords "" "" = "" catWords x "" = x catWords "" y = y catWords x y = x ++ " " ++ y rule2gen :: Rule -> Gen String rule2gen Empty = return "" rule2gen (Alt ws) = elements ws rule2gen (r1 :+ r2) = do s1 <- rule2gen r1 s2 <- rule2gen r2 return $ catWords s1 s2 rule2gen (r1 :| r2) = oneof [rule2gen r1, rule2gen r2] generate :: Grammar -> String -> Gen String generate g x = case lookup x g of Just r -> rule2gen r Nothing -> error ("unknown :" ++ x)
ルールの定義は以下の通り。
simpleGrammar = [ ("noun", noun), ("verb", verb), ("article", article), ("noun-phrase", noun_phrase), ("verb-phrase", verb_phrase), ("sentence", sentence) ] where noun = Alt ["man", "ball", "woman", "table"] verb = Alt ["hit", "took", "saw", "liked"] article = Alt ["the", "a"] noun_phrase = article :+ noun verb_phrase = verb :+ noun_phrase sentence = noun_phrase :+ verb_phrase bigGrammar = [ ("sentence", sentence), ("noun-phrase", noun_phrase), ("verb-phrase", verb_phrase), ("pp*", ppStar), ("adj*" , adjStar), ("pp", pp), ("prep", prep), ("adj", adj), ("article", article), ("name", name), ("noun", noun), ("verb", verb), ("pronoun", pronoun) ] where sentence = noun_phrase :+ verb_phrase noun_phrase = (article :+ (adjStar :+ (noun :+ ppStar))) :| (name :| pronoun) verb_phrase = verb :+ (noun_phrase :+ ppStar) ppStar = Empty :| (pp :+ ppStar) adjStar = Empty :| (adj :+ adjStar) pp = prep :+ noun_phrase prep = Alt ["to", "in", "by", "with", "on"] adj = Alt ["big", "little", "blue", "green", "adiabatic"] article = Alt ["the", "a"] name = Alt ["Pat", "Kim", "Lee", "Terry", "Robin"] noun = Alt ["man", "ball", "woman", "table"] verb = Alt ["hit", "took", "saw", "liked"] pronoun = Alt ["he", "she", "it", "these", "those", "that"]
実行例。
*EnglishRule3> sample' $ generate simpleGrammar "verb" ["liked","hit","hit","saw","hit","hit","hit","liked","took","saw","saw"] *EnglishRule3> sample' $ generate simpleGrammar "sentence" ["a woman saw the man","the woman saw a woman","the ball liked a table","the woman took a woman","the table saw a man","the table liked the table","the table took a ball","a woman hit a woman","the woman hit a woman","the man liked the table","a man hit a table"] *EnglishRule3> sample' $ generate bigGrammar "sentence" ["the big woman to Terry saw the man by Lee with a table with she with Kim","Lee saw these by the woman in he to a little big woman by a man to the ball by the ball in Terry to a big table in the table by a man to those by the big big blue adiabatic green adiabatic adiabatic table to that in Terry on the ball on he on a man by Terry by the ball on these in a green green big adiabatic ball in the woman in Terry in Lee with that with the blue table by a adiabatic little man with a woman with a man by Kim on the man on the blue green table on he in the blue blue table to the little ball to a man with the ball","the ball liked the table","Terry hit Robin","Pat saw Lee","a ball saw he","a blue little blue big man in Robin in the ball hit a big blue table on the little green big woman to a ball with Lee to it by Robin in the table by a little big green man on the green woman by it in Lee to a green ball on the woman by a blue woman on a green ball to she to a green ball","a ball hit a blue little green big table on a little little man on Kim to a blue adiabatic man to the adiabatic little woman to a man by Lee by the man in a man with that to these with a table in a man to a big man to she in Lee with he in she in he in Terry in a big man by the woman in Pat","a ball took the woman to the table in she on she with a big blue green table in the adiabatic little table on a woman on the adiabatic table in these to the green adiabatic woman by Lee on a big adiabatic man with Robin in the green big green man with Terry","Terry took these","those took Terry in Robin"]
quickCheck 入門 - 英文生成(2)
Maybe 型を使ってみた途中版。無駄なスペースが生成されるバグあり。あまり整頓された感じはしない。
module EnglishRule2 where import Test.QuickCheck import Control.Monad import Data.List import Control.Applicative data Sym = Sym String deriving (Eq) instance Show Sym where show (Sym x) = x data Rule = Rule [Maybe [Sym]] | Alt [String] deriving (Eq) instance Show Rule where show (Rule rs) = show $ map (\r -> case r of Just x -> intercalate " + " (map show x) Nothing -> "()") rs show (Alt ws) = "(" ++ intercalate "," ws ++ ")" type Grammar = [(Sym, Rule)] generate :: Grammar -> Sym -> Gen String generate g x = case lookup x g of Just r -> rule2gen g r Nothing -> error ("Unknown symbol: " ++ show x) rule2gen g (Alt ws) = elements ws rule2gen g (Rule rs) = oneof $ map (rule2gen' g) rs rule2gen' g (Just (r:rs)) = do a <- generate g r rest <- rule2gen' g (Just rs) return (a ++ " " ++ rest) rule2gen' g (Just []) = return "" rule2gen' g Nothing = return "" simpleGrammar = [ (Sym "noun", Alt ["man", "ball", "woman", "table"]), (Sym "verb", Alt ["hit", "took", "saw", "liked"]), (Sym "article", Alt ["the", "a"]), (Sym "noun-phrase", Rule [Just [Sym "article", Sym "noun"]]), (Sym "verb-phrase", Rule [Just [Sym "verb", Sym "noun-phrase"]]), (Sym "sentence", Rule [Just [Sym "noun-phrase", Sym "verb-phrase"]]) ] :: Grammar bigGrammar = [ (Sym "sentence", Rule [ Just [Sym "noun-phrase", Sym "verb-phrase"]]), (Sym "noun-phrase", Rule [ Just [Sym "article", Sym "adj*", Sym "noun", Sym "pp*"], Just [Sym "name"], Just [Sym "pronoun"]]), (Sym "verb-phrase", Rule [ Just [Sym "verb", Sym "noun-phrase", Sym "pp*"]]), (Sym "pp*", Rule [ Nothing , Just [Sym "pp", Sym "pp*"]]), (Sym "adj*", Rule [ Nothing, Just [Sym "adj", Sym "adj*"]]), (Sym "pp", Rule [ Just [Sym "prep", Sym "noun-phrase"]]), (Sym "prep", Alt ["to", "in", "by", "with", "on"]), (Sym "adj", Alt ["big", "little", "blue", "green", "adiabatic"]), (Sym "article", Alt ["the", "a"]), (Sym "name", Alt ["Pat", "Kim", "Lee", "Terry", "Robin"]), (Sym "noun", Alt ["man", "ball", "woman", "table"]), (Sym "verb", Alt ["hit", "took", "saw", "liked"]), (Sym "pronoun", Alt ["he", "she", "it", "these", "those", "that"]) ] :: Grammar
quickCheck 入門 - 英文生成
英文生成の簡単な版は、QuickCheck を使うことで以下のように書けた(PAIP 2.2 A Straightforward Solution)。
module English where import Test.QuickCheck import Control.Monad verb = elements ["hit", "took", "saw", "liked"] :: Gen String noun = elements ["man", "ball", "woman", "table"] :: Gen String article = elements ["the", "a"] :: Gen String -- 補助関数 cat2 x y = x ++ " " ++ y nounPhrase = liftM2 cat2 article noun :: Gen String verbPhrase = liftM2 cat2 verb nounPhrase :: Gen String sentence = liftM2 cat2 nounPhrase verbPhrase :: Gen String
これを改良して、ルールベースにすることを考える(PAIP 2.3 A Rule-Based Solution)。テキストでは、英文法をデータとして持ち(*simple-grammar*, *big-grammar*)、同じプログラムが文法データを変えれば違う英文を生成するようになっている。Lisp 版では、シンボルがルール名とそのルールの参照に使われている。
Haskell にはシンボルが無いので、シンボル用のデータ型 Sym String を用いた第一版を以下に示す。
module EnglishRule where import Test.QuickCheck import Control.Monad import Data.List data Sym = Sym String deriving (Eq) instance Show Sym where show (Sym x) = x data Rule = Rule [[Sym]] | Alt [String] deriving (Eq) instance Show Rule where show (Rule rs) = show $ map (intercalate " + " . map show) rs show (Alt ws) = "(" ++ intercalate "," ws ++ ")" type Grammar = [(Sym, Rule)] simpleGrammar = [ (Sym "noun", Alt ["man", "ball", "woman", "table"]), (Sym "verb", Alt ["hit", "took", "saw", "liked"]), (Sym "article", Alt ["the", "a"]), (Sym "noun-phrase", Rule [[Sym "article", Sym "noun"]]), (Sym "verb-phrase", Rule [[Sym "verb", Sym "noun-phrase"]]), (Sym "sentence", Rule [[Sym "noun-phrase", Sym "verb-phrase"]]) ] :: Grammar generate :: Grammar -> Sym -> Gen String generate g x = case lookup x g of Just (Alt ws) -> elements ws Just (Rule rs) -> oneof (map (generate' g x) rs) Nothing -> error ("Unknown symbol: " ++ show x) generate' g x [] = return "" generate' g x (r:rs) = do a <- generate g r rest <- generate' g x rs if a /= "" && rest /= "" then return $ a ++ " " ++ rest else if a /= "" && rest == "" then return a else if a == "" && rest /= "" then return rest else return "" -- a == "" && rest == "" bigGrammar = [ (Sym "sentence", Rule [[Sym "noun-phrase", Sym "verb-phrase"]]), (Sym "noun-phrase", Rule [[Sym "article", Sym "adj*", Sym "noun", Sym "pp*"], [Sym "name"], [Sym "pronoun"]]), (Sym "verb-phrase", Rule [[Sym "verb", Sym "noun-phrase", Sym "pp*"]]), (Sym "pp*", Rule [[], [Sym "pp", Sym "pp*"]]), (Sym "adj*", Rule [[], [Sym "adj", Sym "adj*"]]), (Sym "pp", Rule [[Sym "prep", Sym "noun-phrase"]]), (Sym "prep", Alt ["to", "in", "by", "with", "on"]), (Sym "adj", Alt ["big", "little", "blue", "green", "adiabatic"]), (Sym "article", Alt ["the", "a"]), (Sym "name", Alt ["Pat", "Kim", "Lee", "Terry", "Robin"]), (Sym "noun", Alt ["man", "ball", "woman", "table"]), (Sym "verb", Alt ["hit", "took", "saw", "liked"]), (Sym "pronoun", Alt ["he", "she", "it", "these", "those", "that"]) ] :: Grammar
何カ所かかっこう悪いところがあり、リファクタリングが必要である。
- Common Lisp 版に比べて長い。Haskell の言語要素、ライブラリの大きさを考えると、もっと簡潔にできるのではないか。
- シンボルの代替 Sym String が如何にも冗長。もっと奇麗な、Haskell らしい構成は無いか。
- generate' で空文字判定をしているところが不格好。Maybe モナドが使えるのでは。
- QuickCheck の Gen String 型で妥当か。
続く。