quickCheck - 英文生成(4)
PAIP では同じ英文生成のルールデータを用いて、解析木(Parse Tree)を生成する。わたしの作ったデータ構造だとちょっと上手くいかなかったため、Rule データ型を改良して名前を持てるようにした。
module EnglishRule6 where import Test.QuickCheck import Control.Monad import Data.List import System type Name = String data Rule = Empty Name | Cat Name Rule Rule | Or Name Rule Rule | Alt Name [String] deriving (Eq) type Grammar = [Rule] instance Show Rule where -- nameless show (Empty "") = "()" show (Cat "" r1 r2) = show r1 ++ " + " ++ show r2 show (Or "" r1 r2) = show r1 ++ " | " ++ show r2 show (Alt "" ws) = "(" ++ intercalate "," ws ++ ")" -- named show (Empty x) = x ++ " -> " ++ "()" show (Cat x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " + " ++ nameOf r2 show (Or x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " | " ++ nameOf r2 show (Alt x ws) = x ++ " -> " ++ "(" ++ intercalate "," ws ++ ")" nameOf :: Rule -> Name nameOf (Empty x) = x nameOf (Cat x _ _) = x nameOf (Or x _ _) = x nameOf (Alt x _) = x -- 補助関数 catWords :: String -> String -> String catWords "" "" = "" catWords x "" = x catWords "" y = y catWords x y = x ++ " " ++ y rule2gen :: Rule -> Gen String rule2gen (Empty _) = return "" rule2gen (Alt _ ws) = elements ws rule2gen (Cat _ r1 r2) = do s1 <- rule2gen r1 s2 <- rule2gen r2 return $ catWords s1 s2 rule2gen (Or _ r1 r2) = oneof [rule2gen r1, rule2gen r2] lookupRule :: String -> [Rule] -> Maybe Rule lookupRule k rs = lookup k rs' where rs' = map (\r -> (nameOf r, r)) rs generate :: Grammar -> String -> Gen String generate g x = case lookupRule x g of Just r -> rule2gen r Nothing -> error ("unknown: " ++ x)
解析木のために、PAIP では Lisp のリストを使っているが、Haskell ではデータ型を用意して、文字列を生成する替わりに Tree を生成するようにする。Tree の名前を多相的にしているのは、あとでユニークなインデックスを振った (Int, String) を名前にするため。
-- Parse Tree data Tree a = EmptyNode | Node a String | Tree a (Tree a) (Tree a) deriving (Eq) instance Show a => Show (Tree a) where show (Node n x) = "(" ++ show n ++ " " ++ x ++ ")" show EmptyNode = "" show (Tree n t1 EmptyNode) = "(" ++ show n ++ " " ++ show t1 ++ ")" show (Tree n EmptyNode t2) = "(" ++ show n ++ " " ++ show t2 ++ ")" show (Tree n t1 t2) = "(" ++ show n ++ " " ++ show t1 ++ " " ++ show t2 ++ ")"
上記を生成するためには、 Gen String ではなく Gen Tree 型の関数を生成すればよい。文法ルール自体は変更しなくてよい。
rule2genTree :: Rule -> Gen (Tree String) rule2genTree (Empty n) = return EmptyNode rule2genTree (Alt n ws) = do s <- elements ws return $ Node n s rule2genTree (Cat n r1 r2) = do t1 <- rule2genTree r1 t2 <- rule2genTree r2 return $ Tree n t1 t2 rule2genTree (Or n r1 r2) = do t <- oneof [ rule2genTree r1, rule2genTree r2 ] case t of EmptyNode -> return EmptyNode x -> return $ Tree n t EmptyNode -- 一般化 generate' :: (Rule -> Gen a) -> [Rule] -> String -> Gen a generate' f rs x = case lookupRule x rs of Just r -> f r Nothing -> error ("Fail to lookup rule for : " ++ x) generateStr :: [Rule] -> String -> Gen String generateStr = generate' rule2gen generateTree :: [Rule] -> String -> Gen (Tree String) generateTree = generate' rule2genTree
- rule2genTree と rule2gen はほとんど同じ構造をしているので、もう一段まとめられるような気もする。