quickCheck - 英文生成(4)

PAIP では同じ英文生成のルールデータを用いて、解析木(Parse Tree)を生成する。わたしの作ったデータ構造だとちょっと上手くいかなかったため、Rule データ型を改良して名前を持てるようにした。

module EnglishRule6 where
import Test.QuickCheck
import Control.Monad
import Data.List

import System

type Name = String
data Rule = Empty Name | Cat Name Rule Rule | Or Name Rule Rule | Alt Name [String] deriving (Eq)

type Grammar = [Rule]

instance Show Rule where
    -- nameless
    show (Empty "") = "()"
    show (Cat "" r1 r2) = show r1 ++ " + " ++ show r2
    show (Or "" r1 r2) = show r1 ++ " | " ++ show r2
    show (Alt "" ws) = "(" ++ intercalate "," ws ++ ")"
    -- named
    show (Empty x) = x ++ " -> " ++ "()"
    show (Cat x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " + " ++ nameOf r2
    show (Or x r1 r2) = x ++ " -> " ++ nameOf r1 ++ " | " ++ nameOf r2
    show (Alt x ws) = x ++ " -> " ++ "(" ++ intercalate "," ws ++ ")"

nameOf :: Rule -> Name
nameOf (Empty x) = x
nameOf (Cat x _ _) = x
nameOf (Or x _ _) = x
nameOf (Alt x _) = x

-- 補助関数
catWords :: String -> String -> String
catWords "" "" = ""
catWords x "" = x
catWords "" y = y
catWords x y = x ++ " " ++ y

rule2gen :: Rule -> Gen String
rule2gen (Empty _) = return ""
rule2gen (Alt _ ws) = elements ws
rule2gen (Cat _ r1 r2) = do
  s1 <- rule2gen r1
  s2 <- rule2gen r2
  return $ catWords s1 s2
rule2gen (Or _ r1 r2) = oneof [rule2gen r1, rule2gen r2]

lookupRule :: String -> [Rule] -> Maybe Rule
lookupRule k rs = lookup k rs'
    where rs' = map (\r -> (nameOf r, r)) rs

generate :: Grammar -> String -> Gen String
generate g x = case lookupRule x g of
                 Just r -> rule2gen r
                 Nothing -> error ("unknown: " ++ x)

解析木のために、PAIP では Lisp のリストを使っているが、Haskell ではデータ型を用意して、文字列を生成する替わりに Tree を生成するようにする。Tree の名前を多相的にしているのは、あとでユニークなインデックスを振った (Int, String) を名前にするため。

-- Parse Tree
data Tree a = EmptyNode | Node a String | Tree a (Tree a) (Tree a) deriving (Eq)
instance Show a => Show (Tree a) where
    show (Node n x) = "(" ++ show n ++ " " ++ x ++ ")"
    show EmptyNode = ""
    show (Tree n t1 EmptyNode) = "(" ++ show n ++ " " ++ show t1 ++ ")"
    show (Tree n EmptyNode t2) = "(" ++ show n ++ " " ++ show t2 ++ ")"
    show (Tree n t1 t2) = "(" ++ show n ++ " " ++ show t1 ++ " " ++ show t2 ++ ")"

上記を生成するためには、 Gen String ではなく Gen Tree 型の関数を生成すればよい。文法ルール自体は変更しなくてよい。

rule2genTree :: Rule -> Gen (Tree String)
rule2genTree (Empty n) = return EmptyNode
rule2genTree (Alt n ws) = do
  s <- elements ws
  return $ Node n s
rule2genTree (Cat n r1 r2) = do
  t1 <- rule2genTree r1
  t2 <- rule2genTree r2
  return $ Tree n t1 t2
rule2genTree (Or n r1 r2) = do
  t <- oneof [ rule2genTree r1, rule2genTree r2 ]
  case t of
    EmptyNode -> return EmptyNode
    x -> return $ Tree n t EmptyNode

-- 一般化
generate' :: (Rule -> Gen a) -> [Rule] -> String -> Gen a
generate' f rs x = case lookupRule x rs of
                     Just r -> f r
                     Nothing -> error ("Fail to lookup rule for : " ++ x)
generateStr :: [Rule] -> String -> Gen String
generateStr = generate' rule2gen
generateTree :: [Rule] -> String -> Gen (Tree String)
generateTree = generate' rule2genTree
  • rule2genTree と rule2gen はほとんど同じ構造をしているので、もう一段まとめられるような気もする。