2010-11-11 44 views
2

我最近一直在使用FParsec,并且发现缺乏通用解析器对我来说是一个主要的停止点。我的这个小库的目标是简单以及对通用输入的支持。你能想到任何可以改善这一点的补充,或者是特别糟糕的吗?这是解析器组合器库的合理基础吗?

 
open LazyList 

type State<'a, 'b> (input:LazyList<'a>, data:'b) = 
    member this.Input = input 
    member this.Data = data 

type Result<'a, 'b, 'c> = 
| Success of 'c * State<'a, 'b> 
| Failure of string * State<'a, 'b> 

type Parser<'a,'b, 'c> = State<'a, 'b> -> Result<'a, 'b, 'c> 

let (>>=) left right state = 
    match left state with 
    | Success (result, state) -> (right result) state 
    | Failure (message, _) -> Result<'a, 'b, 'd>.Failure (message, state) 

let (<|>) left right state = 
    match left state with 
    | Success (_, _) as result -> result 
    | Failure (_, _) -> right state 

let (|>>) parser transform state = 
    match parser state with 
    | Success (result, state) -> Success (transform result, state) 
    | Failure (message, _) -> Failure (message, state) 

let (<?>) parser errorMessage state = 
    match parser state with 
    | Success (_, _) as result -> result 
    | Failure (_, _) -> Failure (errorMessage, state)      

type ParseMonad() = 
    member this.Bind (f, g) = f >>= g 
    member this.Return x s = Success(x, s) 
    member this.Zero() s = Failure("", s)       
    member this.Delay (f:unit -> Parser<_,_,_>) = f() 

let parse = ParseMonad() 

回溯

令人惊讶的是并没有采取太多的代码来实现你的描述。这是有点草率,但似乎工作得很好。

let (>>=) left right state = 
    seq { 
     for res in left state do 
      match res with 
      | Success(v, s) -> 
       let v = 
        right v s 
        |> List.tryFind (
         fun res -> 
          match res with 
          | Success (_, _) -> true 
          | _ -> false 
        ) 
       match v with 
       | Some v -> yield v 
       | None ->() 
    } |> Seq.toList 

let (<|>) left right state = 
    left state @ right state 

回溯第2部分

开关周围的代码,以使用惰性列表和尾部调用优化递归。

let (>>=) left right state = 
    let rec readRight lst = 
     match lst with 
     | Cons (x, xs) -> 
      match x with 
      | Success (r, s) as q -> LazyList.ofList [q]      
      | Failure (m, s) -> readRight xs 
     | Nil -> LazyList.empty<Result<'a, 'b, 'd>> 
    let rec readLeft lst = 
     match lst with 
     | Cons (x, xs) -> 
      match x with 
      | Success (r, s) -> 
       match readRight (right r s) with 
       | Cons (x, xs) -> 
        match x with 
        | Success (r, s) as q -> LazyList.ofList [q]      
        | Failure (m, s) -> readRight xs 
       | Nil -> readLeft xs 
      | Failure (m, s) -> readLeft xs 
     | Nil -> LazyList.empty<Result<'a, 'b, 'd>> 
    readLeft (left state) 

let (<|>) (left:Parser<'a, 'b, 'c>) (right:Parser<'a, 'b, 'c>) state = 
    LazyList.delayed (fun() -> left state) 
    |> LazyList.append 
    <| LazyList.delayed (fun() -> right state) 
+0

说不上来,如果有什么有用的东西,但也可能http://lorgonblog.wordpress.com/2008/02/看16/monadic-parser-combinators-in-f/ – Brian 2010-11-11 07:08:47

+0

@Brian - 说实话,post是我开始解析器组合体学习体验的地方。 :) – ChaosPandion 2010-11-11 14:40:15

回答

2

我认为,你需要做出一个重要的设计决策是你是否想支持您的解析器或不回溯(我不记得多少有关分析理论,但是这可能指定类型您的解析器可以处理的语言)。

回溯。在您的实现中,解析器可能会失败(Failure大小写)或只产生一个结果(Success大小写)。另一种选择是生成零个或多个结果(例如,表示结果为seq<'c>)。对不起,如果这是你已经考虑:-),但无论如何...

区别是你的解析器总是遵循第一个可能的选项。例如,如果您编写如下内容:

let! s1 = (str "ab" <|> str "a") 
let! s2 = str "bcd" 

使用您的实现时,输入“abcd”会失败。它将选择<|>运算符的第一个分支,它将处理前两个字符,并且序列中的下一个分析器将失败。基于序列的实现将能够回溯并跟随<|>中的第二条路径并解析输入。

合并。对我而言,另一个想法是您还可以将Combine成员添加到解析器计算构建器。这有点微妙(因为您需要了解计算表达式的翻译方式),但它有时可能有用。如果添加:

member x.Combine(a, b) = a <|> b 
member x.ReturnFrom(p) = p 

然后就可以很好地编写递归解析器:

let rec many p acc = 
    parser { let! r = p     // Parse 'p' at least once 
      return! many p (r::acc)  // Try parsing 'p' multiple times 
      return r::acc |> List.rev } // If fails, return the result 
+0

我只是在想如何使用组合我的优势。至于回溯,说实话我还没有考虑过。我一定会加入它并进行实验。感谢您的建议,一如既往的富有洞察力。 – ChaosPandion 2010-11-11 04:20:39

+0

我刚刚检查了FParsec实现,他们故意没有包含'Combine'或'ReturnFrom'方法。 – ChaosPandion 2010-11-11 04:24:18

+0

你是2这个2。我已经实现了基本的回溯,并且随着对它的更好理解,我会对其进行改进。再次感谢。 – ChaosPandion 2010-11-11 06:55:09

相关问题