I’ve written a few compilers, the most visible one being the PowerBuilder .NET compiler that can compile a PowerBuilder application into either a .NET Windows Forms application or an ASP.NET application. We used ANTLR 2 as the parser generator, which works great for us except for one thing: the context sensitive keywords. PowerScript, the programming language provided by PowerBuilder, allows some keywords, e.g. the keyword “update” of embedded SQL, to be used as identifiers outside of embedded SQL statements. We had to introduce a member variable in the lexer class to specify the current context that parser is in in order for so the lexer to do the right thing in different contexts.
Later I discovered Irony from irony.codeplex.com, that allows you to write grammar in C# code directly, which eliminates the step to generate a lexer and a parser classes from a grammar specification. Irony also has a Grammar Explorer with which you can debug and test your grammar. However, since the tool is still in beta stage, I haven’t invested a lot of time in it.
ANTLR and Irony may look very different, they have at least one thing in common: they both have two distinct phases: scanning and parsing, which means that, by default, the scanner doesn’t know what context the parser is in, thus making it harder to have the lexer to different things in different contexts.
I kept searching for other solutions until I found FParsec, a parser combinator library for F#, a functional programming language from Microsoft. A parser takes a string as the input and generates some form of output, e.g. an AST. By definition, a parser is a function. And therefore it is very natural to write parsers using functional programming languages. FParsec provides some basic parsers, which can be combined to parse higher language constructs of a particular language, until the highest level parser is defined to parse the entire compilation unit. There isn’t a distinct between lexing and parsing. You define parsers to parse both terminals and non-terminals. At any point in parsing, you can decide exactly which parser to use to parse the next token, making it very easy to support context sensitive keywords.
The following F# code is what I’ve written to parse BarModel script, whose grammar is appended at the end.
module Parser
open FParsec
open Ast
type Line = Blank | Statement of Statement
type BarModelParser () =
let ws : Parser< unit, unit> = skipMany (pchar ‘ ‘ <|> pchar ‘\t’)
let str_ws c = pstring c .>> ws
let strCI_ws c = pstringCI c .>> ws
// Tokens
let addToken : Parser< string, unit> = strCI_ws “add”
let cutToken : Parser< string, unit> = strCI_ws “cut”
let shiftToken : Parser< string, unit> = strCI_ws “shift”
let updateToken : Parser< string, unit> = strCI_ws “update”
let barToken : Parser< string, unit> = strCI_ws “bar”
let boxToken : Parser< string, unit> = strCI_ws “box”
let labelToken : Parser< string, unit> = strCI_ws “label”
let titleToken : Parser< string, unit> = strCI_ws “title”
let sizeToken : Parser< string, unit> = strCI_ws “size”
let fillToken : Parser< string, unit> = strCI_ws “fill”
let strokeToken : Parser< string, unit> = strCI_ws “stroke”
let strokeStyleToken : Parser< string, unit> = strCI_ws “stroke-style”
let positionToken : Parser< string, unit> = strCI_ws “position”
let toToken : Parser< string, unit> = strCI_ws “to”
let asToken : Parser< string, unit> = strCI_ws “as”
let byToken : Parser< string, unit> = strCI_ws “by”
let intoToken : Parser< string, unit> = strCI_ws “into”
let fromToken : Parser< string, unit> = strCI_ws “from”
let withToken : Parser< string, unit> = strCI_ws “with”
let leftToken : Parser< string, unit> = strCI_ws “left”
let rightToken : Parser< string, unit> = strCI_ws “right”
let aboveToken : Parser< string, unit> = strCI_ws “above”
let belowToken : Parser< string, unit> = strCI_ws “below”
// Identifier
let identifier =
let isValidFirstChar c = isLetter c || c = ‘_’
let isValidChar c = isLetter c || isDigit c || c = ‘_’
many1Satisfy2L isValidFirstChar isValidChar “identifier”
// Literals
let floatNumber = pfloat .>> ws
let intNumber = pint32 .>> ws
let number : Parser< double, unit> = floatNumber <|> (intNumber |>> fun i -> (float i))
let numberString : Parser< string, unit> =
(floatNumber |>> fun f -> (string f)) <|>
(intNumber |>> fun i -> (string i))
let doubleQuoteStringLiteral = between (pstring “\””) (pstring “\”” ) (manySatisfy ((<>) ‘\”‘ ))
let singleQuoteStringLiteral = between (pstring “\’”) (pstring “\’” ) (manySatisfy ((<>) ‘\” ))
let stringLiteral = doubleQuoteStringLiteral <|> singleQuoteStringLiteral
let hexColor = pstring “#” >>. many1Satisfy isHex |>> fun c -> (“#” + c)
let color = identifier <|> hexColor <|> stringLiteral
let strokeStyle =
(strCI_ws “solid” >>% Solid) <|>
(strCI_ws “dotted” >>% Dotted) <|>
(strCI_ws “dashed” >>% Dashed)
let position = (aboveToken >>% Above) <|> (belowToken >>% Below)
// Properties
let titleProperty =
titleToken >>. str_ws “=” >>. (stringLiteral <|> identifier <|> numberString) .>> ws
|>> fun s -> TitleProperty(s)
let sizeValue =
(identifier |>> fun i -> Symbol(i)) <|>
(stringLiteral |>> fun s -> Symbol(s)) <|>
(number |>> fun n -> Value(n))
let sizeProperty = sizeToken >>. str_ws “=” >>. sizeValue .>> ws |>> fun s -> SizeProperty(s)
let fillProperty = fillToken >>. str_ws “=” >>. color .>> ws |>> fun c -> FillProperty(c)
let strokeProperty = strokeToken >>. str_ws “=” >>. color .>> ws |>> fun c -> StrokeProperty(c)
let strokeStyleProperty = strokeStyleToken >>. str_ws “=” >>. strokeStyle .>> ws |>> fun s -> StrokeStyleProperty(s)
let boxProperty = sizeProperty <|> fillProperty <|> strokeProperty <|> strokeStyleProperty
let boxProperties = many boxProperty
let boxProperties1 = many1 boxProperty
let positionProperty = positionToken >>. str_ws “=” >>. position .>> ws |>> fun p -> PositionProperty(p)
let labelProperties = many1 (titleProperty <|> positionProperty)
// Bar Specifier
let barAlias = identifier |>> fun i -> BarAlias(i)
let singleBarIndex =
intNumber |>> fun i -> BarIndex(i)
let rangeBarIndex =
(pipe3 intNumber toToken intNumber ( fun i _ j -> BarRange(i,j)))
let barIndex =
barToken >>.
between (str_ws “[“) (str_ws “]” ) (attempt rangeBarIndex <|> singleBarIndex)
let barSpec = (attempt barIndex <|> barAlias) .>> ws
// Box Specifier
let entireBar =
intNumber |>> fun i -> EntireBar(i)
let singleBoxIndex =
pipe3 intNumber (str_ws “,”) intNumber (fun i _ j -> BoxIndex(i, j))
let rangeBoxIndex =
pipe5 intNumber (str_ws “,”) intNumber toToken intNumber (fun i _ j _ k -> BoxRange(i,j,k))
let boxAlias = (identifier .>> ws) |>> fun i -> BoxAlias(i)
let boxIndex =
boxToken >>.
between (str_ws “[“) (str_ws “]” )
(
attempt rangeBoxIndex <|>
attempt singleBoxIndex <|>
entireBar
)
let boxSpec = (attempt boxIndex <|> boxAlias) .>> ws
let toBar = toToken >>. barSpec
let toBox = toToken >>. boxSpec
let toBarOrBox =
toToken >>.
(
attempt boxIndex <|>
attempt barIndex <|>
boxAlias
)
let asBarAlias = asToken >>. identifier
let asBoxAlias = asToken >>. identifier
// Statements
let addBar = pipe3 barToken titleProperty (opt asBarAlias) ( fun _ title alias -> AddBar(title, alias))
let withLabelKeywords = withToken .>>. labelToken
let withLabel = withLabelKeywords >>. labelProperties
let addBox = pipe5 boxToken boxProperties toBar (opt asBoxAlias) (opt withLabel) (fun _ props bar alias label -> AddBox(props, bar, alias, label))
let addLabel =
pipe3 labelToken labelProperties toBarOrBox ( fun _ props shape -> AddLabel(props, shape))
let addStatement =
addToken >>.
(
addBar <|>
addBox <|>
addLabel
)
let ratios = sepBy1 intNumber (str_ws “:”)
let cutBoxIntoRatios = pipe3 boxSpec intoToken ratios ( fun box _ r -> CutInto(box, r))
let side = (leftToken >>% Left) <|> (rightToken >>% Right)
let cutBoxFrom = pipe5 boxSpec fromToken side byToken sizeValue ( fun box _ side _ size -> CutFrom(box, side, size))
let cutBox =
cutToken >>.
(
attempt cutBoxIntoRatios <|>
cutBoxFrom
)
let shiftBox = pipe4 shiftToken boxSpec toToken barSpec ( fun _ box _ bar -> ShiftBox(box, bar))
let updateBox = pipe3 updateToken boxSpec boxProperties1 ( fun _ box props -> UpdateBox(box, props))
let primitive =
(boxSpec |>> fun b -> Box(b)) <|>
(floatNumber |>> fun f -> Literal(f)) <|>
(identifier |>> fun id -> Id(id))
let boxes = sepBy1 boxSpec (str_ws “:”)
let ratioConstraint = pipe3 boxes (str_ws “=”) ratios (fun b _ r -> RatioConstraint(b, r))
let oppa = new OperatorPrecedenceParser< Expression, unit , unit >()
let arithmetic = oppa.ExpressionParser
let terma = primitive .>> ws
do oppa.TermParser <- terma
do oppa.AddOperator( InfixOperator(“+” , ws, 1, Associativity .Left, fun x y ->Arithmetic(x, Add, y)))
do oppa.AddOperator( InfixOperator(“-“ , ws, 1, Associativity .Left, fun x y ->Arithmetic(x, Subtract, y)))
let additiveConstraint = pipe3 arithmetic (str_ws “=”) arithmetic (fun left _ right -> AdditiveConstraint(left, right))
let statement =
addStatement <|>
cutBox <|>
shiftBox <|>
updateBox <|>
attempt ratioConstraint <|>
attempt additiveConstraint
let pcomment = pchar ‘#’ >>. skipManySatisfy ( fun c -> c <> ‘\n’) >>. pchar ‘\n’
let peol = pcomment <|> (pchar ‘\n’)
let pline = spaces >>. statement .>> peol |>> ( fun i -> Statement i)
let pblank = spaces >>. peol |>> ( fun _ -> Blank)
let plines = many (pline <|> pblank) .>> eof
member this.parse (program: string) =
match run plines program with
| Success(result, s, p) ->
let r = result |> List.choose ( function Statement i -> Some i | Blank -> None)
Success(r, s, p)
| Failure(errorMsg, e, s) -> Failure(errorMsg, e, s)
ParserTester.fs defines the unit tests for the Parser
module ParserTester
open FParsec
open Ast
open Parser
open FsUnit
open FsUnit.MsTest
open Microsoft.VisualStudio.TestTools.UnitTesting
[<TestClass>]
type “Add bar“ () =
[<TestMethod>]
member x.“Add a bar with a title.“() =
let parser = new BarModelParser()
parser.parse “add bar title=apple\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddBar(TitleProperty(title), _)] -> title |> should equal “apple”
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(e, _, _) -> e |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Add a bar as alias“() =
let parser = new BarModelParser()
parser.parse “add bar title=orange as bar1\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddBar(TitleProperty(title), Some(alias))] ->
title |> should equal “orange”
alias |> should equal “bar1”
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(e, _, _) -> e |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Add a box“() =
let parser = new BarModelParser()
parser.parse “add box to bar[1]\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddBox(props, BarIndex(i), alias, label)] ->
props |> List.length |> should equal 0
i |> should equal 1
alias.IsNone |> should equal true
label.IsNone |> should equal true
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(e, _, _) -> e |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Add a box with size“() =
let parser = new BarModelParser()
parser.parse “add box size=100 to bar1\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddBox(props, BarAlias(a), alias, label)] ->
props |> List.length |> should equal 1
match props.Head with
| SizeProperty(Value(size)) -> size |> should equal 100.0
| _ -> “size property” |> should throw typeof<System.Exception >
a |> should equal “bar1”
alias.IsNone |> should equal true
label.IsNone |> should equal true
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(e, _, _) -> e |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Add a box with size fill stroke“() =
let parser = new BarModelParser()
parser.parse “add box size = 100 fill= red stroke =#ababab to bar1\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddBox(props, BarAlias(a), alias, label)] ->
props |> List.length |> should equal 3
match props.Head with
| SizeProperty(Value(size)) -> size |> should equal 100.0
| _ -> “size property” |> should throw typeof<System.Exception >
match props.Tail.Head with
| FillProperty(color) -> color |> should equal “red”
| _ -> “fill property” |> should throw typeof<System.Exception >
match props.Tail.Tail.Head with
| StrokeProperty(rgb) -> rgb |> should equal “#ababab”
| _ -> “stroke property” |> should throw typeof<System.Exception >
a |> should equal “bar1”
alias.IsNone |> should equal true
label.IsNone |> should equal true
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(e, _, _) -> e |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Add label to a box“() =
let parser = new BarModelParser()
parser.parse “add label title=\”tree\” position=above to box[2, 1 to 6]\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddLabel(props, box)] ->
props |> List.length |> should equal 2
match props.Head with
| TitleProperty(title) -> title |> should equal “tree”
| _ -> “title” |> should throw typeof<System.Exception >
match props.Tail.Head with
| PositionProperty(pos) -> pos |> should equal Above
| _ -> “position” |> should throw typeof<System.Exception >
| _ -> “properties” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Add label to bars“() =
let parser = new BarModelParser()
parser.parse “add label title=300 to bar[1 to 6]\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AddLabel(props, bar)] ->
props |> List.length |> should equal 1
match props.Head with
| TitleProperty(title) -> title |> should equal “300”
| _ -> “title” |> should throw typeof<System.Exception >
| _ -> “properties” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Cut box into equal units“() =
let parser = new BarModelParser()
parser.parse “cut box[1, 10] into 9 \n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [CutInto(BoxIndex(bar, box), ratios)] ->
bar |> should equal 1
box |> should equal 10
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Cut box into ratios“() =
let parser = new BarModelParser()
parser.parse “cut box[1, 10] into 2 : 3 : 4 \n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [CutInto(BoxIndex(bar, box), ratios)] ->
bar |> should equal 1
box |> should equal 10
List.length ratios |> should equal 3
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Cut box from left“() =
let parser = new BarModelParser()
parser.parse “cut box[1, 10] from left by 300\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [CutFrom(BoxIndex(bar, box), side, Value(sz))] ->
bar |> should equal 1
box |> should equal 10
side |> should equal Left
sz |> should equal 300.0
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Cut box from right“() =
let parser = new BarModelParser()
parser.parse “cut box[5] from right by 300\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [CutFrom(EntireBar(bar), side, Value(size))] ->
bar |> should equal 5
side |> should equal Right
size |> should equal 300.0
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Shift box“() =
let parser = new BarModelParser()
parser.parse “shift box[1, 1] to bar[4]\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [ShiftBox(BoxIndex(bar, box), BarIndex(bar2))] ->
bar |> should equal 1
box |> should equal 1
bar2 |> should equal 4
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Update box“() =
let parser = new BarModelParser()
parser.parse “update box[1, 1] fill=blue\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [UpdateBox(BoxIndex(bar, box), props)] ->
bar |> should equal 1
box |> should equal 1
props.Length |> should equal 1
match props.Head with
| FillProperty(c) -> c |> should equal “blue”
| _ -> “error” |> should throw typeof<System.Exception >
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Additive constraint“() =
let parser = new BarModelParser()
parser.parse “box[1, 1]+box[2] – box[3, 2 to 7] = 210\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [AdditiveConstraint(left, right)] ->
match left with
| Arithmetic(Arithmetic(Box(BoxIndex(bar1, box1)), op1, Box(EntireBar(bar2))), op2, Box(BoxRange(bar3, box31, box32))) ->
bar1 |> should equal 1
box1 |> should equal 1
op1 |> should equal Add
bar2 |> should equal 2
bar3 |> should equal 3
box31 |> should equal 2
box32 |> should equal 7
| _ -> “error” |> should throw typeof<System.Exception >
match right with
| Literal(value) -> value |> should equal 210.0
| _ -> “error” |> should throw typeof<System.Exception >
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
[<TestMethod>]
member x.“Ratio constraint“() =
let parser = new BarModelParser()
parser.parse “box[1, 1]:box[2] : box[3, 2 to 7] = 5 :4: 3\n” |> function
| Success(s, _, _) ->
s |> List.length |> should equal 1
match s with
| [RatioConstraint(boxes, ratios)] ->
List.length boxes |> should equal 3
List.length ratios |> should equal 3
| _ -> “error” |> should throw typeof<System.Exception >
| Failure(err, _, _) -> err |> should throw typeof<System. Exception>
Leave a comment