JSON Tutorials
Setup
Firstly, install the tbnf
command-line tool from
the release page and add it
to your PATH.
Create the TBNF grammar
Create the Json.tbnf
with the following content at the root of your project
(or anywhere you want):
// Json.tbnf
// Host-language functions called from the semantic actions below.
// They are only declared here; the implementation lives in the
// <outDir>/<YourLanguage>Require.<suffix> file.
extern var toInt : str -> int
extern var toFlt : str -> float
extern var getStr : token -> str
extern var unesc : str -> str
extern var appendList : <a> (list<a>, a) -> list<a>
// Result types: Json is the value produced by the parser,
// JsonPair is a single name/value member of a JDict.
type Json
type JsonPair(name: str, value: Json)
// Constructors ("cases") of Json, one per kind of JSON value.
case JInt : int -> Json
case JFlt : float -> Json
case JStr : str -> Json
case JNull : () -> Json
case JList : (elements: list<Json>) -> Json
case JDict : list<JsonPair> -> Json
case JBool : bool -> Json
// NOTE(review): presumably makes the lexer drop `space` tokens - confirm against the TBNF reference.
ignore space
// Lexical rules use the form `name = pattern ;`.
digit = [0-9] ;
// Entry rule: yields the value built by `json`.
start : json { $1 }
int = digit+ ;
float = digit* "." int ;
// A quoted string: a backslash followed by any character, or any character other than a double quote.
str = "\"" ( "\\" _ | ! "\"" )* "\"" ;
space = ("\t" | "\n" | "\r" | " ")+;
// Parameterized rule: one or more `elt` separated by `sep`,
// accumulated left to right with appendList.
seplist(sep, elt) : elt { [$1] }
| seplist(sep, elt) sep elt
{ appendList($1, $3) }
// One object member; the key token is converted to text and unescaped.
jsonpair : <str> ":" json { JsonPair(unesc(getStr($1)), $3) }
// A JSON value. The empty "[" "]" and "{" "}" forms have their own
// alternatives because seplist always contains at least one element.
json : <int> { JInt(toInt(getStr($1))) }
| <float> { JFlt(toFlt(getStr($1))) }
| "null" { JNull() }
| <str> { JStr(unesc(getStr($1))) }
| "[" "]" { JList([]) }
| "{" "}" { JDict([]) }
| "true" { JBool(true) }
| "false" { JBool(false) }
| "[" seplist(",", json) "]" { JList($2) }
| "{" seplist(",", jsonpair) "}" { JDict($2) }
Define the type mapper
TBNF has several builtin types:
int
float
str
bool
list<a>
- tuples, e.g.:
int * str
is a tuple of an int
and a str
int * str * bool
is a triple of an int
, a str
and a bool
To associate each TBNF builtin type with a type in the target language, define the mapping in the tbnf.config.js
file:
-
Create
tbnf.config.js
file in your desired output directory. -
Define the type mapper in the file:
// file: Grammar/tbnf.config.js
"use strict";
/**
 * Maps a TBNF type name to the C# type name used in the generated code.
 * Names without an entry below (including "int" and "bool") are returned unchanged.
 */
function rename_type(x) {
    const csharpNames = [
        ["str", "string"],
        ["token", "IToken"],
        ["float", "double"],
        ["list", "System.Collections.Generic.List"],
        ["params", "Parameters"],
    ];
    for (const [tbnfName, csharpName] of csharpNames) {
        if (x == tbnfName) return csharpName;
    }
    return x;
}
/**
 * Maps a TBNF variable name to the name used in the generated code.
 * Only "params" is changed: it gets a "v" suffix.
 */
function rename_var(x) {
    return x == 'params' ? x + "v" : x;
}
/**
 * Maps a TBNF field name to the name used in the generated code.
 * Only "params" is changed: it becomes "parameters".
 */
function rename_field(x) {
    return x == 'params' ? "parameters" : x;
}
module.exports = { rename_type, rename_var, rename_field }
// file: src/grammar/tbnf.config.js
"use strict";
/**
 * Maps a TBNF type name to the TypeScript type name used in the generated code.
 * Builtin types map to their TypeScript counterparts; every other name gets a "_t" suffix.
 */
function rename_type(x) {
    const builtinTypes = [
        ["list", "Array"],
        ["int", "number"],
        ["float", "number"],
        ["str", "string"],
        ["bool", "boolean"],
        ["token", "antlr.Token"],
    ];
    const match = builtinTypes.find(([tbnfName]) => x == tbnfName);
    return match ? match[1] : x + "_t";
}
module.exports = {
rename_type
};
Tip
The tbnf.config.js
files shown above are suitable for common use cases.
Generate the parser
Note
When targeting C#, you should be aware of the following points:
- Do not use
-lang Json
because it will cause conflicts between the Json
type and the Json
namespace! - Ensure
tbnf
's -lang
option matches antlr4's -package
option!
# install dependencies: the ANTLR4 runtime package for C#
dotnet add package Antlr4.Runtime.Standard
# generate the grammar from Json.tbnf into Grammar/, then run antlr4 on
# Grammar/JsonCS.g4 to create the C# parser in the JsonCS package
tbnf Json.tbnf -o Grammar/ -lang JsonCS --backend csharp-antlr
antlr4 -Dlanguage=CSharp Grammar/JsonCS.g4 -o Grammar/ -package JsonCS
# install dependencies: the antlr4ng runtime (project) and the antlr-ng tool (global)
pnpm add antlr4ng --save
pnpm add -g antlr-ng
# generate the grammar from Json.tbnf into src/grammar, then run antlr-ng on
# src/grammar/Json.g4 to create the TypeScript parser
tbnf Json.tbnf -o src/grammar -lang Json -be typescript-antlr -ae tagged-union
antlr-ng -Dlanguage=TypeScript src/grammar/Json.g4 -o src/grammar
Implement the interfaces
In Json.tbnf
, we see some extern
declarations:
// The extern declarations from Json.tbnf; each name must be implemented in the Require file.
extern var toInt : str -> int
extern var toFlt : str -> float
extern var getStr : token -> str
extern var unesc : str -> str
extern var appendList : <a> (list<a>, a) -> list<a>
These declarations are required by the grammar to achieve semantic actions, but they are not provided by the generated parser itself.
We need to implement these functions in
<outDir>/<YourLanguage>Require.<suffix>
file, where:
-
<YourLanguage>
the language name specified with
-lang/--language
option. -
<outDir>
the output directory specified with
-o/--outDir
option. -
<suffix>
the suffix is decided by the backend you've chosen with
-be/--backend
option.
We need to implement these functions in Grammar/JsonCSRequire.cs
file.
namespace JsonCS;
using System.Linq;
using System.Collections.Generic;
using Antlr4.Runtime;
/// <summary>
/// Hand-written part of the parser class: implementations of the
/// <c>extern</c> functions declared in Json.tbnf.
/// </summary>
public partial class JsonCSParser
{
    /// <summary>Converts the text of an <c>int</c> token to an integer.</summary>
    /// <param name="s">Decimal digits.</param>
    public static int toInt(string s)
    {
        // Invariant culture: number syntax in the grammar must not depend on the machine's locale.
        return int.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>Converts the text of a <c>float</c> token to a double.</summary>
    /// <param name="s">Digits with a "." decimal separator.</param>
    public static double toFlt(string s)
    {
        // Without an explicit culture, "1.5" is misread on locales whose decimal separator is ",".
        return double.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>Returns the text of a token, or an empty string when the text is null.</summary>
    public static string getStr(IToken s)
    {
        return s.Text ?? "";
    }

    /// <summary>Returns a new list holding the elements of <paramref name="x"/> followed by <paramref name="y"/>.</summary>
    public static List<T> appendList<T>(List<T> x, T y)
    {
        // for modern .NET
        // return [...x, y];
        return x.Append(y).ToList();
    }

    /// <summary>
    /// Decodes the raw text of a <c>str</c> token: the first and last characters
    /// (the quotes) are dropped and backslash escapes are resolved.
    /// </summary>
    /// <remarks>
    /// <c>\b \f \n \r \t</c> become control characters, <c>\uXXXX</c> becomes the
    /// UTF-16 code unit with that hexadecimal value, and any other escaped
    /// character stands for itself.
    /// </remarks>
    /// <param name="s">Token text including the delimiting quotes.</param>
    public static string unesc(string s)
    {
        var r = new System.Text.StringBuilder();
        int i = 1;
        var n = s.Length - 1;
        while (i < n)
        {
            if (s[i] == '\\')
            {
                // Step onto the escaped character.
                i++;
                switch (s[i])
                {
                    case 'b':
                        r.Append('\b');
                        break;
                    case 'f':
                        r.Append('\f');
                        break;
                    case 'n':
                        r.Append('\n');
                        break;
                    case 'r':
                        r.Append('\r');
                        break;
                    case 't':
                        r.Append('\t');
                        break;
                    case 'u':
                        // The four hex digits must lie inside the quotes; otherwise keep the 'u' literally.
                        if (i + 4 < n && TryParseHex4(s, i + 1, out var unit))
                        {
                            r.Append(unit);
                            i += 4;
                        }
                        else
                        {
                            r.Append('u');
                        }
                        break;
                    default:
                        // \\, \", \', \/ and anything else: the escaped character itself.
                        r.Append(s[i]);
                        break;
                }
            }
            else
            {
                r.Append(s[i]);
            }
            i++;
        }
        return r.ToString();
    }

    /// <summary>Reads four hexadecimal digits starting at <paramref name="start"/> as one UTF-16 code unit.</summary>
    private static bool TryParseHex4(string s, int start, out char unit)
    {
        int value = 0;
        for (int k = start; k < start + 4; k++)
        {
            char c = s[k];
            int digit;
            if (c >= '0' && c <= '9') digit = c - '0';
            else if (c >= 'a' && c <= 'f') digit = c - 'a' + 10;
            else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10;
            else
            {
                unit = '\0';
                return false;
            }
            value = (value << 4) | digit;
        }
        unit = (char)value;
        return true;
    }
}
We need to implement these functions in src/grammar/JsonRequire.ts
file.
import { Token } from "antlr4ng";
/**
 * Converts the text of an `int` token to a number.
 *
 * Declared with a single parameter and an explicit radix instead of aliasing
 * `parseInt`, so extra arguments (e.g. the index passed by `Array.prototype.map`)
 * can never be taken for a radix.
 *
 * @param s Decimal digits.
 * @returns The parsed integer.
 */
export function toInt(s: string): number
{
    return parseInt(s, 10);
}

/**
 * Converts the text of a `float` token to a number.
 *
 * @param s Digits with a "." decimal separator.
 * @returns The parsed floating-point value.
 */
export function toFlt(s: string): number
{
    return parseFloat(s);
}
/**
 * Returns the text of a token, or an empty string when the token's
 * `text` is null or undefined.
 */
export function getStr(x: Token): string
{
    return x.text ?? "";
}
/**
 * Returns a new array holding the elements of `x` followed by `y`.
 * The input array is left untouched.
 */
export function appendList<T>(x: T[], y: T)
{
    const extended = Array.from(x);
    extended.push(y);
    return extended;
}
// we should export these for the generated parser to use
export * from './JsonConstructor';
// JSON.parse is also a valid implementation,
// but we might not use an existing JSON parser to implement the current JSON parser...
/** Matches exactly four hexadecimal digits (the payload of a `\uXXXX` escape). */
const HEX4 = /^[0-9a-fA-F]{4}$/;

/**
 * Decodes the raw text of a `str` token into the string it denotes.
 *
 * The first and last characters (the surrounding quotes) are dropped and
 * backslash escapes are resolved: `\b \t \n \f \r` become control characters,
 * `\uXXXX` becomes the UTF-16 code unit with that hexadecimal value, and any
 * other escaped character (`\\`, `\"`, `\'`, `\/`, ...) stands for itself.
 *
 * @param s Token text including its delimiting quotes.
 * @returns The unescaped contents.
 */
export function unesc(s: string): string
{
    let r = "";
    let i = 1;
    const n = s.length - 1;
    while (i < n)
    {
        if (s.charAt(i) !== '\\')
        {
            r += s.charAt(i);
            i++;
            continue;
        }
        // Step onto the escaped character.
        i++;
        const escaped = s.charAt(i);
        switch (escaped)
        {
            case 'b':
                r += '\b';
                break;
            case 't':
                r += '\t';
                break;
            case 'n':
                r += '\n';
                break;
            case 'f':
                r += '\f';
                break;
            case 'r':
                r += '\r';
                break;
            case 'u':
            {
                // The four hex digits must lie inside the quotes; otherwise keep the 'u' literally.
                const hex = s.slice(i + 1, i + 5);
                if (i + 4 < n && HEX4.test(hex))
                {
                    // Surrogate pairs arrive as two consecutive escapes and join up on concatenation.
                    r += String.fromCharCode(parseInt(hex, 16));
                    i += 4;
                }
                else
                {
                    r += escaped;
                }
                break;
            }
            default:
                r += escaped;
                break;
        }
        i++;
    }
    return r;
}
Create the final parser
We finally create the Program.cs
file to export the parser and handle parse exceptions:
// Program.cs
namespace JsonCS;
using Antlr4.Runtime;
/// <summary>
/// Exception raised for lexer and parser errors; carries the position
/// reported by the error listener alongside the message.
/// </summary>
sealed class SyntaxError : System.Exception
{
    /// <summary>Line number passed to the constructor.</summary>
    public int Line { get; }

    /// <summary>Column passed to the constructor.</summary>
    public int Column { get; }

    public SyntaxError(string message, int line, int column)
        : base(message)
        => (Line, Column) = (line, column);
}
/// <summary>
/// Parser error listener that throws a <see cref="SyntaxError"/> for the
/// first error it is notified of.
/// </summary>
class ExcErrorListener : BaseErrorListener
{
    public override void SyntaxError(TextWriter output, IRecognizer recognizer, IToken offendingSymbol, int line, int charPositionInLine, string msg, RecognitionException e)
    {
        // Put the position in the message so it is visible even when only Message is printed.
        var description = $"Syntax error at line {line}:{charPositionInLine}: {msg}";
        throw new SyntaxError(description, line, charPositionInLine);
    }
}
/// <summary>
/// Lexer error listener that throws a <see cref="SyntaxError"/> carrying the
/// unmodified message and the reported position.
/// </summary>
class LexerErrorListener : IAntlrErrorListener<int>
{
    public void SyntaxError(TextWriter output, IRecognizer recognizer, int offendingSymbol, int line, int charPositionInLine, string msg, RecognitionException e)
        => throw new SyntaxError(msg, line, charPositionInLine);
}
public partial class JsonCSParser
{
    /// <summary>
    /// Parses <paramref name="s"/> and returns the value produced by the
    /// <c>start</c> rule.
    /// </summary>
    /// <remarks>
    /// The default error listeners of both lexer and parser are replaced by
    /// listeners that throw <see cref="SyntaxError"/>. Parse-tree construction
    /// is switched off; the value is read from the rule's <c>result</c>.
    /// </remarks>
    public static Json Parse(string s)
    {
        var lexer = new JsonCSLexer(CharStreams.fromString(s));
        lexer.RemoveErrorListeners();
        lexer.AddErrorListener(new LexerErrorListener());

        var parser = new JsonCSParser(new CommonTokenStream(lexer))
        {
            BuildParseTree = false,
        };
        parser.RemoveErrorListeners();
        parser.AddErrorListener(new ExcErrorListener());

        return parser.start().result;
    }
}
/// <summary>Entry point: parses a sample JSON object and prints the result.</summary>
public static class Program
{
    public static void Main(string[] args)
    {
        const string sample = "{\"name\": \"John\", \"age\": 30, \"city\": \"New York\"}";
        Console.WriteLine(JsonCSParser.Parse(sample));
    }
}
You can then compile/test the package:
> dotnet run
Output:
JDict { value = System.Collections.Generic.List`1[JsonCS.JsonPair] }
We finally create the src/index.ts
file to export the parser and handle parse exceptions:
// src/index.ts
import * as antlr from "antlr4ng";
import { JsonParser } from "./grammar/JsonParser";
import { JsonLexer } from "./grammar/JsonLexer";
import { CommonTokenStream } from "antlr4ng";
/**
 * Error listener that throws a `SyntaxError` for the first error it is
 * notified of; the message includes the reported line and column.
 */
class ExcErrorListener extends antlr.BaseErrorListener {
    syntaxError(
        recognizer: antlr.Recognizer<any>,
        offendingSymbol: any,
        line: number,
        charPositionInLine: number,
        msg: string,
        e: antlr.RecognitionException | null,
    ) {
        const position = `${line}:${charPositionInLine}`;
        throw new SyntaxError(`Syntax error at line ${position}: ${msg}`);
    }
}
/**
 * Parses `text` and returns the value produced by the grammar's `start` rule.
 *
 * The default error listeners of both lexer and parser are replaced by
 * `ExcErrorListener`, so errors are thrown as `SyntaxError`. Parse-tree
 * construction is switched off; the value is read from the rule's `result`.
 */
export function parseJson(text: string) {
    const lexer = new JsonLexer(antlr.CharStream.fromString(text));
    lexer.removeErrorListeners();
    lexer.addErrorListener(new ExcErrorListener());

    const parser = new JsonParser(new CommonTokenStream(lexer));
    parser.removeErrorListeners();
    parser.addErrorListener(new ExcErrorListener());
    parser.buildParseTrees = false;

    return parser.start().result;
}
// test the parser: parse a sample object and print the returned value
console.log(parseJson(`{"name": "John", "age": 30, "city": "New York"}`));
You can then compile/test the package:
tsc -p . -outDir dist
node dist/index.js
Output:
{
'$type': 'JDict',
value: [
{ name: 'name', value: [Object] },
{ name: 'age', value: [Object] },
{ name: 'city', value: [Object] }
]
}