Try with logos and prattle.

This commit is contained in:
James Harton 2019-02-08 18:49:15 +13:00
parent d786485269
commit 4fd67d9a16
6 changed files with 318 additions and 42 deletions

View file

@@ -1,6 +1,6 @@
MIT License MIT License
Copyright (c) 2018 James Harton Copyright (c) 2019 James Harton
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View file

@@ -3,7 +3,7 @@ use logos::{Lexer, Logos};
use prattle; use prattle;
#[derive(Logos, Debug, PartialEq)] #[derive(Logos, Debug, PartialEq)]
pub enum Token { pub enum LanguageToken {
#[end] #[end]
End, End,
@@ -16,14 +16,30 @@ pub enum Token {
Assign, Assign,
#[regex = ":[a-zA-Z_]+"] #[regex = ":[a-zA-Z_]+"]
Atom, Atom,
#[token = "true"]
BooleanTrue,
#[token = "false"]
BooleanFalse,
#[token = ":"]
Colon,
#[token = ","]
Comma,
#[regex = "[A-Z][a-zA-Z0-9_]+"]
Constant,
#[token = "/"] #[token = "/"]
Div, Div,
#[regex = "[0-9]*\\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+[eE][+-]?[0-9]+"] #[regex = "[0-9]*\\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+[eE][+-]?[0-9]+"]
Float, Float,
#[regex = "0[bB][01][01_]*"]
IntegerBinary,
#[regex = "[1-9][0-9]*|0"] #[regex = "[1-9][0-9]*|0"]
Integer, IntegerDecimal,
#[regex = "0[xX][0-9a-fA-F][0-9a-fA-F_]*"]
IntegerHex,
#[regex = "0[oO][0-7][0-7_]*"]
IntegerOctal,
#[regex = "[a-z_][a-zA-Z0-9_]*"] #[regex = "[a-z_][a-zA-Z0-9_]*"]
Local, Ident,
#[token = "&&"] #[token = "&&"]
LogicalAnd, LogicalAnd,
#[token = "||"] #[token = "||"]
@@ -32,43 +48,82 @@ pub enum Token {
Mod, Mod,
#[token = "*"] #[token = "*"]
Mult, Mult,
#[token = "("]
ParensLeft,
#[token = ")"]
ParensRight,
#[token = "."]
Period,
#[regex = r#""([^"\\]|\\t|\\u|\\n|\\")*""#]
String,
#[token = "-"] #[token = "-"]
Sub, Sub,
} }
pub struct HuiaLexer<'a> { pub struct HuiaLexer<'a> {
lexer: Lexer<Token, &'a str>, lexer: Lexer<LanguageToken, &'a str>,
next: Option<IRToken>, next: Option<IRToken>,
prev: Option<IRToken>, prev: Option<IRToken>,
current: Option<IRToken>, current: Option<IRToken>,
} }
fn lexer_to_token<'a>(lexer: &mut Lexer<Token, &'a str>) -> Option<IRToken> { fn lexer_to_token<'a>(lexer: &mut Lexer<LanguageToken, &'a str>) -> Option<IRToken> {
match lexer.token { match lexer.token {
Token::Add => Some(IRToken::Add), LanguageToken::Add => Some(IRToken::Add),
Token::Assign => Some(IRToken::Assign), LanguageToken::Assign => Some(IRToken::Assign),
Token::Atom => { LanguageToken::Atom => {
let slice = lexer.slice(); let slice = lexer.slice();
let len = slice.len(); let len = slice.len();
Some(IRToken::Atom(slice.get(1..len).unwrap().to_string())) Some(IRToken::Atom(slice.get(1..len).unwrap().to_string()))
} }
Token::Div => Some(IRToken::Div), LanguageToken::BooleanFalse => Some(IRToken::Atom("false".to_string())),
Token::Float => Some(IRToken::Float(lexer.slice().to_string())), LanguageToken::Colon => Some(IRToken::Colon),
Token::Local => Some(IRToken::Local(lexer.slice().to_string())), LanguageToken::Comma => Some(IRToken::Comma),
Token::Integer => Some(IRToken::Integer(lexer.slice().to_string())), LanguageToken::Constant => Some(IRToken::Constant(lexer.slice().to_string())),
Token::LogicalAnd => Some(IRToken::LogicalAnd), LanguageToken::Div => Some(IRToken::Div),
Token::LogicalOr => Some(IRToken::LogicalOr), LanguageToken::Float => Some(IRToken::Float(lexer.slice().to_string())),
Token::Mod => Some(IRToken::Mod), LanguageToken::Ident => Some(IRToken::Ident(lexer.slice().to_string())),
Token::Mult => Some(IRToken::Mult), LanguageToken::IntegerBinary => {
Token::Sub => Some(IRToken::Sub), let slice = lexer.slice();
Token::End => None, let len = slice.len();
Some(IRToken::IntegerBinary(
slice.get(2..len).unwrap().to_string(),
))
}
LanguageToken::IntegerDecimal => Some(IRToken::IntegerDecimal(lexer.slice().to_string())),
LanguageToken::IntegerHex => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::IntegerHex(slice.get(2..len).unwrap().to_string()))
}
LanguageToken::IntegerOctal => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::IntegerOctal(
slice.get(2..len).unwrap().to_string(),
))
}
LanguageToken::LogicalAnd => Some(IRToken::LogicalAnd),
LanguageToken::LogicalOr => Some(IRToken::LogicalOr),
LanguageToken::Mod => Some(IRToken::Mod),
LanguageToken::Mult => Some(IRToken::Mult),
LanguageToken::ParensLeft => Some(IRToken::ParensLeft),
LanguageToken::ParensRight => Some(IRToken::ParensRight),
LanguageToken::Period => Some(IRToken::Period),
LanguageToken::Sub => Some(IRToken::Sub),
LanguageToken::String => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::String(slice.get(1..len - 1).unwrap().to_string()))
}
LanguageToken::End => None,
_ => unreachable!("Unexpected token {:?}", lexer.token), _ => unreachable!("Unexpected token {:?}", lexer.token),
} }
} }
impl<'a> HuiaLexer<'a> { impl<'a> HuiaLexer<'a> {
pub fn new(source: &'a str) -> Self { pub fn new(source: &'a str) -> Self {
let mut lex = Token::lexer(source); let mut lex = LanguageToken::lexer(source);
let next = lexer_to_token(&mut lex); let next = lexer_to_token(&mut lex);
@@ -104,7 +159,7 @@ impl<'a> Into<Vec<IRToken>> for HuiaLexer<'a> {
let mut lexer = self.lexer; let mut lexer = self.lexer;
let mut result = Vec::new(); let mut result = Vec::new();
while lexer.token != Token::End { while lexer.token != LanguageToken::End {
result.push(lexer_to_token(&mut lexer).unwrap()); result.push(lexer_to_token(&mut lexer).unwrap());
lexer.advance(); lexer.advance();
} }
@@ -119,28 +174,47 @@ mod test {
#[test] #[test]
fn test_basic_lex() { fn test_basic_lex() {
let mut lexer = Token::lexer("a = 1.23 + 2.34"); let mut lexer = LanguageToken::lexer("a = 1.23 + 2.34");
assert_eq!(lexer.token, Token::Local); assert_eq!(lexer.token, LanguageToken::Ident);
assert_eq!(lexer.slice(), "a"); assert_eq!(lexer.slice(), "a");
lexer.advance(); lexer.advance();
assert_eq!(lexer.token, Token::Assign); assert_eq!(lexer.token, LanguageToken::Assign);
assert_eq!(lexer.slice(), "="); assert_eq!(lexer.slice(), "=");
lexer.advance(); lexer.advance();
assert_eq!(lexer.token, Token::Float); assert_eq!(lexer.token, LanguageToken::Float);
assert_eq!(lexer.slice(), "1.23"); assert_eq!(lexer.slice(), "1.23");
lexer.advance(); lexer.advance();
assert_eq!(lexer.token, Token::Add); assert_eq!(lexer.token, LanguageToken::Add);
assert_eq!(lexer.slice(), "+"); assert_eq!(lexer.slice(), "+");
lexer.advance(); lexer.advance();
assert_eq!(lexer.token, Token::Float); assert_eq!(lexer.token, LanguageToken::Float);
assert_eq!(lexer.slice(), "2.34"); assert_eq!(lexer.slice(), "2.34");
lexer.advance(); lexer.advance();
} }
#[test]
fn test_wat() {
let mut lexer = HuiaLexer::new(
"
type Greeter do
def hello(whom: String) do
\"Hello %s!\".format(whom)
end
def hello do
\"Hello World!\"
end
end
",
);
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, []);
}
#[test] #[test]
fn test_converts_into_irtokens() { fn test_converts_into_irtokens() {
let lexer = HuiaLexer::new("a = 1.23 + 2.34"); let lexer = HuiaLexer::new("a = 1.23 + 2.34");
@@ -149,7 +223,7 @@ mod test {
assert_eq!( assert_eq!(
result, result,
[ [
IRToken::Local("a".to_string()), IRToken::Ident("a".to_string()),
IRToken::Assign, IRToken::Assign,
IRToken::Float("1.23".to_string()), IRToken::Float("1.23".to_string()),
IRToken::Add, IRToken::Add,
@@ -179,6 +253,27 @@ mod test {
assert_eq!(result, [IRToken::Atom("marty".to_string())]); assert_eq!(result, [IRToken::Atom("marty".to_string())]);
} }
#[test]
fn test_colon() {
let lexer = HuiaLexer::new(":");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::Colon]);
}
#[test]
fn test_comma() {
let lexer = HuiaLexer::new(",");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::Comma]);
}
#[test]
fn test_constant() {
let lexer = HuiaLexer::new("Marty");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::Constant("Marty".to_string())]);
}
#[test] #[test]
fn test_div() { fn test_div() {
let lexer = HuiaLexer::new("/"); let lexer = HuiaLexer::new("/");
@@ -194,17 +289,41 @@ mod test {
} }
#[test] #[test]
fn test_integer() { fn test_integer_binary() {
let lexer = HuiaLexer::new("123"); let lexer = HuiaLexer::new("0b101_01");
let result: Vec<IRToken> = lexer.into(); let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::Integer("123".to_string())]); assert_eq!(result, [IRToken::IntegerBinary("101_01".to_string())]);
} }
#[test] #[test]
fn test_local() { fn test_integer_decimal() {
let lexer = HuiaLexer::new("123");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::IntegerDecimal("123".to_string())]);
}
#[test]
fn test_integer_hex() {
let lexer = HuiaLexer::new("0Xfedcba987654_3210");
let result: Vec<IRToken> = lexer.into();
assert_eq!(
result,
[IRToken::IntegerHex("fedcba987654_3210".to_string())]
);
}
#[test]
fn test_integer_octal() {
let lexer = HuiaLexer::new("0o7654_3210");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::IntegerOctal("7654_3210".to_string())]);
}
#[test]
fn test_ident() {
let lexer = HuiaLexer::new("marty"); let lexer = HuiaLexer::new("marty");
let result: Vec<IRToken> = lexer.into(); let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::Local("marty".to_string())]); assert_eq!(result, [IRToken::Ident("marty".to_string())]);
} }
#[test] #[test]
@@ -235,6 +354,27 @@ mod test {
assert_eq!(result, [IRToken::Mult]); assert_eq!(result, [IRToken::Mult]);
} }
#[test]
fn test_parens_left() {
let lexer = HuiaLexer::new("(");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::ParensLeft]);
}
#[test]
fn test_parens_right() {
let lexer = HuiaLexer::new(")");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::ParensRight]);
}
#[test]
fn test_string() {
let lexer = HuiaLexer::new("\"Marty McFly\"");
let result: Vec<IRToken> = lexer.into();
assert_eq!(result, [IRToken::String("Marty McFly".to_string())]);
}
#[test] #[test]
fn test_sub() { fn test_sub() {
let lexer = HuiaLexer::new("-"); let lexer = HuiaLexer::new("-");

View file

@@ -6,6 +6,7 @@ extern crate lazy_static;
mod lexer; mod lexer;
mod parser; mod parser;
mod term;
mod token; mod token;
// pub use parser::parser_spec; // pub use parser::parser_spec;

View file

@@ -11,18 +11,36 @@ lazy_static! {
// Literals have the lowest precedence. // Literals have the lowest precedence.
spec.add_null_associations( spec.add_null_associations(
vec![ vec![
IRToken::Atom("".to_string()),
IRToken::Constant("".to_string()),
IRToken::Float("".to_string()), IRToken::Float("".to_string()),
IRToken::Local("".to_string()), IRToken::Ident("".to_string()),
IRToken::Integer("".to_string()), IRToken::IntegerBinary("".to_string()),
IRToken::IntegerDecimal("".to_string()),
IRToken::IntegerHex("".to_string()),
IRToken::IntegerOctal("".to_string()),
IRToken::String("".to_string())
], ],
PrecedenceLevel::Root, PrecedenceLevel::Root,
|_, token: IRToken, _| Ok(Node::Simple(token.clone())), |_, token: IRToken, _| Ok(Node::Simple(token.clone())),
).unwrap(); ).unwrap();
// Colon
spec.add_left_assoc(
IRToken::Colon,
PrecedenceLevel::First,
|parser, token, lbp, node| {
Ok(Node::Composite {
token: token.clone(),
children: vec![node, parser.parse_expr(lbp)?],
})
},
).unwrap();
// Assignment // Assignment
spec.add_left_assoc( spec.add_left_assoc(
IRToken::Assign, IRToken::Assign,
PrecedenceLevel::First, PrecedenceLevel::Second,
|parser, token, lbp, node| { |parser, token, lbp, node| {
Ok(Node::Composite { Ok(Node::Composite {
token: token.clone(), token: token.clone(),
@@ -34,7 +52,7 @@ lazy_static! {
// LogicalOr // LogicalOr
spec.add_left_assoc( spec.add_left_assoc(
IRToken::LogicalOr, IRToken::LogicalOr,
PrecedenceLevel::Second, PrecedenceLevel::Third,
|parser, token, lbp, node| { |parser, token, lbp, node| {
Ok(Node::Composite { Ok(Node::Composite {
token: token.clone(), token: token.clone(),
@@ -46,7 +64,7 @@ lazy_static! {
// LogicalAnd // LogicalAnd
spec.add_left_assoc( spec.add_left_assoc(
IRToken::LogicalAnd, IRToken::LogicalAnd,
PrecedenceLevel::Third, PrecedenceLevel::Fourth,
|parser, token, lbp, node| { |parser, token, lbp, node| {
Ok(Node::Composite { Ok(Node::Composite {
token: token.clone(), token: token.clone(),
@@ -58,7 +76,7 @@ lazy_static! {
// Add, Sub // Add, Sub
spec.add_left_associations( spec.add_left_associations(
vec![IRToken::Add, IRToken::Sub], vec![IRToken::Add, IRToken::Sub],
PrecedenceLevel::Fourth, PrecedenceLevel::Fifth,
|parser, token, lbp, node| { |parser, token, lbp, node| {
Ok(Node::Composite { Ok(Node::Composite {
token: token.clone(), token: token.clone(),
@@ -70,7 +88,7 @@ lazy_static! {
// Div, Mod, Mult // Div, Mod, Mult
spec.add_left_associations( spec.add_left_associations(
vec![IRToken::Div, IRToken::Mod, IRToken::Mult], vec![IRToken::Div, IRToken::Mod, IRToken::Mult],
PrecedenceLevel::Fifth, PrecedenceLevel::Sixth,
|parser, token, lbp, node| { |parser, token, lbp, node| {
Ok(Node::Composite { Ok(Node::Composite {
token: token.clone(), token: token.clone(),
@@ -79,6 +97,32 @@ lazy_static! {
}, },
).unwrap(); ).unwrap();
// Object indexing
spec.add_left_associations(
vec![IRToken::Period],
PrecedenceLevel::Seventh,
|parser, token, lbp, node| {
Ok(Node::Composite {
token: token.clone(),
children: vec![node, parser.parse_expr(lbp)?]
})
}
).unwrap();
// Method call
spec.add_left_associations(
vec![IRToken::ParensLeft],
PrecedenceLevel::Eighth,
|parser, token, lbp, node| {
let res = parser.parse_expr(lbp)?;
parser.consume(IRToken::ParensRight)?;
Ok(Node::Composite {
token: token.clone(),
children: vec![node, res]
})
}
).unwrap();
spec spec
}; };
} }
@@ -107,4 +151,15 @@ mod test {
assert!(parser.parse().is_ok()); assert!(parser.parse().is_ok());
} }
#[test]
fn test_call() {
let tokens = HuiaLexer::new("\"Hello %s\".format(whom)");
let mut parser = HuiaParser::new(tokens);
let nodes = parser.parse().unwrap();
print!("{:?}", nodes);
assert!(false);
// assert_eq!(nodes, "wat");
}
} }

68
huia-irparser/src/term.rs Normal file
View file

@@ -0,0 +1,68 @@
use std::string;
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Term {
Atom(Atom),
Constant(Constant),
Float(Float),
Integer(Integer),
Ident(Ident),
String(String),
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Atom(string::String);
impl Atom {
fn new(value: &str) -> Self {
Atom(value.to_string())
}
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Constant(string::String);
impl Constant {
fn new(value: &str) -> Self {
Constant(value.to_string())
}
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Float(f64);
impl Float {
fn new(value: &str) -> Self {
Float(value.parse().unwrap())
}
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Integer(i64, usize);
impl Integer {
fn new(value: &str, radix: usize) -> Self {
match i64::from_str_radix(value, radix as u32) {
Ok(i) => Integer(i, radix),
Err(_) => unreachable!("Unable to parse integer {:?} radix {:?}", value, radix)
}
}
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Ident(string::String);
impl Ident {
fn new(value: &str) -> Self {
Ident(value.to_string())
}
}
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct String(string::String);
impl String {
fn new(value: &str) -> Self {
String(value.to_string())
}
}

View file

@@ -5,14 +5,26 @@ pub enum IRToken {
Add, Add,
Assign, Assign,
Atom(String), Atom(String),
Colon,
Comma,
Constant(String),
Div, Div,
Float(String), Float(String),
Integer(String), Ident(String),
Local(String), IntegerBinary(String),
IntegerDecimal(String),
IntegerHex(String),
IntegerOctal(String),
InterpolateStart,
InterpolateEnd,
LogicalAnd, LogicalAnd,
LogicalOr, LogicalOr,
Mod, Mod,
Mult, Mult,
ParensLeft,
ParensRight,
Period,
String(String),
Sub, Sub,
} }