From 4fd67d9a1652084f4260bbf408899d5c841af5a0 Mon Sep 17 00:00:00 2001 From: James Harton Date: Fri, 8 Feb 2019 18:49:15 +1300 Subject: [PATCH] Try with logos and prattle. --- LICENSE | 2 +- huia-irparser/src/lexer.rs | 204 ++++++++++++++++++++++++++++++------ huia-irparser/src/lib.rs | 1 + huia-irparser/src/parser.rs | 69 ++++++++++-- huia-irparser/src/term.rs | 68 ++++++++++++ huia-irparser/src/token.rs | 16 ++- 6 files changed, 318 insertions(+), 42 deletions(-) create mode 100644 huia-irparser/src/term.rs diff --git a/LICENSE b/LICENSE index 0c15df0..1d8321e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 James Harton +Copyright (c) 2019 James Harton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/huia-irparser/src/lexer.rs b/huia-irparser/src/lexer.rs index 871acb7..b66e937 100644 --- a/huia-irparser/src/lexer.rs +++ b/huia-irparser/src/lexer.rs @@ -3,7 +3,7 @@ use logos::{Lexer, Logos}; use prattle; #[derive(Logos, Debug, PartialEq)] -pub enum Token { +pub enum LanguageToken { #[end] End, @@ -16,14 +16,30 @@ pub enum Token { Assign, #[regex = ":[a-zA-Z_]+"] Atom, + #[token = "true"] + BooleanTrue, + #[token = "false"] + BooleanFalse, + #[token = ":"] + Colon, + #[token = ","] + Comma, + #[regex = "[A-Z][a-zA-Z0-9_]+"] + Constant, #[token = "/"] Div, #[regex = "[0-9]*\\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+[eE][+-]?[0-9]+"] Float, + #[regex = "0[bB][01][01_]*"] + IntegerBinary, #[regex = "[1-9][0-9]*|0"] - Integer, + IntegerDecimal, + #[regex = "0[xX][0-9a-fA-F][0-9a-fA-F_]*"] + IntegerHex, + #[regex = "0[oO][0-7][0-7_]*"] + IntegerOctal, #[regex = "[a-z_][a-zA-Z0-9_]*"] - Local, + Ident, #[token = "&&"] LogicalAnd, #[token = "||"] @@ -32,43 +48,82 @@ pub enum Token { Mod, #[token = "*"] Mult, + #[token = "("] + ParensLeft, + #[token = ")"] + ParensRight, + #[token = "."] + Period, + #[regex = r#""([^"\\]|\\t|\\u|\\n|\\")*""#] + String, #[token = "-"] Sub, } pub struct HuiaLexer<'a> { - lexer: Lexer, + lexer: Lexer, next: Option, prev: Option, current: Option, } -fn lexer_to_token<'a>(lexer: &mut Lexer) -> Option { +fn lexer_to_token<'a>(lexer: &mut Lexer) -> Option { match lexer.token { - Token::Add => Some(IRToken::Add), - Token::Assign => Some(IRToken::Assign), - Token::Atom => { + LanguageToken::Add => Some(IRToken::Add), + LanguageToken::Assign => Some(IRToken::Assign), + LanguageToken::Atom => { let slice = lexer.slice(); let len = slice.len(); Some(IRToken::Atom(slice.get(1..len).unwrap().to_string())) } - Token::Div => Some(IRToken::Div), - Token::Float => Some(IRToken::Float(lexer.slice().to_string())), - Token::Local => Some(IRToken::Local(lexer.slice().to_string())), - Token::Integer => Some(IRToken::Integer(lexer.slice().to_string())), - Token::LogicalAnd => Some(IRToken::LogicalAnd), - Token::LogicalOr => Some(IRToken::LogicalOr), - Token::Mod => Some(IRToken::Mod), - Token::Mult => Some(IRToken::Mult), - Token::Sub => Some(IRToken::Sub), - Token::End => None, + LanguageToken::BooleanFalse => Some(IRToken::Atom("true".to_string())), + LanguageToken::Colon => Some(IRToken::Colon), + LanguageToken::Comma => Some(IRToken::Comma), + LanguageToken::Constant => Some(IRToken::Constant(lexer.slice().to_string())), + LanguageToken::Div => Some(IRToken::Div), + LanguageToken::Float => Some(IRToken::Float(lexer.slice().to_string())), + LanguageToken::Ident => Some(IRToken::Ident(lexer.slice().to_string())), + LanguageToken::IntegerBinary => { + let slice = lexer.slice(); + let len = slice.len(); + Some(IRToken::IntegerBinary( + slice.get(2..len).unwrap().to_string(), + )) + } + LanguageToken::IntegerDecimal => Some(IRToken::IntegerDecimal(lexer.slice().to_string())), + LanguageToken::IntegerHex => { + let slice = lexer.slice(); + let len = slice.len(); + Some(IRToken::IntegerHex(slice.get(2..len).unwrap().to_string())) + } + LanguageToken::IntegerOctal => { + let slice = lexer.slice(); + let len = slice.len(); + Some(IRToken::IntegerOctal( + slice.get(2..len).unwrap().to_string(), + )) + } + LanguageToken::LogicalAnd => Some(IRToken::LogicalAnd), + LanguageToken::LogicalOr => Some(IRToken::LogicalOr), + LanguageToken::Mod => Some(IRToken::Mod), + LanguageToken::Mult => Some(IRToken::Mult), + LanguageToken::ParensLeft => Some(IRToken::ParensLeft), + LanguageToken::ParensRight => Some(IRToken::ParensRight), + LanguageToken::Period => Some(IRToken::Period), + LanguageToken::Sub => Some(IRToken::Sub), + LanguageToken::String => { + let slice = lexer.slice(); + let len = slice.len(); + Some(IRToken::String(slice.get(1..len - 1).unwrap().to_string())) + } + LanguageToken::End => None, _ => unreachable!("Unexpected token {:?}", lexer.token), } } impl<'a> HuiaLexer<'a> { pub fn new(source: &'a str) -> Self { - let mut lex = Token::lexer(source); + let mut lex = LanguageToken::lexer(source); let next = lexer_to_token(&mut lex); @@ -104,7 +159,7 @@ impl<'a> Into> for HuiaLexer<'a> { let mut lexer = self.lexer; let mut result = Vec::new(); - while lexer.token != Token::End { + while lexer.token != LanguageToken::End { result.push(lexer_to_token(&mut lexer).unwrap()); lexer.advance(); } @@ -119,28 +174,47 @@ mod test { #[test] fn test_basic_lex() { - let mut lexer = Token::lexer("a = 1.23 + 2.34"); - assert_eq!(lexer.token, Token::Local); + let mut lexer = LanguageToken::lexer("a = 1.23 + 2.34"); + assert_eq!(lexer.token, LanguageToken::Ident); assert_eq!(lexer.slice(), "a"); lexer.advance(); - assert_eq!(lexer.token, Token::Assign); + assert_eq!(lexer.token, LanguageToken::Assign); assert_eq!(lexer.slice(), "="); lexer.advance(); - assert_eq!(lexer.token, Token::Float); + assert_eq!(lexer.token, LanguageToken::Float); assert_eq!(lexer.slice(), "1.23"); lexer.advance(); - assert_eq!(lexer.token, Token::Add); + assert_eq!(lexer.token, LanguageToken::Add); assert_eq!(lexer.slice(), "+"); lexer.advance(); - assert_eq!(lexer.token, Token::Float); + assert_eq!(lexer.token, LanguageToken::Float); assert_eq!(lexer.slice(), "2.34"); lexer.advance(); } + #[test] + fn test_wat() { + let mut lexer = HuiaLexer::new( + " + type Greeter do + def hello(whom: String) do + \"Hello %s!\".format(whom) + end + + def hello do + \"Hello World!\" + end + end + ", + ); + let result: Vec = lexer.into(); + assert_eq!(result, []); + } + #[test] fn test_converts_into_irtokens() { let lexer = HuiaLexer::new("a = 1.23 + 2.34"); @@ -149,7 +223,7 @@ mod test { assert_eq!( result, [ - IRToken::Local("a".to_string()), + IRToken::Ident("a".to_string()), IRToken::Assign, IRToken::Float("1.23".to_string()), IRToken::Add, @@ -179,6 +253,27 @@ mod test { assert_eq!(result, [IRToken::Atom("marty".to_string())]); } + #[test] + fn test_colon() { + let lexer = HuiaLexer::new(":"); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::Colon]); + } + + #[test] + fn test_comma() { + let lexer = HuiaLexer::new(","); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::Comma]); + } + + #[test] + fn test_constant() { + let lexer = HuiaLexer::new("Marty"); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::Constant("Marty".to_string())]); + } + #[test] fn test_div() { let lexer = HuiaLexer::new("/"); @@ -194,17 +289,41 @@ mod test { } #[test] - fn test_integer() { - let lexer = HuiaLexer::new("123"); + fn test_integer_binary() { + let lexer = HuiaLexer::new("0b101_01"); let result: Vec = lexer.into(); - assert_eq!(result, [IRToken::Integer("123".to_string())]); + assert_eq!(result, [IRToken::IntegerBinary("101_01".to_string())]); } #[test] - fn test_local() { + fn test_integer_decimal() { + let lexer = HuiaLexer::new("123"); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::IntegerDecimal("123".to_string())]); + } + + #[test] + fn test_integer_hex() { + let lexer = HuiaLexer::new("0Xfedcba987654_3210"); + let result: Vec = lexer.into(); + assert_eq!( + result, + [IRToken::IntegerHex("fedcba987654_3210".to_string())] + ); + } + + #[test] + fn test_integer_octal() { + let lexer = HuiaLexer::new("0o7654_3210"); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::IntegerOctal("7654_3210".to_string())]); + } + + #[test] + fn test_ident() { let lexer = HuiaLexer::new("marty"); let result: Vec = lexer.into(); - assert_eq!(result, [IRToken::Local("marty".to_string())]); + assert_eq!(result, [IRToken::Ident("marty".to_string())]); } #[test] @@ -235,6 +354,27 @@ mod test { assert_eq!(result, [IRToken::Mult]); } + #[test] + fn test_parens_left() { + let lexer = HuiaLexer::new("("); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::ParensLeft]); + } + + #[test] + fn test_parens_right() { + let lexer = HuiaLexer::new(")"); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::ParensRight]); + } + + #[test] + fn test_string() { + let lexer = HuiaLexer::new("\"Marty McFly\""); + let result: Vec = lexer.into(); + assert_eq!(result, [IRToken::String("Marty McFly".to_string())]); + } + #[test] fn test_sub() { let lexer = HuiaLexer::new("-"); diff --git a/huia-irparser/src/lib.rs b/huia-irparser/src/lib.rs index a5f4eed..de4bd12 100644 --- a/huia-irparser/src/lib.rs +++ b/huia-irparser/src/lib.rs @@ -6,6 +6,7 @@ extern crate lazy_static; mod lexer; mod parser; +mod term; mod token; // pub use parser::parser_spec; diff --git a/huia-irparser/src/parser.rs b/huia-irparser/src/parser.rs index fd1723e..6c2e9c3 100644 --- a/huia-irparser/src/parser.rs +++ b/huia-irparser/src/parser.rs @@ -11,18 +11,36 @@ lazy_static! { // Literals have the lowest precedence. spec.add_null_associations( vec![ + IRToken::Atom("".to_string()), + IRToken::Constant("".to_string()), IRToken::Float("".to_string()), - IRToken::Local("".to_string()), - IRToken::Integer("".to_string()), + IRToken::Ident("".to_string()), + IRToken::IntegerBinary("".to_string()), + IRToken::IntegerDecimal("".to_string()), + IRToken::IntegerHex("".to_string()), + IRToken::IntegerOctal("".to_string()), + IRToken::String("".to_string()) ], PrecedenceLevel::Root, |_, token: IRToken, _| Ok(Node::Simple(token.clone())), ).unwrap(); + // Colon + spec.add_left_assoc( + IRToken::Colon, + PrecedenceLevel::First, + |parser, token, lbp, node| { + Ok(Node::Composite { + token: token.clone(), + children: vec![node, parser.parse_expr(lbp)?], + }) + }, + ).unwrap(); + // Assignment spec.add_left_assoc( IRToken::Assign, - PrecedenceLevel::First, + PrecedenceLevel::Second, |parser, token, lbp, node| { Ok(Node::Composite { token: token.clone(), @@ -34,7 +52,7 @@ lazy_static! { // LogicalOr spec.add_left_assoc( IRToken::LogicalOr, - PrecedenceLevel::Second, + PrecedenceLevel::Third, |parser, token, lbp, node| { Ok(Node::Composite { token: token.clone(), @@ -46,7 +64,7 @@ lazy_static! { // LogicalAnd spec.add_left_assoc( IRToken::LogicalAnd, - PrecedenceLevel::Third, + PrecedenceLevel::Fourth, |parser, token, lbp, node| { Ok(Node::Composite { token: token.clone(), @@ -58,7 +76,7 @@ lazy_static! { // Add, Sub spec.add_left_associations( vec![IRToken::Add, IRToken::Sub], - PrecedenceLevel::Fourth, + PrecedenceLevel::Fifth, |parser, token, lbp, node| { Ok(Node::Composite { token: token.clone(), @@ -70,7 +88,7 @@ lazy_static! { // Div, Mod, Mult spec.add_left_associations( vec![IRToken::Div, IRToken::Mod, IRToken::Mult], - PrecedenceLevel::Fifth, + PrecedenceLevel::Sixth, |parser, token, lbp, node| { Ok(Node::Composite { token: token.clone(), @@ -79,6 +97,32 @@ lazy_static! { }, ).unwrap(); + // Object indexing + spec.add_left_associations( + vec![IRToken::Period], + PrecedenceLevel::Seventh, + |parser, token, lbp, node| { + Ok(Node::Composite { + token: token.clone(), + children: vec![node, parser.parse_expr(lbp)?] + }) + } + ).unwrap(); + + // Method call + spec.add_left_associations( + vec![IRToken::ParensLeft], + PrecedenceLevel::Eighth, + |parser, token, lbp, node| { + let res = parser.parse_expr(lbp)?; + parser.consume(IRToken::ParensRight)?; + Ok(Node::Composite { + token: token.clone(), + children: vec![node, res] + }) + } + ).unwrap(); + spec }; } @@ -107,4 +151,15 @@ mod test { assert!(parser.parse().is_ok()); } + + #[test] + fn test_call() { + let tokens = HuiaLexer::new("\"Hello %s\".format(whom)"); + let mut parser = HuiaParser::new(tokens); + + let nodes = parser.parse().unwrap(); + print!("{:?}", nodes); + assert!(false); + // assert_eq!(nodes, "wat"); + } } diff --git a/huia-irparser/src/term.rs b/huia-irparser/src/term.rs new file mode 100644 index 0000000..ce2befd --- /dev/null +++ b/huia-irparser/src/term.rs @@ -0,0 +1,68 @@ +use std::string; + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub enum Term { + Atom(Atom), + Constant(Constant), + Float(Float), + Integer(Integer), + Ident(Ident), + String(String), +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct Atom(string::String); + +impl Atom { + fn new(value: &str) -> Self { + Atom(value.to_string()) + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct Constant(string::String); + +impl Constant { + fn new(value: &str) -> Self { + Constant(value.to_string()) + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct Float(f64); + +impl Float { + fn new(value: &str) -> Self { + Float(value.parse().unwrap()) + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct Integer(i64, usize); + +impl Integer { + fn new(value: &str, radix: usize) -> Self { + match i64::from_str_radix(value, radix as u32) { + Ok(i) => Integer(i, radix), + Err(_) => unreachable!("Unable to parse integer {:?} radix {:?}", value, radix) + } + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct Ident(string::String); + +impl Ident { + fn new(value: &str) -> Self { + Ident(value.to_string()) + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd)] +pub struct String(string::String); + +impl String { + fn new(value: &str) -> Self { + String(value.to_string()) + } +} diff --git a/huia-irparser/src/token.rs b/huia-irparser/src/token.rs index 160d999..158ab69 100644 --- a/huia-irparser/src/token.rs +++ b/huia-irparser/src/token.rs @@ -5,14 +5,26 @@ pub enum IRToken { Add, Assign, Atom(String), + Colon, + Comma, + Constant(String), Div, Float(String), - Integer(String), - Local(String), + Ident(String), + IntegerBinary(String), + IntegerDecimal(String), + IntegerHex(String), + IntegerOctal(String), + InterpolateStart, + InterpolateEnd, LogicalAnd, LogicalOr, Mod, Mult, + ParensLeft, + ParensRight, + Period, + String(String), Sub, }