Go back to using PEST because it's just easier.

This commit is contained in:
James Harton 2019-02-08 21:11:31 +13:00
parent 4fd67d9a16
commit c04c1618b6
22 changed files with 929 additions and 669 deletions

View file

@ -1,4 +1,4 @@
[workspace]
members = [
"huia-irparser",
"huia-parser",
]

View file

@ -1,384 +0,0 @@
use super::token::IRToken;
use logos::{Lexer, Logos};
use prattle;
/// Raw token kinds produced by the `logos`-derived lexer.
///
/// Each variant is annotated with the literal (`#[token]`) or pattern
/// (`#[regex]`) that produces it. `End` and `Error` are the two special
/// variants `logos` requires.
#[derive(Logos, Debug, PartialEq)]
pub enum LanguageToken {
    #[end]
    End,
    #[error]
    Error,
    #[token = "+"]
    Add,
    #[token = "="]
    Assign,
    // A colon immediately followed by letters/underscores, e.g. `:marty`.
    #[regex = ":[a-zA-Z_]+"]
    Atom,
    #[token = "true"]
    BooleanTrue,
    #[token = "false"]
    BooleanFalse,
    #[token = ":"]
    Colon,
    #[token = ","]
    Comma,
    // Starts with an upper-case letter. The tail is optional so that
    // single-letter constants such as `T` are accepted, mirroring `Ident`.
    #[regex = "[A-Z][a-zA-Z0-9_]*"]
    Constant,
    #[token = "/"]
    Div,
    #[regex = "[0-9]*\\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+[eE][+-]?[0-9]+"]
    Float,
    #[regex = "0[bB][01][01_]*"]
    IntegerBinary,
    #[regex = "[1-9][0-9]*|0"]
    IntegerDecimal,
    #[regex = "0[xX][0-9a-fA-F][0-9a-fA-F_]*"]
    IntegerHex,
    #[regex = "0[oO][0-7][0-7_]*"]
    IntegerOctal,
    #[regex = "[a-z_][a-zA-Z0-9_]*"]
    Ident,
    #[token = "&&"]
    LogicalAnd,
    #[token = "||"]
    LogicalOr,
    #[token = "%"]
    Mod,
    #[token = "*"]
    Mult,
    #[token = "("]
    ParensLeft,
    #[token = ")"]
    ParensRight,
    #[token = "."]
    Period,
    // Double-quoted string; only the escapes listed in the pattern are accepted.
    #[regex = r#""([^"\\]|\\t|\\u|\\n|\\")*""#]
    String,
    #[token = "-"]
    Sub,
}
/// Adapter that exposes the `logos` lexer through the `prattle` lexer trait.
///
/// Keeps a three-token window (`prev`, `current`, `next`) of already-converted
/// `IRToken`s alongside the underlying lexer.
pub struct HuiaLexer<'a> {
    // The underlying logos lexer over the borrowed source text.
    lexer: Lexer<LanguageToken, &'a str>,
    // Look-ahead token; `None` once the input is exhausted.
    next: Option<IRToken>,
    // Token returned by the call before the most recent `next_token`.
    prev: Option<IRToken>,
    // Token returned by the most recent `next_token`.
    current: Option<IRToken>,
}
fn lexer_to_token<'a>(lexer: &mut Lexer<LanguageToken, &'a str>) -> Option<IRToken> {
match lexer.token {
LanguageToken::Add => Some(IRToken::Add),
LanguageToken::Assign => Some(IRToken::Assign),
LanguageToken::Atom => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::Atom(slice.get(1..len).unwrap().to_string()))
}
LanguageToken::BooleanFalse => Some(IRToken::Atom("true".to_string())),
LanguageToken::Colon => Some(IRToken::Colon),
LanguageToken::Comma => Some(IRToken::Comma),
LanguageToken::Constant => Some(IRToken::Constant(lexer.slice().to_string())),
LanguageToken::Div => Some(IRToken::Div),
LanguageToken::Float => Some(IRToken::Float(lexer.slice().to_string())),
LanguageToken::Ident => Some(IRToken::Ident(lexer.slice().to_string())),
LanguageToken::IntegerBinary => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::IntegerBinary(
slice.get(2..len).unwrap().to_string(),
))
}
LanguageToken::IntegerDecimal => Some(IRToken::IntegerDecimal(lexer.slice().to_string())),
LanguageToken::IntegerHex => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::IntegerHex(slice.get(2..len).unwrap().to_string()))
}
LanguageToken::IntegerOctal => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::IntegerOctal(
slice.get(2..len).unwrap().to_string(),
))
}
LanguageToken::LogicalAnd => Some(IRToken::LogicalAnd),
LanguageToken::LogicalOr => Some(IRToken::LogicalOr),
LanguageToken::Mod => Some(IRToken::Mod),
LanguageToken::Mult => Some(IRToken::Mult),
LanguageToken::ParensLeft => Some(IRToken::ParensLeft),
LanguageToken::ParensRight => Some(IRToken::ParensRight),
LanguageToken::Period => Some(IRToken::Period),
LanguageToken::Sub => Some(IRToken::Sub),
LanguageToken::String => {
let slice = lexer.slice();
let len = slice.len();
Some(IRToken::String(slice.get(1..len - 1).unwrap().to_string()))
}
LanguageToken::End => None,
_ => unreachable!("Unexpected token {:?}", lexer.token),
}
}
impl<'a> HuiaLexer<'a> {
    /// Builds a lexer over `source`, priming the look-ahead slot with the
    /// first token so that `peek` works before any token has been consumed.
    pub fn new(source: &'a str) -> Self {
        let mut lexer = LanguageToken::lexer(source);
        let next = lexer_to_token(&mut lexer);
        Self {
            lexer,
            next,
            prev: None,
            current: None,
        }
    }
}
impl<'a> prattle::lexer::Lexer<IRToken> for HuiaLexer<'a> {
    /// Returns a copy of the look-ahead token without consuming it.
    fn peek(&self) -> Option<IRToken> {
        self.next.as_ref().cloned()
    }

    /// Consumes the look-ahead token and returns it, refilling the
    /// look-ahead slot from the underlying lexer.
    ///
    /// Panics if there was no look-ahead token to consume.
    fn next_token(&mut self) -> IRToken {
        // Shift the window one token to the left: current -> prev, next -> current.
        self.prev = self.current.take();
        self.current = self.next.take();
        self.lexer.advance();
        self.next = lexer_to_token(&mut self.lexer);
        self.current.clone().unwrap()
    }

    /// Returns a copy of the token consumed before the current one.
    ///
    /// Panics if fewer than two tokens have been consumed.
    fn prev_token(&mut self) -> IRToken {
        self.prev.as_ref().cloned().unwrap()
    }
}
/// Drains the underlying lexer into a vector of `IRToken`s.
///
/// Implemented as `From` rather than `Into`; the standard blanket impl keeps
/// `lexer.into()` working for existing callers. Collection starts at the
/// token the underlying lexer is currently positioned on and stops at the
/// end of input.
///
/// # Panics
///
/// Panics if the input contains a token `lexer_to_token` cannot convert.
impl<'a> From<HuiaLexer<'a>> for Vec<IRToken> {
    fn from(huia: HuiaLexer<'a>) -> Self {
        let mut lexer = huia.lexer;
        let mut tokens = Vec::new();
        // `lexer_to_token` yields `None` exactly when the lexer reports `End`.
        while let Some(token) = lexer_to_token(&mut lexer) {
            tokens.push(token);
            lexer.advance();
        }
        tokens
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Lexes `source` to completion and returns the converted tokens.
    fn lex(source: &str) -> Vec<IRToken> {
        HuiaLexer::new(source).into()
    }

    fn ident(name: &str) -> IRToken {
        IRToken::Ident(name.to_string())
    }

    #[test]
    fn test_basic_lex() {
        let mut lexer = LanguageToken::lexer("a = 1.23 + 2.34");
        assert_eq!(lexer.token, LanguageToken::Ident);
        assert_eq!(lexer.slice(), "a");
        lexer.advance();
        assert_eq!(lexer.token, LanguageToken::Assign);
        assert_eq!(lexer.slice(), "=");
        lexer.advance();
        assert_eq!(lexer.token, LanguageToken::Float);
        assert_eq!(lexer.slice(), "1.23");
        lexer.advance();
        assert_eq!(lexer.token, LanguageToken::Add);
        assert_eq!(lexer.slice(), "+");
        lexer.advance();
        assert_eq!(lexer.token, LanguageToken::Float);
        assert_eq!(lexer.slice(), "2.34");
        lexer.advance();
        assert_eq!(lexer.token, LanguageToken::End);
    }

    // Lexes a small multi-line program. The lexer has no keyword tokens, so
    // `type`, `do`, `def` and `end` all come back as plain identifiers.
    #[test]
    fn test_wat() {
        let source = "\n\
                      type Greeter do\n\
                      def hello(whom: String) do\n\
                      \"Hello %s!\".format(whom)\n\
                      end\n\
                      def hello do\n\
                      \"Hello World!\"\n\
                      end\n\
                      end\n";
        assert_eq!(
            lex(source),
            vec![
                ident("type"),
                IRToken::Constant("Greeter".to_string()),
                ident("do"),
                ident("def"),
                ident("hello"),
                IRToken::ParensLeft,
                ident("whom"),
                IRToken::Colon,
                IRToken::Constant("String".to_string()),
                IRToken::ParensRight,
                ident("do"),
                IRToken::String("Hello %s!".to_string()),
                IRToken::Period,
                ident("format"),
                IRToken::ParensLeft,
                ident("whom"),
                IRToken::ParensRight,
                ident("end"),
                ident("def"),
                ident("hello"),
                ident("do"),
                IRToken::String("Hello World!".to_string()),
                ident("end"),
                ident("end"),
            ]
        );
    }

    #[test]
    fn test_converts_into_irtokens() {
        assert_eq!(
            lex("a = 1.23 + 2.34"),
            [
                IRToken::Ident("a".to_string()),
                IRToken::Assign,
                IRToken::Float("1.23".to_string()),
                IRToken::Add,
                IRToken::Float("2.34".to_string())
            ]
        )
    }

    #[test]
    fn test_add() {
        assert_eq!(lex("+"), [IRToken::Add]);
    }

    #[test]
    fn test_assign() {
        assert_eq!(lex("="), [IRToken::Assign]);
    }

    #[test]
    fn test_atom() {
        assert_eq!(lex(":marty"), [IRToken::Atom("marty".to_string())]);
    }

    #[test]
    fn test_colon() {
        assert_eq!(lex(":"), [IRToken::Colon]);
    }

    #[test]
    fn test_comma() {
        assert_eq!(lex(","), [IRToken::Comma]);
    }

    #[test]
    fn test_constant() {
        assert_eq!(lex("Marty"), [IRToken::Constant("Marty".to_string())]);
    }

    #[test]
    fn test_div() {
        assert_eq!(lex("/"), [IRToken::Div]);
    }

    #[test]
    fn test_float() {
        assert_eq!(lex("1.23"), [IRToken::Float("1.23".to_string())]);
    }

    #[test]
    fn test_integer_binary() {
        assert_eq!(
            lex("0b101_01"),
            [IRToken::IntegerBinary("101_01".to_string())]
        );
    }

    #[test]
    fn test_integer_decimal() {
        assert_eq!(lex("123"), [IRToken::IntegerDecimal("123".to_string())]);
    }

    #[test]
    fn test_integer_hex() {
        assert_eq!(
            lex("0Xfedcba987654_3210"),
            [IRToken::IntegerHex("fedcba987654_3210".to_string())]
        );
    }

    #[test]
    fn test_integer_octal() {
        assert_eq!(
            lex("0o7654_3210"),
            [IRToken::IntegerOctal("7654_3210".to_string())]
        );
    }

    #[test]
    fn test_ident() {
        assert_eq!(lex("marty"), [IRToken::Ident("marty".to_string())]);
    }

    #[test]
    fn test_logical_and() {
        assert_eq!(lex("&&"), [IRToken::LogicalAnd]);
    }

    #[test]
    fn test_logical_or() {
        assert_eq!(lex("||"), [IRToken::LogicalOr]);
    }

    #[test]
    fn test_mod() {
        assert_eq!(lex("%"), [IRToken::Mod]);
    }

    #[test]
    fn test_mult() {
        assert_eq!(lex("*"), [IRToken::Mult]);
    }

    #[test]
    fn test_parens_left() {
        assert_eq!(lex("("), [IRToken::ParensLeft]);
    }

    #[test]
    fn test_parens_right() {
        assert_eq!(lex(")"), [IRToken::ParensRight]);
    }

    #[test]
    fn test_string() {
        assert_eq!(
            lex("\"Marty McFly\""),
            [IRToken::String("Marty McFly".to_string())]
        );
    }

    #[test]
    fn test_sub() {
        assert_eq!(lex("-"), [IRToken::Sub]);
    }
}

View file

@ -1,13 +0,0 @@
extern crate prattle;
#[macro_use]
extern crate logos;
#[macro_use]
extern crate lazy_static;
mod lexer;
mod parser;
mod term;
mod token;
// pub use parser::parser_spec;
pub use token::IRToken;

View file

@ -1,165 +0,0 @@
use super::lexer::HuiaLexer;
use super::IRToken;
use prattle::prelude::*;
// Operator precedence is modelled on Ruby's: https://ruby-doc.org/core-2.2.0/doc/syntax/precedence_rdoc.html
// Shared Pratt-parser specification, built once on first use and cloned into
// every `HuiaParser`. Rules are registered from the loosest binding level
// (`Root`) to the tightest (`Eighth`).
lazy_static! {
    static ref PARSER_SPEC: ParserSpec<IRToken> = {
        let mut spec: ParserSpec<IRToken> = ParserSpec::new();
        // Literals are registered at the root precedence level and become leaf
        // nodes. The empty strings are placeholder payloads.
        // NOTE(review): presumably prattle matches tokens by variant rather
        // than by payload, otherwise these rules could never fire — confirm.
        spec.add_null_associations(
            vec![
                IRToken::Atom("".to_string()),
                IRToken::Constant("".to_string()),
                IRToken::Float("".to_string()),
                IRToken::Ident("".to_string()),
                IRToken::IntegerBinary("".to_string()),
                IRToken::IntegerDecimal("".to_string()),
                IRToken::IntegerHex("".to_string()),
                IRToken::IntegerOctal("".to_string()),
                IRToken::String("".to_string())
            ],
            PrecedenceLevel::Root,
            |_, token: IRToken, _| Ok(Node::Simple(token.clone())),
        ).unwrap();
        // Colon: `lhs : rhs` becomes a two-child composite node.
        spec.add_left_assoc(
            IRToken::Colon,
            PrecedenceLevel::First,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?],
                })
            },
        ).unwrap();
        // Assignment.
        // NOTE(review): registered as left-associative, so `a = b = c` would
        // group as `(a = b) = c` — confirm this is intended.
        spec.add_left_assoc(
            IRToken::Assign,
            PrecedenceLevel::Second,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?],
                })
            },
        ).unwrap();
        // LogicalOr
        spec.add_left_assoc(
            IRToken::LogicalOr,
            PrecedenceLevel::Third,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?],
                })
            },
        ).unwrap();
        // LogicalAnd
        spec.add_left_assoc(
            IRToken::LogicalAnd,
            PrecedenceLevel::Fourth,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?],
                })
            },
        ).unwrap();
        // Add, Sub
        spec.add_left_associations(
            vec![IRToken::Add, IRToken::Sub],
            PrecedenceLevel::Fifth,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?],
                })
            },
        ).unwrap();
        // Div, Mod, Mult
        spec.add_left_associations(
            vec![IRToken::Div, IRToken::Mod, IRToken::Mult],
            PrecedenceLevel::Sixth,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?],
                })
            },
        ).unwrap();
        // Object indexing: `receiver . member`.
        spec.add_left_associations(
            vec![IRToken::Period],
            PrecedenceLevel::Seventh,
            |parser, token, lbp, node| {
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, parser.parse_expr(lbp)?]
                })
            }
        ).unwrap();
        // Method call: `callee ( argument )`. Exactly one expression is parsed
        // between the parentheses before the closing `)` is consumed.
        // NOTE(review): this looks like it cannot parse `f()` or `f(a, b)` — confirm.
        spec.add_left_associations(
            vec![IRToken::ParensLeft],
            PrecedenceLevel::Eighth,
            |parser, token, lbp, node| {
                let res = parser.parse_expr(lbp)?;
                parser.consume(IRToken::ParensRight)?;
                Ok(Node::Composite {
                    token: token.clone(),
                    children: vec![node, res]
                })
            }
        ).unwrap();
        spec
    };
}
/// Pratt parser for Huia, wrapping prattle's `GeneralParser` driven by a
/// `HuiaLexer`.
pub struct HuiaParser<'a>(GeneralParser<IRToken, HuiaLexer<'a>>);

impl<'a> HuiaParser<'a> {
    /// Creates a parser that reads tokens from `lexer`, using a clone of the
    /// shared `PARSER_SPEC`.
    pub fn new(lexer: HuiaLexer<'a>) -> Self {
        let spec = PARSER_SPEC.clone();
        HuiaParser(GeneralParser::new(spec, lexer))
    }

    /// Runs the wrapped parser and returns the resulting node tree or the
    /// parse error it reported.
    pub fn parse(&mut self) -> Result<Node<IRToken>, ParseError<IRToken>> {
        let HuiaParser(inner) = self;
        inner.parse()
    }
}
#[cfg(test)]
mod test {
    use super::super::lexer::HuiaLexer;
    use super::*;

    #[test]
    fn test_with_lexer() {
        let tokens = HuiaLexer::new("a = 1.23 * 2.46");
        let mut parser = HuiaParser::new(tokens);
        assert!(parser.parse().is_ok());
    }

    // A method call on a string literal must parse into a composite node.
    // This used to end in an unconditional `assert!(false)` that was only
    // there to dump the tree.
    #[test]
    fn test_call() {
        let tokens = HuiaLexer::new("\"Hello %s\".format(whom)");
        let mut parser = HuiaParser::new(tokens);
        match parser.parse().unwrap() {
            Node::Composite { .. } => {}
            other => panic!("expected a composite node, got {:?}", other),
        }
    }
}

View file

@ -1,68 +0,0 @@
use std::string;
/// A literal or name term; each variant wraps the newtype of the same name
/// defined in this module.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Term {
    Atom(Atom),
    Constant(Constant),
    Float(Float),
    Integer(Integer),
    Ident(Ident),
    String(String),
}
/// An atom, stored as its textual name.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Atom(string::String);

impl Atom {
    /// Wraps an owned copy of `value`.
    fn new(value: &str) -> Self {
        let name = string::String::from(value);
        Atom(name)
    }
}
/// A constant, stored as its textual name.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Constant(string::String);

impl Constant {
    /// Wraps an owned copy of `value`.
    fn new(value: &str) -> Self {
        let name = string::String::from(value);
        Constant(name)
    }
}
/// A floating point literal, stored as an `f64`.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Float(f64);

impl Float {
    /// Parses `value` as an `f64`.
    ///
    /// Panics if `value` is not a valid floating point number.
    fn new(value: &str) -> Self {
        let parsed: f64 = value.parse().unwrap();
        Float(parsed)
    }
}
/// An integer literal: the parsed value and the radix it was written in.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Integer(i64, usize);

impl Integer {
    /// Parses `value` as an integer written in base `radix`.
    ///
    /// `_` digit separators (for example `101_01`) are ignored, because
    /// `i64::from_str_radix` does not accept them.
    ///
    /// # Panics
    ///
    /// Panics if the remaining digits are not a valid `i64` in the given
    /// radix (including overflow), or if `radix` is outside `2..=36`.
    fn new(value: &str, radix: usize) -> Self {
        let digits: string::String = value.chars().filter(|c| *c != '_').collect();
        match i64::from_str_radix(&digits, radix as u32) {
            Ok(i) => Integer(i, radix),
            Err(_) => panic!("Unable to parse integer {:?} radix {:?}", value, radix),
        }
    }
}
/// An identifier, stored as its textual name.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Ident(string::String);

impl Ident {
    /// Wraps an owned copy of `value`.
    fn new(value: &str) -> Self {
        let name = string::String::from(value);
        Ident(name)
    }
}
/// A string literal. The type shadows the standard `String` in this module,
/// hence the explicit `string::String` for the wrapped value.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct String(string::String);

impl String {
    /// Wraps an owned copy of `value`.
    fn new(value: &str) -> Self {
        let contents = string::String::from(value);
        String(contents)
    }
}

View file

@ -1,35 +0,0 @@
use std::fmt::{Display, Error, Formatter};
/// Tokens handed to the parser. Variants that carry a `String` hold the
/// token's source text.
///
/// The variant order is significant because `Ord`/`PartialOrd` are derived.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum IRToken {
    Add,
    Assign,
    Atom(String),
    Colon,
    Comma,
    Constant(String),
    Div,
    Float(String),
    Ident(String),
    IntegerBinary(String),
    IntegerDecimal(String),
    IntegerHex(String),
    IntegerOctal(String),
    InterpolateStart,
    InterpolateEnd,
    LogicalAnd,
    LogicalOr,
    Mod,
    Mult,
    ParensLeft,
    ParensRight,
    Period,
    String(String),
    Sub,
}

/// Displays a token using its `Debug` representation.
impl Display for IRToken {
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        // Formatting through fresh arguments, exactly as `write!` does,
        // so width/fill flags requested by the caller are not applied.
        f.write_fmt(format_args!("{:?}", self))
    }
}

View file

@ -1,10 +1,10 @@
[package]
name = "huia-irparser"
name = "huia-parser"
version = "0.1.0"
authors = ["James Harton <james@automat.nz>"]
edition = "2018"
[dependencies]
prattle = "0.1.3"
logos = "0.9.7"
lazy_static = "1.2.0"
pest = "2.1.0"
pest_derive = "2.1.0"

View file

@ -0,0 +1,43 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// An atom literal such as `:marty_mcfly`, stored without its leading colon.
#[derive(Debug, Clone)]
pub struct Atom {
    name: String,
    location: InputLocation,
}

/// Builds an `Atom` from a pair produced by the `atom` grammar rule.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule.
impl<'a> From<Pair<'a, Rule>> for Atom {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            Rule::atom => Atom {
                // The `atom` rule always starts with a one-byte `:` sigil.
                name: pair.as_str()[1..].to_string(),
                location: InputLocation::from(pair.as_span()),
            },
            _ => unreachable!("Expected pair to be an Atom"),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    // The leading colon is stripped from the atom's name.
    #[test]
    fn it_parses() {
        let mut pairs = Grammar::parse(Rule::atom, ":marty_mcfly").unwrap();
        let atom = Atom::from(pairs.next().unwrap());
        assert_eq!(atom.name, "marty_mcfly");
    }
}

View file

@ -0,0 +1,328 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
#[derive(Debug, Clone)]
pub struct Binary {
operator: Operator,
location: InputLocation,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
Assign,
BitwiseAnd,
BitwiseOr,
BitwiseXor,
Divide,
Equal,
Exponent,
GreaterThan,
GreaterThanOrEqual,
LessThan,
LessThanOrEqual,
LogicalAnd,
LogicalOr,
Minus,
Modulus,
Multiply,
NotEqual,
Plus,
ShiftLeft,
ShiftRight,
}
impl<'a> From<Pair<'a, Rule>> for Binary {
fn from(pair: Pair<'a, Rule>) -> Self {
match pair.as_rule() {
Rule::assign => Binary {
operator: Operator::Assign,
location: InputLocation::from(pair.into_span()),
},
Rule::bitwise_and => Binary {
operator: Operator::BitwiseAnd,
location: InputLocation::from(pair.into_span()),
},
Rule::bitwise_or => Binary {
operator: Operator::BitwiseOr,
location: InputLocation::from(pair.into_span()),
},
Rule::bitwise_xor => Binary {
operator: Operator::BitwiseXor,
location: InputLocation::from(pair.into_span()),
},
Rule::divide => Binary {
operator: Operator::Divide,
location: InputLocation::from(pair.into_span()),
},
Rule::equal => Binary {
operator: Operator::Equal,
location: InputLocation::from(pair.into_span()),
},
Rule::exponent => Binary {
operator: Operator::Exponent,
location: InputLocation::from(pair.into_span()),
},
Rule::greater_than => Binary {
operator: Operator::GreaterThan,
location: InputLocation::from(pair.into_span()),
},
Rule::greater_than_or_equal => Binary {
operator: Operator::GreaterThanOrEqual,
location: InputLocation::from(pair.into_span()),
},
Rule::less_than => Binary {
operator: Operator::LessThan,
location: InputLocation::from(pair.into_span()),
},
Rule::less_than_or_equal => Binary {
operator: Operator::LessThanOrEqual,
location: InputLocation::from(pair.into_span()),
},
Rule::logical_and => Binary {
operator: Operator::LogicalAnd,
location: InputLocation::from(pair.into_span()),
},
Rule::logical_or => Binary {
operator: Operator::LogicalOr,
location: InputLocation::from(pair.into_span()),
},
Rule::minus => Binary {
operator: Operator::Minus,
location: InputLocation::from(pair.into_span()),
},
Rule::modulus => Binary {
operator: Operator::Modulus,
location: InputLocation::from(pair.into_span()),
},
Rule::multiply => Binary {
operator: Operator::Multiply,
location: InputLocation::from(pair.into_span()),
},
Rule::not_equal => Binary {
operator: Operator::NotEqual,
location: InputLocation::from(pair.into_span()),
},
Rule::plus => Binary {
operator: Operator::Plus,
location: InputLocation::from(pair.into_span()),
},
Rule::shift_left => Binary {
operator: Operator::ShiftLeft,
location: InputLocation::from(pair.into_span()),
},
Rule::shift_right => Binary {
operator: Operator::ShiftRight,
location: InputLocation::from(pair.into_span()),
},
_ => unreachable!("Expected pair to be an Binary"),
}
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    /// Parses `input` with the `binary_operator` rule and returns the
    /// operator of the resulting `Binary`.
    fn operator_for(input: &str) -> Operator {
        let mut pairs = Grammar::parse(Rule::binary_operator, input).unwrap();
        Binary::from(pairs.next().unwrap()).operator
    }

    #[test]
    fn it_parses_assign() {
        assert_eq!(operator_for("="), Operator::Assign);
    }

    #[test]
    fn it_parses_bitwise_and() {
        assert_eq!(operator_for("&"), Operator::BitwiseAnd);
    }

    #[test]
    fn it_parses_bitwise_or() {
        assert_eq!(operator_for("|"), Operator::BitwiseOr);
    }

    #[test]
    fn it_parses_bitwise_xor() {
        assert_eq!(operator_for("^"), Operator::BitwiseXor);
    }

    #[test]
    fn it_parses_divide() {
        assert_eq!(operator_for("/"), Operator::Divide);
    }

    #[test]
    fn it_parses_equal() {
        assert_eq!(operator_for("=="), Operator::Equal);
    }

    #[test]
    fn it_parses_exponent() {
        assert_eq!(operator_for("**"), Operator::Exponent);
    }

    #[test]
    fn it_parses_greater_than() {
        assert_eq!(operator_for(">"), Operator::GreaterThan);
    }

    #[test]
    fn it_parses_greater_than_or_equal() {
        assert_eq!(operator_for(">="), Operator::GreaterThanOrEqual);
    }

    #[test]
    fn it_parses_less_than() {
        assert_eq!(operator_for("<"), Operator::LessThan);
    }

    #[test]
    fn it_parses_less_than_or_equal() {
        assert_eq!(operator_for("<="), Operator::LessThanOrEqual);
    }

    #[test]
    fn it_parses_logical_and() {
        assert_eq!(operator_for("&&"), Operator::LogicalAnd);
    }

    #[test]
    fn it_parses_logical_or() {
        assert_eq!(operator_for("||"), Operator::LogicalOr);
    }

    #[test]
    fn it_parses_minus() {
        assert_eq!(operator_for("-"), Operator::Minus);
    }

    #[test]
    fn it_parses_modulus() {
        assert_eq!(operator_for("%"), Operator::Modulus);
    }

    #[test]
    fn it_parses_multiply() {
        assert_eq!(operator_for("*"), Operator::Multiply);
    }

    #[test]
    fn it_parses_not_equal() {
        assert_eq!(operator_for("!="), Operator::NotEqual);
    }

    #[test]
    fn it_parses_plus() {
        assert_eq!(operator_for("+"), Operator::Plus);
    }

    #[test]
    fn it_parses_shift_left() {
        assert_eq!(operator_for("<<"), Operator::ShiftLeft);
    }

    #[test]
    fn it_parses_shift_right() {
        assert_eq!(operator_for(">>"), Operator::ShiftRight);
    }
}

View file

@ -0,0 +1,41 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// A constant such as `MartyMcFly`, stored by name.
#[derive(Debug, Clone)]
pub struct Constant {
    name: String,
    location: InputLocation,
}

/// Builds a `Constant` from a pair produced by the `constant` grammar rule.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule.
impl<'a> From<Pair<'a, Rule>> for Constant {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            Rule::constant => Constant {
                name: pair.as_str().to_string(),
                location: InputLocation::from(pair.as_span()),
            },
            _ => unreachable!("Expected pair to be an Constant"),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    // The constant's name is the full matched text.
    #[test]
    fn it_parses() {
        let mut pairs = Grammar::parse(Rule::constant, "MartyMcFly").unwrap();
        let constant = Constant::from(pairs.next().unwrap());
        assert_eq!(constant.name, "MartyMcFly");
    }
}

View file

@ -0,0 +1,41 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// A floating point literal and where it appeared in the input.
#[derive(Debug, Clone)]
pub struct Float {
    value: f64,
    location: InputLocation,
}

/// Builds a `Float` from a pair produced by the `float` grammar rule.
///
/// The grammar allows `_` digit separators in the part before the decimal
/// point (for example `1_000.5`). They are removed before parsing because
/// `f64`'s parser rejects them.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule.
impl<'a> From<Pair<'a, Rule>> for Float {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            Rule::float => {
                let digits: String = pair.as_str().chars().filter(|c| *c != '_').collect();
                Float {
                    value: digits.parse().unwrap(),
                    location: InputLocation::from(pair.as_span()),
                }
            }
            _ => unreachable!("Expected pair to be an Float"),
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    fn parse_float(input: &str) -> Float {
        let mut pairs = Grammar::parse(Rule::float, input).unwrap();
        Float::from(pairs.next().unwrap())
    }

    #[test]
    fn it_parses() {
        assert_eq!(parse_float("123.456").value, 123.456);
    }

    #[test]
    fn it_ignores_digit_separators() {
        assert_eq!(parse_float("1_000.5").value, 1000.5);
    }
}

View file

@ -0,0 +1,41 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// An identifier such as `marty_mcfly`, stored by name.
#[derive(Debug, Clone)]
pub struct Identifier {
    name: String,
    location: InputLocation,
}

/// Builds an `Identifier` from a pair produced by the `ident` grammar rule.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule.
impl<'a> From<Pair<'a, Rule>> for Identifier {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            Rule::ident => Identifier {
                name: pair.as_str().to_string(),
                location: InputLocation::from(pair.as_span()),
            },
            _ => unreachable!("Expected pair to be an Identifier"),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    // The identifier's name is the full matched text.
    #[test]
    fn it_parses() {
        let mut pairs = Grammar::parse(Rule::ident, "marty_mcfly").unwrap();
        let ident = Identifier::from(pairs.next().unwrap());
        assert_eq!(ident.name, "marty_mcfly");
    }
}

View file

@ -0,0 +1,41 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// An integer literal and where it appeared in the input.
#[derive(Debug, Clone)]
pub struct Integer {
    value: i64,
    location: InputLocation,
}

/// Builds an `Integer` from a pair produced by the `integer` grammar rule.
///
/// The `integer` rule wraps exactly one of `integer_hexadecimal`,
/// `integer_octal`, `integer_binary`, `integer_decimal` or `integer_zero`.
/// The inner rule selects the radix, and its text (which excludes any
/// `0x` / `0o` / `0b` sigil) supplies the digits. `_` digit separators are
/// removed before parsing.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule, or if the literal does
/// not fit in an `i64`.
impl<'a> From<Pair<'a, Rule>> for Integer {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            Rule::integer => {
                let location = InputLocation::from(pair.as_span());
                let inner = pair.into_inner().next().unwrap();
                let radix = match inner.as_rule() {
                    Rule::integer_hexadecimal => 16,
                    Rule::integer_octal => 8,
                    Rule::integer_binary => 2,
                    Rule::integer_decimal | Rule::integer_zero => 10,
                    _ => unreachable!("Expected integer to contain a digit sequence"),
                };
                let digits: String = inner.as_str().chars().filter(|c| *c != '_').collect();
                let value = i64::from_str_radix(&digits, radix).unwrap();
                Integer { value, location }
            }
            _ => unreachable!("Expected pair to be an Integer"),
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    fn parse_integer(input: &str) -> i64 {
        let mut pairs = Grammar::parse(Rule::integer, input).unwrap();
        Integer::from(pairs.next().unwrap()).value
    }

    #[test]
    fn it_parses() {
        assert_eq!(parse_integer("123"), 123);
    }

    #[test]
    fn it_parses_zero() {
        assert_eq!(parse_integer("0"), 0);
    }

    #[test]
    fn it_parses_prefixed_radixes() {
        assert_eq!(parse_integer("0xff"), 255);
        assert_eq!(parse_integer("0o17"), 15);
        assert_eq!(parse_integer("0b101"), 5);
    }

    #[test]
    fn it_ignores_digit_separators() {
        assert_eq!(parse_integer("1_000"), 1000);
        assert_eq!(parse_integer("0b101_01"), 21);
    }
}

View file

@ -0,0 +1,41 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// A reference to a local variable, stored by name.
#[derive(Debug, Clone)]
pub struct Local {
    name: String,
    location: InputLocation,
}

/// Builds a `Local` from a pair produced by the `local` grammar rule.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule.
impl<'a> From<Pair<'a, Rule>> for Local {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            // The grammar rule is named `local`; there is no `aocal` rule.
            Rule::local => Local {
                name: pair.as_str().to_string(),
                location: InputLocation::from(pair.as_span()),
            },
            _ => unreachable!("Expected pair to be an Local"),
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    #[test]
    fn it_parses() {
        let pair = Grammar::parse(Rule::local, "marty_mcfly")
            .unwrap()
            .next()
            .unwrap();
        let local = Local::from(pair);
        assert_eq!(local.name, "marty_mcfly");
    }
}

View file

@ -0,0 +1,37 @@
mod atom;
mod binary;
mod constant;
mod float;
mod identifier;
mod integer;
mod string;
mod unary;
pub use atom::Atom;
pub use binary::Binary;
pub use constant::Constant;
pub use float::Float;
pub use identifier::Identifier;
pub use integer::Integer;
pub use string::String;
pub use unary::Unary;
/// A node of the abstract syntax tree.
///
/// Derives `Debug` and `Clone` so that trees can be printed in test failures
/// and duplicated; every payload type already implements both.
#[derive(Debug, Clone)]
pub enum Term {
    Atom(Atom),
    Binary(Binary, Box<Term>, Box<Term>),
    Constant(Constant),
    ConstantDefinition(Identifier, Constant, Vec<(Identifier, Constant)>, Vec<Term>),
    Float(Float),
    FunctionDefinition(
        Identifier,
        Identifier,
        Vec<(Identifier, Constant)>,
        Vec<Term>,
    ),
    Integer(Integer),
    Let(Identifier, Box<Term>),
    Local(Identifier),
    Method(Identifier, Box<Term>, Box<Term>),
    String(String),
    Unary(Unary, Box<Term>),
}

View file

@ -0,0 +1,49 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
use std::string;
/// A string literal and where it appeared in the input.
///
/// `value` holds the text between the quotes exactly as written; escape
/// sequences are not decoded here.
#[derive(Debug, Clone)]
pub struct String {
    value: string::String,
    location: InputLocation,
}

/// Builds a `String` from a pair produced by the `string` grammar rule.
///
/// # Panics
///
/// Panics if `pair` was produced by any other rule.
impl<'a> From<Pair<'a, Rule>> for String {
    fn from(pair: Pair<'a, Rule>) -> Self {
        match pair.as_rule() {
            Rule::string => {
                // Take the span first so the pair can then be consumed
                // without cloning it.
                let location = InputLocation::from(pair.as_span());
                // The first inner pair is the content between the quotes.
                let content = pair.into_inner().next().unwrap();
                String {
                    value: content.as_str().to_string(),
                    location,
                }
            }
            _ => unreachable!("Expected pair to be an String"),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    // The surrounding quotes are not part of the string's value.
    #[test]
    fn it_parses() {
        let mut pairs = Grammar::parse(Rule::string, "\"Marty McFly\"").unwrap();
        let string = String::from(pairs.next().unwrap());
        assert_eq!(string.value, "Marty McFly");
    }
}

View file

@ -0,0 +1,73 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest::iterators::Pair;
/// A unary operator occurrence together with where it appeared in the input.
#[derive(Debug, Clone)]
pub struct Unary {
    operator: Operator,
    location: InputLocation,
}

/// The unary operators; one variant per rule accepted by `unary_operator`.
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
    LogicalNot,
    Minus,
    Plus,
}

/// Builds a `Unary` from a pair produced by one of the unary operator rules.
///
/// # Panics
///
/// Panics if `pair` was produced by a rule that is not a unary operator.
impl<'a> From<Pair<'a, Rule>> for Unary {
    fn from(pair: Pair<'a, Rule>) -> Self {
        // Only the operator differs between rules; the location is always
        // the span of the matched pair.
        let operator = match pair.as_rule() {
            Rule::logical_not => Operator::LogicalNot,
            Rule::minus => Operator::Minus,
            Rule::plus => Operator::Plus,
            _ => unreachable!("Expected pair to be an Unary"),
        };
        Unary {
            operator,
            location: InputLocation::from(pair.as_span()),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::grammar::Grammar;
    use pest::Parser;

    /// Parses `input` with the `unary_operator` rule and returns the
    /// operator of the resulting `Unary`.
    fn operator_for(input: &str) -> Operator {
        let mut pairs = Grammar::parse(Rule::unary_operator, input).unwrap();
        Unary::from(pairs.next().unwrap()).operator
    }

    #[test]
    fn it_parses_logical_not() {
        assert_eq!(operator_for("!"), Operator::LogicalNot);
    }

    #[test]
    fn it_parses_minus() {
        assert_eq!(operator_for("-"), Operator::Minus);
    }

    #[test]
    fn it_parses_plus() {
        assert_eq!(operator_for("+"), Operator::Plus);
    }
}

42
huia-parser/src/error.rs Normal file
View file

@ -0,0 +1,42 @@
use crate::grammar::Rule;
use crate::input_location::InputLocation;
use pest;
use std::error::Error;
use std::fmt;
/// Errors reported by the parser.
#[derive(Debug, Clone)]
pub enum ParseError {
    /// A parsing failure reported by pest: the rules it expected
    /// (`positives`), the rules it did not expect (`negatives`), and where
    /// the failure occurred.
    PestError {
        positives: Vec<Rule>,
        negatives: Vec<Rule>,
        location: InputLocation,
    },
    /// A failure tied to a specific grammar rule and location.
    // NOTE(review): presumably raised while converting pairs into AST nodes;
    // nothing visible here constructs it yet — confirm.
    AstGeneration {
        rule: Rule,
        location: InputLocation,
    },
}
impl<'a> From<pest::error::Error<Rule>> for ParseError {
fn from(pest: pest::error::Error<Rule>) -> Self {
match pest.variant {
pest::error::ErrorVariant::ParsingError {
ref positives,
ref negatives,
} => ParseError::PestError {
positives: positives.clone(),
negatives: negatives.clone(),
location: InputLocation::from(pest.location),
},
_ => unreachable!(),
}
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self)
}
}
impl<'a> Error for ParseError {}

View file

@ -0,0 +1,104 @@
// Entry points. `input` accepts one or more expressions; `file` accepts zero
// or more definitions. Both must consume the whole input (SOI ... EOI).
input = _{ SOI ~ expression+ ~ EOI }
file = _{ SOI ~ definition* ~ EOI }

// Implicit whitespace skipped between tokens of non-atomic rules.
WHITESPACE = _{ (" " | "\t" | "\r" | "\n")+ }
// One or more line breaks with optional horizontal whitespace around them.
newline = _{ (" " | "\t")* ~ ("\n" | "\r")+ ~ (" " | "\t")* }

// Keywords that `ident` refuses to match.
reserved = { "end" | "let" }

// A flat operand/operator sequence; no operator precedence is encoded here.
expression = _{ expression_inner ~ (binary_operator ~ expression_inner)* }
expression_inner = _{ method | definition | local_let | infix | literal | local}
// `let name = expression`
local_let = { "let" ~ ident ~ assign ~ expression }
// A unary operator applied to a literal or a local.
// `~` binds tighter than `|`, so the operand alternatives must be grouped;
// ungrouped, the rule also matched a bare `local` and never an operator
// applied to a local.
infix = { unary_operator ~ (literal | local) }
// Method call on a literal receiver: `literal.name` with an optional
// parenthesised, comma-separated argument list.
method = { literal ~ "." ~ ident ~ method_arguments? }
method_arguments = { "(" ~ (method_argument ~ ("," ~ method_argument)* )? ~ ")" }
method_argument = { expression }

// Definitions: `<ident> <name>[(arg: Type, ...)] <arguments>`.
definition = { ident ~ definition_what ~ arguments }
definition_what = { (ident | constant) ~ definition_arguments? }
definition_arguments = { "(" ~ (definition_argument ~ ("," ~ definition_argument)* )? ~ ")" }
definition_argument = { ident ~ ":" ~ constant }
// NOTE(review): `~` binds tighter than `|`, so this reads as
// `argument_block | (argument_short ~ (...)*)`; a leading `argument_block`
// cannot be followed by further arguments — confirm this is intended.
arguments = { argument_block | argument_short ~ (argument_block | argument_short)* }
// Block form: `<ident> expression* end`.
argument_block = _{ ident ~ expression* ~ "end" }
// Short form: `, <ident>: expression`.
argument_short = _{ "," ~ ident ~ ":" ~ expression }

literal = _{ constant | string | atom | float | integer }
// Lower-case identifier. A reserved word is rejected only when it is the
// whole identifier: the inner look-ahead requires that no identifier
// character follows it, so names like `letter` or `endpoint` still match.
ident = @{ !(reserved ~ !('a'..'z' | 'A'..'Z' | "_")) ~ 'a'..'z' ~ ('a'..'z' | 'A'..'Z' | "_")* }
// `:name`
atom = @{ ":" ~ ident }
// Upper-case initial letter followed by letters or underscores.
constant = @{ 'A'..'Z' ~ ('a'..'z' | 'A'..'Z' | "_")* }

// Floats: `<characteristic>.<mantissa>`; `_` is allowed in the characteristic.
float = ${ float_characteristic ~ "." ~ float_mantissa }
float_characteristic = { "0" | (('1'..'9') ~ ('0'..'9' | "_")*) }
float_mantissa = { ('0'..'9')+ }

// Integers: `0x` / `0o` / `0b` prefixed forms, plain decimal, or a lone zero.
// The `*_sigil` rules are silent, so the pair inside `integer` is the digit
// rule without its prefix.
integer = ${ integer_hexadecimal_sigil | integer_octal_sigil | integer_binary_sigil | integer_decimal | integer_zero }
integer_decimal = { '1'..'9' ~ ('0'..'9' | "_")* }
integer_hexadecimal = { ('0'..'9' | 'a'..'f' | 'A'..'F') ~ ('0'..'9' | 'a'..'f' | 'A'..'F' | "_")* }
integer_hexadecimal_sigil = _{ "0x" ~ integer_hexadecimal }
integer_octal = { ('0'..'7') ~ ('0'..'7' | "_" )* }
integer_octal_sigil = _{ "0o" ~ integer_octal }
integer_binary = { ('0'..'1') ~ ('0'..'1' | "_")* }
integer_binary_sigil = _{ "0b" ~ integer_binary }
integer_zero = { "0" }

// Double-quoted strings; `string_content` is the text between the quotes.
string = ${ string_single_line }
string_single_line = _{ string_delimiter ~ string_content ~ string_delimiter }
string_content = ${ (string_escape | !(string_delimiter | "\\") ~ ANY)* }
string_delimiter = _{ "\"" }
// `u` followed by exactly four hexadecimal digits.
string_unicode = _{ "u" ~ ('0'..'9' | 'a'..'f' | 'A'..'F'){4} }
string_escape = _{ "\\" ~ ("\"" | "\\" | "a" | "b" | "f" | "n" | "r" | "t" | "v" | string_unicode) }

local = { ident }

// Operator tokens.
exponent = { "**" }
multiply = { "*" }
divide = { "/" }
modulus = { "%" }
plus = { "+" }
minus = { "-" }
shift_left = { "<<" }
shift_right = { ">>" }
less_than = { "<" }
less_than_or_equal = { "<=" }
greater_than = { ">" }
greater_than_or_equal = { ">=" }
not_equal = { "!=" }
equal = { "==" }
logical_and = { "&&" }
logical_or = { "||" }
logical_not = { "!" }
bitwise_and = { "&" }
bitwise_or = { "|" }
bitwise_xor = { "^" }
assign = { "=" }

// Choice is ordered, so longer operators are listed before their prefixes
// (`**` before `*`, `<<` and `<=` before `<`, `==` before `=`, ...).
all_operators = _{
exponent |
multiply | divide | modulus |
plus | minus |
shift_left | shift_right |
less_than_or_equal | less_than |
greater_than_or_equal | greater_than |
not_equal | equal |
logical_and | logical_or | logical_not |
bitwise_and | bitwise_or | bitwise_xor |
assign
}
// Same as `all_operators` without `logical_not`.
binary_operator = _{
exponent |
multiply | divide | modulus |
plus | minus |
shift_left | shift_right |
less_than_or_equal | less_than |
greater_than_or_equal | greater_than |
not_equal | equal |
logical_and | logical_or |
bitwise_and | bitwise_or | bitwise_xor |
assign
}
unary_operator = _{ plus | minus | logical_not }

View file

@ -0,0 +1,3 @@
/// The pest parser generated from `grammar.pest`; the derive also generates
/// the `Rule` enum used throughout the crate.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub struct Grammar;

View file

@ -0,0 +1,28 @@
use pest;
/// A location in the parsed input, independent of pest's borrowed types.
#[derive(Debug, Clone)]
pub enum InputLocation {
    /// A single position.
    Pos(usize),
    /// A `(start, end)` range.
    Span(usize, usize),
}
impl From<pest::error::InputLocation> for InputLocation {
fn from(pest: pest::error::InputLocation) -> InputLocation {
match pest {
pest::error::InputLocation::Pos(pos) => InputLocation::Pos(pos),
pest::error::InputLocation::Span((start, end)) => InputLocation::Span(start, end),
}
}
}
/// Converts a pest position into `InputLocation::Pos`.
impl<'a> From<pest::Position<'a>> for InputLocation {
    fn from(position: pest::Position<'a>) -> InputLocation {
        let offset = position.pos();
        InputLocation::Pos(offset)
    }
}
/// Converts a pest span into `InputLocation::Span` using its start and end.
impl<'a> From<pest::Span<'a>> for InputLocation {
    fn from(span: pest::Span<'a>) -> InputLocation {
        let (start, end) = (span.start(), span.end());
        InputLocation::Span(start, end)
    }
}

13
huia-parser/src/lib.rs Normal file
View file

@ -0,0 +1,13 @@
#[macro_use]
extern crate lazy_static;
#[cfg_attr(test, macro_use)]
extern crate pest;
#[macro_use]
extern crate pest_derive;
mod ast;
mod error;
mod input_location;
mod grammar;