feat: working scanner for common tokens.
This is by no means complete - or even that smart, but it's enough to get started with.
This commit is contained in:
parent
d755f6ab3a
commit
0aeabb0af3
17 changed files with 1028 additions and 11 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
target
|
|
@ -1,13 +1,45 @@
|
|||
# You can override the included template(s) by including variable overrides
|
||||
# SAST customization: https://docs.gitlab.com/ee/user/application_security/sast/#customizing-the-sast-settings
|
||||
# Secret Detection customization: https://docs.gitlab.com/ee/user/application_security/secret_detection/#customizing-settings
|
||||
# Dependency Scanning customization: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/#customizing-the-dependency-scanning-settings
|
||||
# Container Scanning customization: https://docs.gitlab.com/ee/user/application_security/container_scanning/#customizing-the-container-scanning-settings
|
||||
# Note that environment variables can be set in several places
|
||||
# See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence
|
||||
image: rust:latest
|
||||
stages:
|
||||
- test
|
||||
sast:
|
||||
- dependencies
|
||||
- test
|
||||
- release
|
||||
|
||||
build:
|
||||
stage: dependencies
|
||||
cache:
|
||||
key: "$CI_JOB_NAME"
|
||||
paths:
|
||||
- $HOME/.cargo
|
||||
artifacts:
|
||||
paths:
|
||||
- $HOME/.cargo
|
||||
expire_in: 60 mins
|
||||
script:
|
||||
- cargo test --no-run --release
|
||||
|
||||
cargo test:
|
||||
stage: test
|
||||
include:
|
||||
- template: Security/SAST.gitlab-ci.yml
|
||||
dependencies:
|
||||
- build
|
||||
script:
|
||||
- rustc --version && cargo --version
|
||||
- cargo test --jobs 1 --release
|
||||
|
||||
cargo clippy:
|
||||
stage: test
|
||||
dependencies:
|
||||
- build
|
||||
before_script:
|
||||
- rustup component add clippy
|
||||
script:
|
||||
- cargo clippy --all-targets --all-features -- -D warnings
|
||||
# cargo publish:
|
||||
# stage: release
|
||||
# dependencies:
|
||||
# - build
|
||||
# only:
|
||||
# - /^v[0-9]+\.[0-9]+\.[0-9]+$/
|
||||
# except:
|
||||
# - branches
|
||||
# script:
|
||||
# - cargo publish --token $CARGO_TOKEN
|
||||
|
|
59
Cargo.lock
generated
Normal file
59
Cargo.lock
generated
Normal file
|
@ -0,0 +1,59 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "outrun"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"outrun-lexer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "outrun-lexer"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"unicode-general-category",
|
||||
"unicode-ident",
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "outrun-parser"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
|
||||
dependencies = [
|
||||
"tinyvec_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec_macros"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-general-category"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1218098468b8085b19a2824104c70d976491d247ce194bbd9dc77181150cdfd6"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
7
Cargo.toml
Normal file
7
Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[workspace]
|
||||
|
||||
members = [
|
||||
"outrun-parser",
|
||||
"outrun-lexer",
|
||||
"outrun",
|
||||
]
|
11
outrun-lexer/Cargo.toml
Normal file
11
outrun-lexer/Cargo.toml
Normal file
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
edition = "2021"
|
||||
name = "outrun-lexer"
|
||||
version = "0.1.0"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
unicode-general-category = "0.5.1"
|
||||
unicode-ident = "1.0.2"
|
||||
unicode-normalization = "0.1.21"
|
40
outrun-lexer/src/error.rs
Normal file
40
outrun-lexer/src/error.rs
Normal file
|
@ -0,0 +1,40 @@
|
|||
use crate::span::Span;
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Everything that can go wrong while scanning.
#[derive(Debug)]
pub enum Error {
    /// Input ended while a rule still required more characters.
    UnexpectedEof {
        span: Span,
    },
    /// A lexer-internal invariant failed; `message` describes it.
    Internal {
        span: Span,
        message: String,
    },
    /// The character at `span` was not acceptable here.
    /// `actual` is `None` when input ended instead of producing a character.
    UnexpectedChar {
        span: Span,
        actual: Option<char>,
        expected: Vec<char>,
    },
    /// Internal sentinel: the current rule did not match, so the caller
    /// should try the next alternative (see `Scanner::next`).
    Unmatched,
    /// Clean end of input: nothing left to scan.
    Eof,
}
|
||||
|
||||
impl Error {
|
||||
pub fn unexpected_eof(span: Span) -> Error {
|
||||
Error::UnexpectedEof { span }
|
||||
}
|
||||
|
||||
pub fn internal<T: ToString>(span: Span, message: T) -> Error {
|
||||
Error::Internal {
|
||||
span,
|
||||
message: message.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unexpected(span: Span, actual: Option<char>, expected: Vec<char>) -> Error {
|
||||
Error::UnexpectedChar {
|
||||
span,
|
||||
actual,
|
||||
expected,
|
||||
}
|
||||
}
|
||||
}
|
8
outrun-lexer/src/lib.rs
Normal file
8
outrun-lexer/src/lib.rs
Normal file
|
@ -0,0 +1,8 @@
|
|||
mod error;
|
||||
mod scanner;
|
||||
mod span;
|
||||
mod token;
|
||||
|
||||
pub use scanner::Scanner;
|
||||
pub use span::Span;
|
||||
pub use token::{Token, TokenKind, TokenValue};
|
15
outrun-lexer/src/scanner/classes.rs
Normal file
15
outrun-lexer/src/scanner/classes.rs
Normal file
|
@ -0,0 +1,15 @@
|
|||
use unicode_general_category::{get_general_category, GeneralCategory};
|
||||
|
||||
/// Reports whether `chr` is an ASCII decimal digit (`0`-`9`).
pub fn is_numeric(chr: char) -> bool {
    chr.is_ascii_digit()
}
|
||||
|
||||
pub fn is_whitespace(chr: char) -> bool {
|
||||
matches!(chr, ' ' | '\t' | '\n' | '\r')
|
||||
|| matches!(
|
||||
get_general_category(chr),
|
||||
GeneralCategory::SpaceSeparator
|
||||
| GeneralCategory::LineSeparator
|
||||
| GeneralCategory::ParagraphSeparator
|
||||
)
|
||||
}
|
147
outrun-lexer/src/scanner/combinators.rs
Normal file
147
outrun-lexer/src/scanner/combinators.rs
Normal file
|
@ -0,0 +1,147 @@
|
|||
use crate::error::Error;
|
||||
use crate::span::Span;
|
||||
use crate::Scanner;
|
||||
|
||||
pub fn match_one<F>(scanner: Scanner<'_>, matcher: F) -> Result<(Scanner<'_>, Span), Error>
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
let chr = scanner.current_char().ok_or(Error::Unmatched)?;
|
||||
if matcher(chr) {
|
||||
let new_scanner = scanner.advance();
|
||||
return Ok((new_scanner, Span::new(scanner.pos, new_scanner.pos)));
|
||||
}
|
||||
|
||||
Err(Error::Unmatched)
|
||||
}
|
||||
|
||||
pub fn match_many0<F>(scanner: Scanner<'_>, matcher: F) -> Result<(Scanner<'_>, Span), Error>
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
let mut new_scanner = scanner;
|
||||
|
||||
while let Some(chr) = new_scanner.current_char() {
|
||||
if matcher(chr) {
|
||||
new_scanner = new_scanner.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if new_scanner.pos != scanner.pos {
|
||||
Ok((new_scanner, (scanner.pos, new_scanner.pos).into()))
|
||||
} else {
|
||||
Ok((scanner, scanner.pos.into()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_many1<F>(scanner: Scanner<'_>, matcher: F) -> Result<(Scanner<'_>, Span), Error>
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
let (scanner, head) = match_one(scanner, &matcher)?;
|
||||
let (scanner, tail) = match_many0(scanner, &matcher)?;
|
||||
Ok((scanner, head.extend(tail)))
|
||||
}
|
||||
|
||||
pub fn match_exact<'a>(scanner: Scanner<'a>, pattern: &str) -> Result<(Scanner<'a>, Span), Error> {
|
||||
let mut pattern_chars = pattern.chars();
|
||||
let mut new_scanner = scanner;
|
||||
|
||||
for _ in 0..pattern.chars().count() {
|
||||
let p = pattern_chars.next().ok_or(Error::Unmatched)?;
|
||||
let s = new_scanner.current_char().ok_or(Error::Unmatched)?;
|
||||
if p == s {
|
||||
new_scanner = new_scanner.advance();
|
||||
} else {
|
||||
return Err(Error::Unmatched);
|
||||
}
|
||||
}
|
||||
Ok((new_scanner, Span::new(scanner.pos, new_scanner.pos)))
|
||||
}
|
||||
|
||||
// Unit tests for the primitive combinators. Each test uses a small literal
// input and checks the resulting span endpoints and scanner position.
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn match_one_on_success() {
        let initial_scanner = Scanner::new("123");
        let (finish_scanner, span) = match_one(initial_scanner, |c| c == '1').unwrap();

        assert_eq!(finish_scanner.remaining(), "23");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 1);
        assert_eq!(finish_scanner.pos, 1);
    }

    #[test]
    fn match_one_on_failure() {
        let initial_scanner = Scanner::new("123");
        let result = match_one(initial_scanner, |c| c == '2');
        assert!(result.is_err());
    }

    // `match_many0` succeeds even with zero matches, leaving the scanner
    // untouched and returning an empty span.
    #[test]
    fn match_many0_matches_none() {
        let initial_scanner = Scanner::new("123");
        let (finish_scanner, span) = match_many0(initial_scanner, |c| c == '4').unwrap();
        assert_eq!(finish_scanner, initial_scanner);
        assert_eq!(finish_scanner.remaining(), "123");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 0);
    }

    #[test]
    fn match_many0_matches_one() {
        let initial_scanner = Scanner::new("123");
        let (finish_scanner, span) = match_many0(initial_scanner, |c| c == '1').unwrap();
        assert_eq!(finish_scanner.remaining(), "23");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 1);
    }

    #[test]
    fn match_many0_matches_many() {
        let initial_scanner = Scanner::new("11123");
        let (finish_scanner, span) = match_many0(initial_scanner, |c| c == '1').unwrap();
        assert_eq!(finish_scanner.remaining(), "23");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 3);
    }

    // `match_many1` requires at least one match.
    #[test]
    fn match_many1_doesnt_match_none() {
        let initial_scanner = Scanner::new("23");
        let result = match_many1(initial_scanner, |c| c == '1');
        assert!(result.is_err());
    }

    #[test]
    fn match_many1_matches_one() {
        let initial_scanner = Scanner::new("123");
        let (finish_scanner, span) = match_many1(initial_scanner, |c| c == '1').unwrap();
        assert_eq!(finish_scanner.remaining(), "23");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 1);
    }

    #[test]
    fn match_many1_matches_many() {
        let initial_scanner = Scanner::new("11123");
        let (finish_scanner, span) = match_many1(initial_scanner, |c| c == '1').unwrap();
        assert_eq!(finish_scanner.remaining(), "23");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 3);
    }

    #[test]
    fn match_exact_matches_exactly() {
        let initial_scanner = Scanner::new("abcdef 12345");
        let (finish_scanner, span) = match_exact(initial_scanner, "abcdef").unwrap();
        assert_eq!(finish_scanner.remaining(), " 12345");
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 6)
    }
}
|
197
outrun-lexer/src/scanner/mod.rs
Normal file
197
outrun-lexer/src/scanner/mod.rs
Normal file
|
@ -0,0 +1,197 @@
|
|||
use crate::error::Error;
|
||||
use crate::token::{Token, TokenKind};
|
||||
mod classes;
|
||||
mod combinators;
|
||||
mod rules;
|
||||
|
||||
use rules::*;
|
||||
|
||||
/// Result of a single scan step: the advanced scanner plus the token it
/// produced, or the error that stopped it.
pub type ScanResult<'a> = std::result::Result<(Scanner<'a>, Token), Error>;
/// Module-internal shorthand for fallible scanner helpers.
type Result<T> = std::result::Result<T, Error>;

/// An immutable cursor over the source text.
///
/// `Scanner` is `Copy`: every matching operation returns a *new* scanner
/// rather than mutating in place, which makes backtracking between
/// alternative rules free.
#[derive(Clone, Debug, PartialEq, Copy)]
pub struct Scanner<'a> {
    input: &'a str, // the full source text being scanned
    pos: usize,     // current position, counted in characters (not bytes)
    len: usize,     // total number of characters in `input`
}
||||
|
||||
impl<'a> Scanner<'a> {
|
||||
pub fn new(input: &'a str) -> Self {
|
||||
let len = input.chars().count();
|
||||
Scanner { input, len, pos: 0 }
|
||||
}
|
||||
|
||||
pub fn next(self) -> ScanResult<'a> {
|
||||
self.eof()?;
|
||||
let scanner = skip_whitespace(self)?;
|
||||
scanner.eof()?;
|
||||
|
||||
match_float(scanner)
|
||||
.or_else(|_| match_integer(scanner))
|
||||
.or_else(|_| match_boolean(scanner))
|
||||
.or_else(|_| match_string(scanner))
|
||||
.or_else(|_| match_terminal(scanner, "use", TokenKind::KeywordUse))
|
||||
.or_else(|_| match_terminal(scanner, "type", TokenKind::KeywordType))
|
||||
.or_else(|_| match_terminal(scanner, "protocol", TokenKind::KeywordProtocol))
|
||||
.or_else(|_| match_terminal(scanner, "end", TokenKind::KeywordEnd))
|
||||
.or_else(|_| match_terminal(scanner, "impl", TokenKind::KeywordImpl))
|
||||
.or_else(|_| match_terminal(scanner, "defp", TokenKind::KeywordDefPrivate))
|
||||
.or_else(|_| match_terminal(scanner, "def", TokenKind::KeywordDef))
|
||||
.or_else(|_| match_identifier(scanner))
|
||||
.or_else(|_| match_terminal(scanner, "(", TokenKind::LeftParen))
|
||||
.or_else(|_| match_terminal(scanner, ")", TokenKind::RightParen))
|
||||
.or_else(|_| match_terminal(scanner, "[", TokenKind::LeftSquare))
|
||||
.or_else(|_| match_terminal(scanner, "]", TokenKind::RightSquare))
|
||||
.or_else(|_| match_terminal(scanner, "{", TokenKind::LeftBrace))
|
||||
.or_else(|_| match_terminal(scanner, "}", TokenKind::RightBrace))
|
||||
.or_else(|_| match_terminal(scanner, "+", TokenKind::Plus))
|
||||
.or_else(|_| match_terminal(scanner, "-", TokenKind::Minus))
|
||||
.or_else(|_| match_terminal(scanner, "*", TokenKind::Star))
|
||||
.or_else(|_| match_terminal(scanner, "/", TokenKind::ForwardSlash))
|
||||
.or_else(|_| match_terminal(scanner, "%", TokenKind::Percent))
|
||||
.or_else(|_| match_terminal(scanner, ":", TokenKind::Colon))
|
||||
.or_else(|_| match_terminal(scanner, ",", TokenKind::Comma))
|
||||
.or_else(|_| match_terminal(scanner, ".", TokenKind::Dot))
|
||||
.or_else(|_| match_terminal(scanner, ">>", TokenKind::GtGt))
|
||||
.or_else(|_| match_terminal(scanner, ">=", TokenKind::GtEq))
|
||||
.or_else(|_| match_terminal(scanner, ">", TokenKind::Gt))
|
||||
.or_else(|_| match_terminal(scanner, "<<", TokenKind::LtLt))
|
||||
.or_else(|_| match_terminal(scanner, "<=", TokenKind::LtEq))
|
||||
.or_else(|_| match_terminal(scanner, "<", TokenKind::Lt))
|
||||
.or_else(|_| match_terminal(scanner, "==", TokenKind::EqEq))
|
||||
.or_else(|_| match_terminal(scanner, "=", TokenKind::Eq))
|
||||
.or_else(|_| match_terminal(scanner, "&&", TokenKind::AndAnd))
|
||||
.or_else(|_| match_terminal(scanner, "&", TokenKind::And))
|
||||
.or_else(|_| match_terminal(scanner, "|", TokenKind::Pipe))
|
||||
.or_else(|_| match_terminal(scanner, "||", TokenKind::PipePipe))
|
||||
.or_else(|_| match_terminal(scanner, "!=", TokenKind::BangEq))
|
||||
.or_else(|_| match_terminal(scanner, "!", TokenKind::Bang))
|
||||
.map_err(|error| match error {
|
||||
Error::Unmatched => {
|
||||
Error::unexpected(scanner.pos.into(), scanner.current_char(), Vec::new())
|
||||
}
|
||||
error => error,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_at_end(&self) -> bool {
|
||||
self.pos >= self.len
|
||||
}
|
||||
|
||||
pub fn advance(self) -> Self {
|
||||
Scanner {
|
||||
input: self.input,
|
||||
pos: self.pos + 1,
|
||||
len: self.len,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remaining(&self) -> &str {
|
||||
if self.pos < self.len {
|
||||
let (index, _) = self.input.char_indices().nth(self.pos).unwrap();
|
||||
&self.input[index..]
|
||||
} else {
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
pub fn eof(&self) -> Result<()> {
|
||||
if self.is_at_end() {
|
||||
Err(Error::Eof)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_char(&self) -> Option<char> {
|
||||
self.input.chars().nth(self.pos + 1)
|
||||
}
|
||||
|
||||
pub fn current_char(&self) -> Option<char> {
|
||||
self.input.chars().nth(self.pos)
|
||||
}
|
||||
|
||||
pub fn current_char_or_error(&self) -> Result<char> {
|
||||
self.input.chars().nth(self.pos).ok_or_else(|| {
|
||||
Error::internal(
|
||||
self.pos.into(),
|
||||
format!(
|
||||
"Internal error while retrieving character at position {}",
|
||||
self.pos
|
||||
),
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// End-to-end checks of `Scanner::next` over small literal inputs.
#[cfg(test)]
mod test {
    use super::*;
    use crate::span::Span;
    use crate::token::TokenValue;

    // Convenience: build a scanner over `input` and take one scan step.
    fn scan(input: &str) -> ScanResult<'_> {
        let scanner = Scanner::new(input);
        scanner.next()
    }

    #[test]
    fn scanner_new_is_at_zero() {
        let scanner = Scanner::new("1");
        assert_eq!(scanner.pos, 0);
    }

    // Empty or whitespace-only input surfaces as `Error::Eof`.
    #[test]
    fn scanner_next_with_empty_input() {
        assert!(matches!(scan(""), Err(Error::Eof)));
    }

    #[test]
    fn scanner_next_with_whitespace() {
        assert!(matches!(scan("     "), Err(Error::Eof)));
    }

    #[test]
    fn scanner_with_integer_input() {
        let (_, token) = scan("1").expect("We should be able to lex integers");
        assert_eq!(token.value, TokenValue::Integer(1));
        assert_eq!(token.span, Span::new(0, 1));
    }

    #[test]
    fn scanner_with_long_integer_input() {
        let (_, token) = scan("1234567890").expect("We should be able to lex integers");
        assert_eq!(token.value, TokenValue::Integer(1234567890));
        assert_eq!(token.span, Span::new(0, 10));
    }

    #[test]
    fn scanner_with_float_input() {
        let (_, token) = scan("0.0").expect("We should be able to lex floats");
        assert_eq!(token.value, TokenValue::Float(0.0));
        assert_eq!(token.span, Span::new(0, 3));
    }

    #[test]
    fn scanner_exhausts_input() {
        let (scanner, _) = scan("1").unwrap();
        assert!(
            scanner.next().is_err(),
            "Should not be able to read past the end of input"
        );
    }

    // The token's span reflects its real position after skipped whitespace.
    #[test]
    fn scanner_skips_leading_whitespace() {
        let (_, token) = scan("     1").unwrap();
        assert_eq!(token.value, TokenValue::Integer(1));
        assert_eq!(token.span, Span::new(5, 6));
    }

    #[test]
    fn scanner_parses_terminals() {
        let (_, token) = scan("(").unwrap();
        assert_eq!(token.kind, TokenKind::LeftParen);
    }
}
|
258
outrun-lexer/src/scanner/rules.rs
Normal file
258
outrun-lexer/src/scanner/rules.rs
Normal file
|
@ -0,0 +1,258 @@
|
|||
use crate::error::Error;
|
||||
use crate::scanner::classes::*;
|
||||
use crate::scanner::combinators::*;
|
||||
use crate::scanner::{ScanResult, Scanner};
|
||||
use crate::span::Span;
|
||||
use crate::token::{Token, TokenKind, TokenValue};
|
||||
|
||||
use unicode_ident::{is_xid_continue, is_xid_start};
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
pub fn skip_whitespace(scanner: Scanner<'_>) -> Result<Scanner<'_>, Error> {
|
||||
let (scanner, _) = match_many0(scanner, is_whitespace)?;
|
||||
Ok(scanner)
|
||||
}
|
||||
|
||||
pub fn match_integer(scanner: Scanner<'_>) -> ScanResult<'_> {
|
||||
let (scanner, span) = match_many1(scanner, is_numeric)?;
|
||||
let value = span
|
||||
.extract(scanner.input)
|
||||
.ok_or_else(|| Error::internal(span, "Unable to retrieve value from input"))?;
|
||||
let value = value.parse::<i64>().map_err(|e| {
|
||||
Error::internal(
|
||||
span,
|
||||
format!("Unable to parse {:?} as integer: {:?}", value, e),
|
||||
)
|
||||
})?;
|
||||
let token = Token::new(TokenKind::Integer, TokenValue::Integer(value), span);
|
||||
Ok((scanner, token))
|
||||
}
|
||||
|
||||
pub fn match_float(scanner: Scanner<'_>) -> ScanResult<'_> {
|
||||
let (scanner, head) = match_many1(scanner, is_numeric)?;
|
||||
let (scanner, _) = match_one(scanner, |c| c == '.')?;
|
||||
let (scanner, tail) = match_many1(scanner, is_numeric)?;
|
||||
|
||||
let span = head.extend(tail);
|
||||
let value = span
|
||||
.extract(scanner.input)
|
||||
.expect("Span out of input range!");
|
||||
let value = value.parse::<f64>().map_err(|e| {
|
||||
Error::internal(
|
||||
span,
|
||||
format!("Unable to parse {:?} as float: {:?}", value, e),
|
||||
)
|
||||
})?;
|
||||
let token = Token::new(TokenKind::Float, TokenValue::Float(value), span);
|
||||
Ok((scanner, token))
|
||||
}
|
||||
|
||||
pub fn match_terminal<'a>(scanner: Scanner<'a>, pattern: &str, kind: TokenKind) -> ScanResult<'a> {
|
||||
match_exact(scanner, pattern)
|
||||
.map(|(scanner, span)| (scanner, Token::new(kind, TokenValue::None, span)))
|
||||
}
|
||||
|
||||
pub fn match_boolean(scanner: Scanner<'_>) -> ScanResult<'_> {
|
||||
match_exact(scanner, "true")
|
||||
.map(|(scanner, span)| {
|
||||
(
|
||||
scanner,
|
||||
Token::new(TokenKind::Boolean, TokenValue::Boolean(true), span),
|
||||
)
|
||||
})
|
||||
.or_else(|_| {
|
||||
match_exact(scanner, "false").map(|(scanner, span)| {
|
||||
(
|
||||
scanner,
|
||||
Token::new(TokenKind::Boolean, TokenValue::Boolean(false), span),
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Scan a double-quoted string literal into a `String` token.
///
/// The body may contain `\"` escapes; any other character except an
/// unescaped `"` is accepted verbatim. The token's span covers both quotes,
/// while its value holds the unescaped contents.
pub fn match_string(scanner: Scanner<'_>) -> ScanResult<'_> {
    let (scanner, begin) = match_exact(scanner, r#"""#)?;

    // Accumulate the span of everything between the quotes; starts empty.
    let mut contents = Span::at(scanner.pos);
    let mut loop_scanner = scanner;

    // Consume either an escaped quote (two characters) or any single
    // non-quote character, until the closing quote or end of input.
    while let Ok((scanner, span)) =
        match_exact(loop_scanner, r#"\""#).or_else(|_| match_one(loop_scanner, |c| c != '"'))
    {
        loop_scanner = scanner;
        contents = contents.extend(span);
    }

    // The closing quote is mandatory; report what we expected if missing.
    let (scanner, end) = match_exact(loop_scanner, r#"""#).map_err(|error| match error {
        Error::Unmatched => Error::unexpected(
            loop_scanner.pos.into(),
            loop_scanner.current_char(),
            vec!['"'],
        ),
        error => error,
    })?;

    // Materialise the body and collapse `\"` escapes into plain quotes.
    let value = contents
        .extract(scanner.input)
        .ok_or_else(|| Error::internal(contents, "Unable to extract string contents"))
        .map(|s| s.replace(r#"\""#, r#"""#))?;

    Ok((
        scanner,
        Token::new(
            TokenKind::String,
            TokenValue::String(value),
            begin.extend(end),
        ),
    ))
}
|
||||
|
||||
pub fn match_identifier(scanner: Scanner<'_>) -> ScanResult<'_> {
|
||||
let (scanner, start) = match_one(scanner, is_xid_start)?;
|
||||
let (scanner, end) = match_many0(scanner, is_xid_continue)?;
|
||||
|
||||
let span = start.extend(end);
|
||||
|
||||
let value = span
|
||||
.extract(scanner.input)
|
||||
.ok_or_else(|| Error::internal(span, "Unable to extract identifier contents"))
|
||||
.map(|s| s.nfc().collect::<String>())?;
|
||||
|
||||
Ok((
|
||||
scanner,
|
||||
Token::new(TokenKind::Identifier, TokenValue::Identifier(value), span),
|
||||
))
|
||||
}
|
||||
|
||||
// Unit tests for the individual scan rules.
#[cfg(test)]
mod test {
    use super::*;
    use crate::span::Span;

    // The input here is five spaces, a tab, a newline, then "0".
    #[test]
    fn skip_whitespace_always_moves_forward() {
        let scanner = Scanner::new("     \t\n0");
        let scanner = skip_whitespace(scanner).unwrap();
        assert_eq!(scanner.pos, 7);
    }

    #[test]
    fn match_integer_9_matches() {
        let scanner = Scanner::new("9");
        let (_, token) = match_integer(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Integer(9));
    }

    #[test]
    fn match_integer_0_matches() {
        let scanner = Scanner::new("0");
        let (scanner, token) = match_integer(scanner).unwrap();
        assert_eq!(scanner.remaining(), "");
        assert_eq!(token.value, TokenValue::Integer(0));
    }

    #[test]
    fn match_integer_1234567890_matches() {
        let scanner = Scanner::new("1234567890");
        let (_, token) = match_integer(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Integer(1234567890));
    }

    #[test]
    fn match_float_0_0_matches() {
        let scanner = Scanner::new("0.0");
        let (_, token) = match_float(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Float(0.0));
    }

    #[test]
    fn match_float_123_456_matches() {
        let scanner = Scanner::new("123.456");
        let (_, token) = match_float(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Float(123.456));
    }

    #[test]
    fn match_float_abc_fails() {
        let scanner = Scanner::new("abc");
        assert!(match_float(scanner).is_err());
    }

    #[test]
    fn match_terminal_matches() {
        let scanner = Scanner::new("(");
        let (_, token) = match_terminal(scanner, "(", TokenKind::LeftParen).unwrap();
        assert_eq!(token.kind, TokenKind::LeftParen);
        assert_eq!(token.span, Span::new(0, 1));
    }

    #[test]
    fn match_true() {
        let scanner = Scanner::new("true");
        let (_, token) = match_boolean(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Boolean);
        assert_eq!(token.value, TokenValue::Boolean(true));
        assert_eq!(token.span, Span::new(0, 4));
    }

    #[test]
    fn match_false() {
        let scanner = Scanner::new("false");
        let (_, token) = match_boolean(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Boolean);
        assert_eq!(token.value, TokenValue::Boolean(false));
        assert_eq!(token.span, Span::new(0, 5));
    }

    // The string token's span includes the quotes; its value does not.
    #[test]
    fn match_string_simple() {
        let scanner = Scanner::new("\"Magical Sound Shower\"");
        let (_, token) = match_string(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::String);
        assert_eq!(
            token.value,
            TokenValue::String("Magical Sound Shower".to_string())
        );
        assert_eq!(token.span, Span::new(0, 22));
    }

    // Escaped `\"` sequences are unescaped in the token value.
    #[test]
    fn match_string_escaped() {
        let scanner = Scanner::new(r#""\"GUNSHIP\"""#);
        let (_, token) = match_string(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::String);
        assert_eq!(token.value, TokenValue::String(r#""GUNSHIP""#.to_string()));
        assert_eq!(token.span, Span::new(0, 13));
    }

    #[test]
    fn match_identifier_title_case() {
        let scanner = Scanner::new("DarkAllDay");
        let (_, token) = match_identifier(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Identifier);
        assert_eq!(
            token.value,
            TokenValue::Identifier("DarkAllDay".to_string())
        );
        assert_eq!(token.span, Span::new(0, 10));
    }

    #[test]
    fn match_identifier_snake_case() {
        let scanner = Scanner::new("dark_all_day");
        let (_, token) = match_identifier(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Identifier);
        assert_eq!(
            token.value,
            TokenValue::Identifier("dark_all_day".to_string())
        );
        assert_eq!(token.span, Span::new(0, 12));
    }

    // A single multi-byte character: span counts characters, not bytes.
    #[test]
    fn match_identifier_unicode() {
        let scanner = Scanner::new("國");
        let (_, token) = match_identifier(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Identifier);
        assert_eq!(token.value, TokenValue::Identifier("國".to_string()));
        assert_eq!(token.span, Span::new(0, 1));
    }
}
|
120
outrun-lexer/src/span.rs
Normal file
120
outrun-lexer/src/span.rs
Normal file
|
@ -0,0 +1,120 @@
|
|||
/// A half-open range of character positions (`start..end`) within the
/// scanner's input. Positions count characters, not bytes.
#[derive(Clone, Debug, PartialEq, Copy)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    /// An empty span positioned at `pos`.
    pub fn at(pos: usize) -> Span {
        Span { start: pos, end: pos }
    }

    /// A span covering the character positions `start..end`.
    pub fn new(start: usize, end: usize) -> Span {
        Span { start, end }
    }

    /// Slice the text this span covers out of `input`.
    ///
    /// Returns `Some("")` for an empty span and `None` when the span does
    /// not fit inside `input` (including inverted spans, which previously
    /// underflowed). Because positions are character counts, byte offsets
    /// are found by walking `char_indices`.
    pub fn extract<'a>(&self, input: &'a str) -> Option<&'a str> {
        // Treat an inverted span as not extractable rather than underflowing.
        let len = self.end.checked_sub(self.start)?;
        if len == 0 {
            return Some("");
        }

        let mut chars = input.char_indices();
        // Byte offset of the first character in the span.
        let (start, _) = chars.nth(self.start)?;
        // The remaining `len - 1` characters of the span must exist.
        for _ in 1..len {
            chars.next()?;
        }
        // The character *after* the span marks its end byte; when the span
        // runs exactly to the end of the input there is no such character.
        match chars.next() {
            Some((end, _)) => input.get(start..end),
            None => input.get(start..),
        }
    }

    /// The smallest span covering both `self` and `other`.
    pub fn extend(self, other: Span) -> Span {
        Span {
            start: self.start.min(other.start),
            end: self.end.max(other.end),
        }
    }
}
|
||||
|
||||
impl From<usize> for Span {
|
||||
fn from(pos: usize) -> Span {
|
||||
Span {
|
||||
start: pos,
|
||||
end: pos,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(usize, usize)> for Span {
|
||||
fn from(span: (usize, usize)) -> Span {
|
||||
Span {
|
||||
start: span.0,
|
||||
end: span.1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unit tests for `Span` construction, merging, and extraction.
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn span_at() {
        let span = Span::at(13);

        assert_eq!(span.start, 13);
        assert_eq!(span.end, 13);
    }

    #[test]
    fn span_new() {
        let span = Span::new(13, 27);

        assert_eq!(span.start, 13);
        assert_eq!(span.end, 27);
    }

    // `extend` produces the covering span regardless of argument order.
    #[test]
    fn span_extend() {
        let span0 = Span::new(13, 27);
        let span1 = Span::new(3, 19);

        let span = span0.extend(span1);
        assert_eq!(span.start, 3);
        assert_eq!(span.end, 27);
    }

    #[test]
    fn span_extract() {
        let buffer = "Passing Breeze";
        assert_eq!(Span::new(3, 3).extract(buffer).unwrap(), "");
        assert_eq!(Span::new(3, 4).extract(buffer).unwrap(), "s");
        assert_eq!(Span::new(3, 10).extract(buffer).unwrap(), "sing Br");
        assert_eq!(Span::at(0).extract(buffer).unwrap(), "");
        assert_eq!(Span::new(0, 14).extract(buffer).unwrap(), "Passing Breeze");
    }

    // Sanity check of std slice-range semantics, not of `Span` itself.
    #[test]
    fn test_string_indexing() {
        let s = "abcdef 12345";
        assert_eq!(&s[0..=0], "a");
        assert_eq!(&s[3..4], "d");
        assert_eq!(&s[3..=4], "de");
    }
}
|
74
outrun-lexer/src/token.rs
Normal file
74
outrun-lexer/src/token.rs
Normal file
|
@ -0,0 +1,74 @@
|
|||
use crate::span::Span;
|
||||
|
||||
/// The syntactic category of a token.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenKind {
    // Literals
    Boolean,
    Float,
    Identifier,
    Integer,
    String,

    // Symbols, named after their glyphs; doubled names are two-character
    // operators (e.g. `GtGt` is ">>", `PipePipe` is "||").
    And,
    AndAnd,
    Bang,
    BangEq,
    Colon,
    Comma,
    Dot,
    Eq,
    EqEq,
    ForwardSlash,
    Gt,
    GtEq,
    GtGt,
    LeftBrace,
    LeftParen,
    LeftSquare,
    Lt,
    LtEq,
    LtLt,
    Minus,
    Percent,
    Pipe,
    PipePipe,
    Plus,
    RightBrace,
    RightParen,
    RightSquare,
    Star,

    // Keywords
    KeywordDef,
    KeywordDefPrivate,
    KeywordEnd,
    KeywordImpl,
    KeywordProtocol,
    KeywordType,
    KeywordUse,
}
|
||||
|
||||
/// The semantic payload carried by a token, if any.
///
/// Punctuation and keyword tokens carry `None`; literal and identifier
/// tokens carry their parsed value.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenValue {
    // NOTE(review): `Constant` is not produced by any scanner rule in this
    // file — presumably reserved for a future rule; confirm before relying
    // on it.
    Constant(Vec<TokenValue>),
    Identifier(String),
    Integer(i64),
    Float(f64),
    Boolean(bool),
    String(String),
    None,
}
|
||||
|
||||
/// A single lexed token: its category, optional payload, and the span of
/// input it was scanned from.
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub value: TokenValue,
    pub span: Span,
}
|
||||
|
||||
impl Token {
    /// Construct a token from its parts.
    pub fn new(kind: TokenKind, value: TokenValue, span: Span) -> Token {
        Token { kind, value, span }
    }
}
|
8
outrun-parser/Cargo.toml
Normal file
8
outrun-parser/Cargo.toml
Normal file
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "outrun-parser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
1
outrun-parser/src/lib.rs
Normal file
1
outrun-parser/src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
|||
|
9
outrun/Cargo.toml
Normal file
9
outrun/Cargo.toml
Normal file
|
@ -0,0 +1,9 @@
|
|||
[package]
|
||||
edition = "2021"
|
||||
name = "outrun"
|
||||
version = "0.1.0"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
outrun-lexer = {path = "../outrun-lexer"}
|
30
outrun/src/main.rs
Normal file
30
outrun/src/main.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
extern crate outrun_lexer;
|
||||
use outrun_lexer::Scanner;
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
let mut file = File::open("test.or")?;
|
||||
let mut source = String::new();
|
||||
file.read_to_string(&mut source)?;
|
||||
|
||||
let mut scanner = Scanner::new(&source);
|
||||
|
||||
loop {
|
||||
match scanner.next() {
|
||||
Ok((new_scanner, token)) => {
|
||||
println!("token: {:?}", token);
|
||||
scanner = new_scanner;
|
||||
}
|
||||
Err(error) => {
|
||||
println!("error: {:?}", error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("Scanner: {:?}", scanner);
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in a new issue