feat: working scanner for common tokens.

This is by no means complete - or even that smart, but it's enough to get started with.
This commit is contained in:
James Harton 2022-07-15 21:27:25 +12:00
parent d755f6ab3a
commit 0aeabb0af3
17 changed files with 1028 additions and 11 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
target

View file

@ -1,13 +1,45 @@
# You can override the included template(s) by including variable overrides
# SAST customization: https://docs.gitlab.com/ee/user/application_security/sast/#customizing-the-sast-settings
# Secret Detection customization: https://docs.gitlab.com/ee/user/application_security/secret_detection/#customizing-settings
# Dependency Scanning customization: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/#customizing-the-dependency-scanning-settings
# Container Scanning customization: https://docs.gitlab.com/ee/user/application_security/container_scanning/#customizing-the-container-scanning-settings
# Note that environment variables can be set in several places
# See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence
image: rust:latest
stages:
- test
sast:
- dependencies
- test
- release
build:
stage: dependencies
cache:
key: "$CI_JOB_NAME"
paths:
- $HOME/.cargo
artifacts:
paths:
- $HOME/.cargo
expire_in: 60 mins
script:
- cargo test --no-run --release
cargo test:
stage: test
include:
- template: Security/SAST.gitlab-ci.yml
dependencies:
- build
script:
- rustc --version && cargo --version
- cargo test --jobs 1 --release
cargo clippy:
stage: test
dependencies:
- build
before_script:
- rustup component add clippy
script:
- cargo clippy --all-targets --all-features -- -D warnings
# cargo publish:
# stage: release
# dependencies:
# - build
# only:
# - /^v[0-9]+\.[0-9]+\.[0-9]+$/
# except:
# - branches
# script:
# - cargo publish --token $CARGO_TOKEN

59
Cargo.lock generated Normal file
View file

@ -0,0 +1,59 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "outrun"
version = "0.1.0"
dependencies = [
"outrun-lexer",
]
[[package]]
name = "outrun-lexer"
version = "0.1.0"
dependencies = [
"unicode-general-category",
"unicode-ident",
"unicode-normalization",
]
[[package]]
name = "outrun-parser"
version = "0.1.0"
[[package]]
name = "tinyvec"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "unicode-general-category"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1218098468b8085b19a2824104c70d976491d247ce194bbd9dc77181150cdfd6"
[[package]]
name = "unicode-ident"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
[[package]]
name = "unicode-normalization"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6"
dependencies = [
"tinyvec",
]

7
Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[workspace]
members = [
"outrun-parser",
"outrun-lexer",
"outrun",
]

11
outrun-lexer/Cargo.toml Normal file
View file

@ -0,0 +1,11 @@
[package]
edition = "2021"
name = "outrun-lexer"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
unicode-general-category = "0.5.1"
unicode-ident = "1.0.2"
unicode-normalization = "0.1.21"

40
outrun-lexer/src/error.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::span::Span;
/// Errors produced while scanning source text.
///
/// Spans are expressed in the same character positions used by the scanner.
#[derive(Debug)]
pub enum Error {
    /// The input ended in the middle of a construct.
    UnexpectedEof {
        span: Span,
    },
    /// An invariant of the scanner itself was violated (for example a literal
    /// that matched the grammar but could not be converted to a value).
    Internal {
        span: Span,
        message: String,
    },
    /// A character (or end of input, when `actual` is `None`) was found where
    /// something else was required. `expected` may be empty when the scanner
    /// has no specific suggestion.
    UnexpectedChar {
        span: Span,
        actual: Option<char>,
        expected: Vec<char>,
    },
    /// A rule did not apply at the current position. Used by the combinators
    /// as a soft failure so that another rule can be tried.
    Unmatched,
    /// The scanner has no more input to read.
    Eof,
}

impl Error {
    /// Builds an [`Error::UnexpectedEof`] located at `span`.
    pub fn unexpected_eof(span: Span) -> Error {
        Error::UnexpectedEof { span }
    }

    /// Builds an [`Error::Internal`] located at `span`; `message` is stored
    /// as an owned `String`.
    pub fn internal<T: ToString>(span: Span, message: T) -> Error {
        Error::Internal {
            span,
            message: message.to_string(),
        }
    }

    /// Builds an [`Error::UnexpectedChar`] located at `span`.
    pub fn unexpected(span: Span, actual: Option<char>, expected: Vec<char>) -> Error {
        Error::UnexpectedChar {
            span,
            actual,
            expected,
        }
    }
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::UnexpectedEof { span } => {
                write!(f, "unexpected end of input at position {}", span.start)
            }
            Error::Internal { span, message } => write!(
                f,
                "internal error at {}..{}: {}",
                span.start, span.end, message
            ),
            Error::UnexpectedChar {
                span,
                actual,
                expected,
            } => {
                match actual {
                    Some(chr) => {
                        write!(f, "unexpected character {:?} at position {}", chr, span.start)?
                    }
                    None => write!(f, "unexpected end of input at position {}", span.start)?,
                }
                // Only mention expectations when the scanner supplied some.
                if !expected.is_empty() {
                    write!(f, ", expected one of {:?}", expected)?;
                }
                Ok(())
            }
            Error::Unmatched => write!(f, "input did not match"),
            Error::Eof => write!(f, "end of input"),
        }
    }
}

impl std::error::Error for Error {}

8
outrun-lexer/src/lib.rs Normal file
View file

@ -0,0 +1,8 @@
mod error;
mod scanner;
mod span;
mod token;

// `Error` and `ScanResult` appear in the signature of `Scanner::next`, so they
// are re-exported to let callers name the result type and match on failures.
pub use error::Error;
pub use scanner::{ScanResult, Scanner};
pub use span::Span;
pub use token::{Token, TokenKind, TokenValue};

View file

@ -0,0 +1,15 @@
use unicode_general_category::{get_general_category, GeneralCategory};
/// Returns `true` when `chr` is one of the ASCII decimal digits `0` through `9`.
pub fn is_numeric(chr: char) -> bool {
    chr.is_ascii_digit()
}
/// Returns `true` when `chr` should be skipped as whitespace.
///
/// Space, tab, line feed and carriage return are accepted directly; any other
/// character qualifies when its Unicode general category is a space, line or
/// paragraph separator.
pub fn is_whitespace(chr: char) -> bool {
    match chr {
        ' ' | '\t' | '\n' | '\r' => true,
        other => matches!(
            get_general_category(other),
            GeneralCategory::SpaceSeparator
                | GeneralCategory::LineSeparator
                | GeneralCategory::ParagraphSeparator
        ),
    }
}

View file

@ -0,0 +1,147 @@
use crate::error::Error;
use crate::span::Span;
use crate::Scanner;
pub fn match_one<F>(scanner: Scanner<'_>, matcher: F) -> Result<(Scanner<'_>, Span), Error>
where
F: Fn(char) -> bool,
{
let chr = scanner.current_char().ok_or(Error::Unmatched)?;
if matcher(chr) {
let new_scanner = scanner.advance();
return Ok((new_scanner, Span::new(scanner.pos, new_scanner.pos)));
}
Err(Error::Unmatched)
}
pub fn match_many0<F>(scanner: Scanner<'_>, matcher: F) -> Result<(Scanner<'_>, Span), Error>
where
F: Fn(char) -> bool,
{
let mut new_scanner = scanner;
while let Some(chr) = new_scanner.current_char() {
if matcher(chr) {
new_scanner = new_scanner.advance();
} else {
break;
}
}
if new_scanner.pos != scanner.pos {
Ok((new_scanner, (scanner.pos, new_scanner.pos).into()))
} else {
Ok((scanner, scanner.pos.into()))
}
}
/// Consumes one or more consecutive characters accepted by `matcher`.
///
/// Fails with `Error::Unmatched` when the very first character is rejected.
pub fn match_many1<F>(scanner: Scanner<'_>, matcher: F) -> Result<(Scanner<'_>, Span), Error>
where
    F: Fn(char) -> bool,
{
    let (after_first, first) = match_one(scanner, &matcher)?;
    match_many0(after_first, &matcher).map(|(rest, others)| (rest, first.extend(others)))
}
pub fn match_exact<'a>(scanner: Scanner<'a>, pattern: &str) -> Result<(Scanner<'a>, Span), Error> {
let mut pattern_chars = pattern.chars();
let mut new_scanner = scanner;
for _ in 0..pattern.chars().count() {
let p = pattern_chars.next().ok_or(Error::Unmatched)?;
let s = new_scanner.current_char().ok_or(Error::Unmatched)?;
if p == s {
new_scanner = new_scanner.advance();
} else {
return Err(Error::Unmatched);
}
}
Ok((new_scanner, Span::new(scanner.pos, new_scanner.pos)))
}
#[cfg(test)]
mod test {
    use super::*;

    /// Checks what is left to scan and the character offsets of the matched span.
    fn assert_progress(scanner: Scanner<'_>, span: Span, remaining: &str, start: usize, end: usize) {
        assert_eq!(scanner.remaining(), remaining);
        assert_eq!(span.start, start);
        assert_eq!(span.end, end);
    }

    #[test]
    fn match_one_on_success() {
        let (finish_scanner, span) = match_one(Scanner::new("123"), |c| c == '1').unwrap();
        assert_progress(finish_scanner, span, "23", 0, 1);
        assert_eq!(finish_scanner.pos, 1);
    }

    #[test]
    fn match_one_on_failure() {
        assert!(match_one(Scanner::new("123"), |c| c == '2').is_err());
    }

    #[test]
    fn match_many0_matches_none() {
        let initial_scanner = Scanner::new("123");
        let (finish_scanner, span) = match_many0(initial_scanner, |c| c == '4').unwrap();
        assert_eq!(finish_scanner, initial_scanner);
        assert_progress(finish_scanner, span, "123", 0, 0);
    }

    #[test]
    fn match_many0_matches_one() {
        let (finish_scanner, span) = match_many0(Scanner::new("123"), |c| c == '1').unwrap();
        assert_progress(finish_scanner, span, "23", 0, 1);
    }

    #[test]
    fn match_many0_matches_many() {
        let (finish_scanner, span) = match_many0(Scanner::new("11123"), |c| c == '1').unwrap();
        assert_progress(finish_scanner, span, "23", 0, 3);
    }

    #[test]
    fn match_many1_doesnt_match_none() {
        assert!(match_many1(Scanner::new("23"), |c| c == '1').is_err());
    }

    #[test]
    fn match_many1_matches_one() {
        let (finish_scanner, span) = match_many1(Scanner::new("123"), |c| c == '1').unwrap();
        assert_progress(finish_scanner, span, "23", 0, 1);
    }

    #[test]
    fn match_many1_matches_many() {
        let (finish_scanner, span) = match_many1(Scanner::new("11123"), |c| c == '1').unwrap();
        assert_progress(finish_scanner, span, "23", 0, 3);
    }

    #[test]
    fn match_exact_matches_exactly() {
        let (finish_scanner, span) = match_exact(Scanner::new("abcdef 12345"), "abcdef").unwrap();
        assert_progress(finish_scanner, span, " 12345", 0, 6);
    }
}

View file

@ -0,0 +1,197 @@
use crate::error::Error;
use crate::token::{Token, TokenKind};
mod classes;
mod combinators;
mod rules;
use rules::*;
pub type ScanResult<'a> = std::result::Result<(Scanner<'a>, Token), Error>;
type Result<T> = std::result::Result<T, Error>;
/// A cursor over source text.
///
/// The scanner is `Copy`; methods such as `advance` and `next` return a new
/// value instead of mutating in place, so an earlier scanner can be reused to
/// retry a different rule from the same position.
#[derive(Clone, Debug, PartialEq, Copy)]
pub struct Scanner<'a> {
/// The complete source text being scanned.
input: &'a str,
/// Current position, counted in characters (not bytes) from the start of `input`.
pos: usize,
/// Number of characters in `input`.
len: usize,
}
impl<'a> Scanner<'a> {
pub fn new(input: &'a str) -> Self {
let len = input.chars().count();
Scanner { input, len, pos: 0 }
}
pub fn next(self) -> ScanResult<'a> {
self.eof()?;
let scanner = skip_whitespace(self)?;
scanner.eof()?;
match_float(scanner)
.or_else(|_| match_integer(scanner))
.or_else(|_| match_boolean(scanner))
.or_else(|_| match_string(scanner))
.or_else(|_| match_terminal(scanner, "use", TokenKind::KeywordUse))
.or_else(|_| match_terminal(scanner, "type", TokenKind::KeywordType))
.or_else(|_| match_terminal(scanner, "protocol", TokenKind::KeywordProtocol))
.or_else(|_| match_terminal(scanner, "end", TokenKind::KeywordEnd))
.or_else(|_| match_terminal(scanner, "impl", TokenKind::KeywordImpl))
.or_else(|_| match_terminal(scanner, "defp", TokenKind::KeywordDefPrivate))
.or_else(|_| match_terminal(scanner, "def", TokenKind::KeywordDef))
.or_else(|_| match_identifier(scanner))
.or_else(|_| match_terminal(scanner, "(", TokenKind::LeftParen))
.or_else(|_| match_terminal(scanner, ")", TokenKind::RightParen))
.or_else(|_| match_terminal(scanner, "[", TokenKind::LeftSquare))
.or_else(|_| match_terminal(scanner, "]", TokenKind::RightSquare))
.or_else(|_| match_terminal(scanner, "{", TokenKind::LeftBrace))
.or_else(|_| match_terminal(scanner, "}", TokenKind::RightBrace))
.or_else(|_| match_terminal(scanner, "+", TokenKind::Plus))
.or_else(|_| match_terminal(scanner, "-", TokenKind::Minus))
.or_else(|_| match_terminal(scanner, "*", TokenKind::Star))
.or_else(|_| match_terminal(scanner, "/", TokenKind::ForwardSlash))
.or_else(|_| match_terminal(scanner, "%", TokenKind::Percent))
.or_else(|_| match_terminal(scanner, ":", TokenKind::Colon))
.or_else(|_| match_terminal(scanner, ",", TokenKind::Comma))
.or_else(|_| match_terminal(scanner, ".", TokenKind::Dot))
.or_else(|_| match_terminal(scanner, ">>", TokenKind::GtGt))
.or_else(|_| match_terminal(scanner, ">=", TokenKind::GtEq))
.or_else(|_| match_terminal(scanner, ">", TokenKind::Gt))
.or_else(|_| match_terminal(scanner, "<<", TokenKind::LtLt))
.or_else(|_| match_terminal(scanner, "<=", TokenKind::LtEq))
.or_else(|_| match_terminal(scanner, "<", TokenKind::Lt))
.or_else(|_| match_terminal(scanner, "==", TokenKind::EqEq))
.or_else(|_| match_terminal(scanner, "=", TokenKind::Eq))
.or_else(|_| match_terminal(scanner, "&&", TokenKind::AndAnd))
.or_else(|_| match_terminal(scanner, "&", TokenKind::And))
.or_else(|_| match_terminal(scanner, "|", TokenKind::Pipe))
.or_else(|_| match_terminal(scanner, "||", TokenKind::PipePipe))
.or_else(|_| match_terminal(scanner, "!=", TokenKind::BangEq))
.or_else(|_| match_terminal(scanner, "!", TokenKind::Bang))
.map_err(|error| match error {
Error::Unmatched => {
Error::unexpected(scanner.pos.into(), scanner.current_char(), Vec::new())
}
error => error,
})
}
pub fn is_at_end(&self) -> bool {
self.pos >= self.len
}
pub fn advance(self) -> Self {
Scanner {
input: self.input,
pos: self.pos + 1,
len: self.len,
}
}
pub fn remaining(&self) -> &str {
if self.pos < self.len {
let (index, _) = self.input.char_indices().nth(self.pos).unwrap();
&self.input[index..]
} else {
""
}
}
pub fn eof(&self) -> Result<()> {
if self.is_at_end() {
Err(Error::Eof)
} else {
Ok(())
}
}
pub fn next_char(&self) -> Option<char> {
self.input.chars().nth(self.pos + 1)
}
pub fn current_char(&self) -> Option<char> {
self.input.chars().nth(self.pos)
}
pub fn current_char_or_error(&self) -> Result<char> {
self.input.chars().nth(self.pos).ok_or_else(|| {
Error::internal(
self.pos.into(),
format!(
"Internal error while retrieving character at position {}",
self.pos
),
)
})
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::span::Span;
    use crate::token::TokenValue;

    /// Scans the first token of `input`.
    fn scan(input: &str) -> ScanResult<'_> {
        let scanner = Scanner::new(input);
        scanner.next()
    }

    #[test]
    fn scanner_new_is_at_zero() {
        let scanner = Scanner::new("1");
        assert_eq!(scanner.pos, 0);
    }

    #[test]
    fn scanner_next_with_empty_input() {
        assert!(matches!(scan(""), Err(Error::Eof)));
    }

    #[test]
    fn scanner_next_with_whitespace() {
        assert!(matches!(scan(" "), Err(Error::Eof)));
    }

    #[test]
    fn scanner_with_integer_input() {
        let (_, token) = scan("1").expect("We should be able to lex integers");
        assert_eq!(token.value, TokenValue::Integer(1));
        assert_eq!(token.span, Span::new(0, 1));
    }

    #[test]
    fn scanner_with_long_integer_input() {
        let (_, token) = scan("1234567890").expect("We should be able to lex integers");
        assert_eq!(token.value, TokenValue::Integer(1234567890));
        assert_eq!(token.span, Span::new(0, 10));
    }

    #[test]
    fn scanner_with_float_input() {
        let (_, token) = scan("0.0").expect("We should be able to lex floats");
        assert_eq!(token.value, TokenValue::Float(0.0));
        assert_eq!(token.span, Span::new(0, 3));
    }

    #[test]
    fn scanner_exhausts_input() {
        let (scanner, _) = scan("1").unwrap();
        assert!(
            scanner.next().is_err(),
            "Should not be able to read past the end of input"
        );
    }

    #[test]
    fn scanner_skips_leading_whitespace() {
        // The expected span starts at 5, so the input needs exactly five
        // leading spaces; building it with `repeat` keeps that count explicit.
        let input = format!("{}1", " ".repeat(5));
        let (_, token) = scan(&input).unwrap();
        assert_eq!(token.value, TokenValue::Integer(1));
        assert_eq!(token.span, Span::new(5, 6));
    }

    #[test]
    fn scanner_parses_terminals() {
        let (_, token) = scan("(").unwrap();
        assert_eq!(token.kind, TokenKind::LeftParen);
    }
}

View file

@ -0,0 +1,258 @@
use crate::error::Error;
use crate::scanner::classes::*;
use crate::scanner::combinators::*;
use crate::scanner::{ScanResult, Scanner};
use crate::span::Span;
use crate::token::{Token, TokenKind, TokenValue};
use unicode_ident::{is_xid_continue, is_xid_start};
use unicode_normalization::UnicodeNormalization;
/// Advances past any run of whitespace (possibly empty) and returns the
/// resulting scanner.
pub fn skip_whitespace(scanner: Scanner<'_>) -> Result<Scanner<'_>, Error> {
    match_many0(scanner, is_whitespace).map(|(rest, _)| rest)
}
pub fn match_integer(scanner: Scanner<'_>) -> ScanResult<'_> {
let (scanner, span) = match_many1(scanner, is_numeric)?;
let value = span
.extract(scanner.input)
.ok_or_else(|| Error::internal(span, "Unable to retrieve value from input"))?;
let value = value.parse::<i64>().map_err(|e| {
Error::internal(
span,
format!("Unable to parse {:?} as integer: {:?}", value, e),
)
})?;
let token = Token::new(TokenKind::Integer, TokenValue::Integer(value), span);
Ok((scanner, token))
}
pub fn match_float(scanner: Scanner<'_>) -> ScanResult<'_> {
let (scanner, head) = match_many1(scanner, is_numeric)?;
let (scanner, _) = match_one(scanner, |c| c == '.')?;
let (scanner, tail) = match_many1(scanner, is_numeric)?;
let span = head.extend(tail);
let value = span
.extract(scanner.input)
.expect("Span out of input range!");
let value = value.parse::<f64>().map_err(|e| {
Error::internal(
span,
format!("Unable to parse {:?} as float: {:?}", value, e),
)
})?;
let token = Token::new(TokenKind::Float, TokenValue::Float(value), span);
Ok((scanner, token))
}
/// Matches the literal text `pattern` and produces a value-less token of the
/// given `kind`.
pub fn match_terminal<'a>(scanner: Scanner<'a>, pattern: &str, kind: TokenKind) -> ScanResult<'a> {
    let (rest, span) = match_exact(scanner, pattern)?;
    Ok((rest, Token::new(kind, TokenValue::None, span)))
}
pub fn match_boolean(scanner: Scanner<'_>) -> ScanResult<'_> {
match_exact(scanner, "true")
.map(|(scanner, span)| {
(
scanner,
Token::new(TokenKind::Boolean, TokenValue::Boolean(true), span),
)
})
.or_else(|_| {
match_exact(scanner, "false").map(|(scanner, span)| {
(
scanner,
Token::new(TokenKind::Boolean, TokenValue::Boolean(false), span),
)
})
})
}
pub fn match_string(scanner: Scanner<'_>) -> ScanResult<'_> {
let (scanner, begin) = match_exact(scanner, r#"""#)?;
let mut contents = Span::at(scanner.pos);
let mut loop_scanner = scanner;
while let Ok((scanner, span)) =
match_exact(loop_scanner, r#"\""#).or_else(|_| match_one(loop_scanner, |c| c != '"'))
{
loop_scanner = scanner;
contents = contents.extend(span);
}
let (scanner, end) = match_exact(loop_scanner, r#"""#).map_err(|error| match error {
Error::Unmatched => Error::unexpected(
loop_scanner.pos.into(),
loop_scanner.current_char(),
vec!['"'],
),
error => error,
})?;
let value = contents
.extract(scanner.input)
.ok_or_else(|| Error::internal(contents, "Unable to extract string contents"))
.map(|s| s.replace(r#"\""#, r#"""#))?;
Ok((
scanner,
Token::new(
TokenKind::String,
TokenValue::String(value),
begin.extend(end),
),
))
}
pub fn match_identifier(scanner: Scanner<'_>) -> ScanResult<'_> {
let (scanner, start) = match_one(scanner, is_xid_start)?;
let (scanner, end) = match_many0(scanner, is_xid_continue)?;
let span = start.extend(end);
let value = span
.extract(scanner.input)
.ok_or_else(|| Error::internal(span, "Unable to extract identifier contents"))
.map(|s| s.nfc().collect::<String>())?;
Ok((
scanner,
Token::new(TokenKind::Identifier, TokenValue::Identifier(value), span),
))
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::span::Span;

    #[test]
    fn skip_whitespace_always_moves_forward() {
        // Seven whitespace characters, written as escapes so the count is
        // unambiguous: space, tab, CR, LF, no-break space, line separator and
        // paragraph separator.
        let scanner = Scanner::new(" \t\r\n\u{a0}\u{2028}\u{2029}0");
        let scanner = skip_whitespace(scanner).unwrap();
        assert_eq!(scanner.pos, 7);
    }

    #[test]
    fn match_integer_9_matches() {
        let scanner = Scanner::new("9");
        let (_, token) = match_integer(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Integer(9));
    }

    #[test]
    fn match_integer_0_matches() {
        let scanner = Scanner::new("0");
        let (scanner, token) = match_integer(scanner).unwrap();
        assert_eq!(scanner.remaining(), "");
        assert_eq!(token.value, TokenValue::Integer(0));
    }

    #[test]
    fn match_integer_1234567890_matches() {
        let scanner = Scanner::new("1234567890");
        let (_, token) = match_integer(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Integer(1234567890));
    }

    #[test]
    fn match_float_0_0_matches() {
        let scanner = Scanner::new("0.0");
        let (_, token) = match_float(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Float(0.0));
    }

    #[test]
    fn match_float_123_456_matches() {
        let scanner = Scanner::new("123.456");
        let (_, token) = match_float(scanner).unwrap();
        assert_eq!(token.value, TokenValue::Float(123.456));
    }

    #[test]
    fn match_float_abc_fails() {
        let scanner = Scanner::new("abc");
        assert!(match_float(scanner).is_err());
    }

    #[test]
    fn match_terminal_matches() {
        let scanner = Scanner::new("(");
        let (_, token) = match_terminal(scanner, "(", TokenKind::LeftParen).unwrap();
        assert_eq!(token.kind, TokenKind::LeftParen);
        assert_eq!(token.span, Span::new(0, 1));
    }

    #[test]
    fn match_true() {
        let scanner = Scanner::new("true");
        let (_, token) = match_boolean(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Boolean);
        assert_eq!(token.value, TokenValue::Boolean(true));
        assert_eq!(token.span, Span::new(0, 4));
    }

    #[test]
    fn match_false() {
        let scanner = Scanner::new("false");
        let (_, token) = match_boolean(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Boolean);
        assert_eq!(token.value, TokenValue::Boolean(false));
        assert_eq!(token.span, Span::new(0, 5));
    }

    #[test]
    fn match_string_simple() {
        let scanner = Scanner::new("\"Magical Sound Shower\"");
        let (_, token) = match_string(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::String);
        assert_eq!(
            token.value,
            TokenValue::String("Magical Sound Shower".to_string())
        );
        assert_eq!(token.span, Span::new(0, 22));
    }

    #[test]
    fn match_string_escaped() {
        let scanner = Scanner::new(r#""\"GUNSHIP\"""#);
        let (_, token) = match_string(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::String);
        assert_eq!(token.value, TokenValue::String(r#""GUNSHIP""#.to_string()));
        assert_eq!(token.span, Span::new(0, 13));
    }

    #[test]
    fn match_identifier_title_case() {
        let scanner = Scanner::new("DarkAllDay");
        let (_, token) = match_identifier(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Identifier);
        assert_eq!(
            token.value,
            TokenValue::Identifier("DarkAllDay".to_string())
        );
        assert_eq!(token.span, Span::new(0, 10));
    }

    #[test]
    fn match_identifier_snake_case() {
        let scanner = Scanner::new("dark_all_day");
        let (_, token) = match_identifier(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Identifier);
        assert_eq!(
            token.value,
            TokenValue::Identifier("dark_all_day".to_string())
        );
        assert_eq!(token.span, Span::new(0, 12));
    }

    #[test]
    fn match_identifier_unicode() {
        // U+03BB GREEK SMALL LETTER LAMDA: a single non-ASCII `XID_Start`
        // character, written as an escape so it survives any re-encoding.
        let scanner = Scanner::new("\u{3bb}");
        let (_, token) = match_identifier(scanner).unwrap();
        assert_eq!(token.kind, TokenKind::Identifier);
        assert_eq!(token.value, TokenValue::Identifier("\u{3bb}".to_string()));
        assert_eq!(token.span, Span::new(0, 1));
    }
}

120
outrun-lexer/src/span.rs Normal file
View file

@ -0,0 +1,120 @@
/// A half-open range `start..end` over an input, counted in characters
/// (not bytes).
#[derive(Clone, Debug, PartialEq, Copy)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    /// Creates an empty span located at `pos`.
    pub fn at(pos: usize) -> Span {
        Span {
            start: pos,
            end: pos,
        }
    }

    /// Creates a span covering `start..end`.
    pub fn new(start: usize, end: usize) -> Span {
        Span { start, end }
    }

    /// Returns the slice of `input` covered by this span.
    ///
    /// An empty span always yields `Some("")`. `None` is returned when the
    /// span reaches past the end of `input`, or when it is inverted
    /// (`end < start`). The returned slice borrows only from `input`, never
    /// from the span itself.
    pub fn extract<'a>(&self, input: &'a str) -> Option<&'a str> {
        // `checked_sub` turns an inverted span into `None` instead of an
        // arithmetic overflow.
        let len = self.end.checked_sub(self.start)?;
        if len == 0 {
            return Some("");
        }
        // Byte offset of every character boundary, including the one at the
        // very end of the input, so a span ending on the last character works.
        let mut boundaries = input
            .char_indices()
            .map(|(offset, _)| offset)
            .chain(std::iter::once(input.len()));
        let from = boundaries.nth(self.start)?;
        // `nth` above consumed boundaries `0..=start`; `len - 1` more steps
        // land on boundary `end`.
        let to = boundaries.nth(len - 1)?;
        input.get(from..to)
    }

    /// Returns the smallest span that covers both `self` and `other`.
    pub fn extend(self, other: Span) -> Span {
        Span {
            start: self.start.min(other.start),
            end: self.end.max(other.end),
        }
    }
}

impl From<usize> for Span {
    /// Converts a position into the empty span at that position.
    fn from(pos: usize) -> Span {
        Span::at(pos)
    }
}

impl From<(usize, usize)> for Span {
    /// Converts a `(start, end)` pair into a span.
    fn from(span: (usize, usize)) -> Span {
        Span::new(span.0, span.1)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn span_at() {
        let Span { start, end } = Span::at(13);
        assert_eq!(start, 13);
        assert_eq!(end, 13);
    }

    #[test]
    fn span_new() {
        let Span { start, end } = Span::new(13, 27);
        assert_eq!(start, 13);
        assert_eq!(end, 27);
    }

    #[test]
    fn span_extend() {
        let merged = Span::new(13, 27).extend(Span::new(3, 19));
        assert_eq!(merged.start, 3);
        assert_eq!(merged.end, 27);
    }

    #[test]
    fn span_extract() {
        let buffer = "Passing Breeze";
        let cases = [
            (Span::new(3, 3), ""),
            (Span::new(3, 4), "s"),
            (Span::new(3, 10), "sing Br"),
            (Span::at(0), ""),
            (Span::new(0, 14), "Passing Breeze"),
        ];
        for (span, expected) in cases {
            assert_eq!(span.extract(buffer).unwrap(), expected);
        }
    }

    /// Sanity check of how byte-range indexing behaves on plain ASCII text.
    #[test]
    fn test_string_indexing() {
        let text = "abcdef 12345";
        assert_eq!(&text[0..=0], "a");
        assert_eq!(&text[3..4], "d");
        assert_eq!(&text[3..=4], "de");
    }
}

74
outrun-lexer/src/token.rs Normal file
View file

@ -0,0 +1,74 @@
use crate::span::Span;
/// The category of a token, independent of any value it carries.
///
/// The lexeme noted on each symbol and keyword is the exact text the scanner
/// matches for that kind.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenKind {
// Literals
// These kinds are produced together with a matching `TokenValue`.
Boolean,
Float,
Identifier,
Integer,
String,
// Symbols
/// `&`
And,
/// `&&`
AndAnd,
/// `!`
Bang,
/// `!=`
BangEq,
/// `:`
Colon,
/// `,`
Comma,
/// `.`
Dot,
/// `=`
Eq,
/// `==`
EqEq,
/// `/`
ForwardSlash,
/// `>`
Gt,
/// `>=`
GtEq,
/// `>>`
GtGt,
/// `{`
LeftBrace,
/// `(`
LeftParen,
/// `[`
LeftSquare,
/// `<`
Lt,
/// `<=`
LtEq,
/// `<<`
LtLt,
/// `-`
Minus,
/// `%`
Percent,
/// `|`
Pipe,
/// `||`
PipePipe,
/// `+`
Plus,
/// `}`
RightBrace,
/// `)`
RightParen,
/// `]`
RightSquare,
/// `*`
Star,
// Keywords
/// `def`
KeywordDef,
/// `defp`
KeywordDefPrivate,
/// `end`
KeywordEnd,
/// `impl`
KeywordImpl,
/// `protocol`
KeywordProtocol,
/// `type`
KeywordType,
/// `use`
KeywordUse,
}
/// The payload carried by a token.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenValue {
// NOTE(review): nothing in the scanner shown here constructs `Constant`;
// presumably reserved for a later rule — confirm intended meaning.
Constant(Vec<TokenValue>),
/// Identifier text after NFC normalisation.
Identifier(String),
/// Value of an integer literal.
Integer(i64),
/// Value of a float literal.
Float(f64),
/// Value of a `true` / `false` literal.
Boolean(bool),
/// String contents with delimiters removed and escaped quotes unescaped.
String(String),
/// Used for symbols and keywords, which carry no value.
None,
}
/// A single lexical token: what it is, the value it carries (if any) and
/// where it was found in the input.
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub value: TokenValue,
    pub span: Span,
}

impl Token {
    /// Assembles a token from its three parts.
    pub fn new(kind: TokenKind, value: TokenValue, span: Span) -> Token {
        Self { kind, value, span }
    }
}

8
outrun-parser/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "outrun-parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

1
outrun-parser/src/lib.rs Normal file
View file

@ -0,0 +1 @@

9
outrun/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
edition = "2021"
name = "outrun"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
outrun-lexer = {path = "../outrun-lexer"}

30
outrun/src/main.rs Normal file
View file

@ -0,0 +1,30 @@
use std::fs::File;
use std::io::Read;
extern crate outrun_lexer;
use outrun_lexer::Scanner;
/// Reads `test.or` from the working directory and prints every token the
/// scanner produces, stopping at the first error (which includes reaching the
/// end of the input). The final scanner state is printed afterwards.
fn main() -> std::io::Result<()> {
    let mut source = String::new();
    let mut file = File::open("test.or")?;
    file.read_to_string(&mut source)?;

    let mut scanner = Scanner::new(&source);
    loop {
        let (advanced, token) = match scanner.next() {
            Ok(step) => step,
            Err(error) => {
                println!("error: {:?}", error);
                break;
            }
        };
        println!("token: {:?}", token);
        scanner = advanced;
    }

    println!("Scanner: {:?}", scanner);
    Ok(())
}