feat: convert IR into bytecode. #27

Closed
jimsy wants to merge 2 commits from bytecode into main
14 changed files with 555 additions and 39 deletions

8
Cargo.lock generated
View file

@ -257,6 +257,13 @@ dependencies = [
"thiserror",
]
[[package]]
name = "outrun-bytecode"
version = "0.1.0"
dependencies = [
"outrun-common",
]
[[package]]
name = "outrun-common"
version = "0.1.0"
@ -272,6 +279,7 @@ dependencies = [
"heck",
"lazy_static",
"miette",
"outrun-bytecode",
"outrun-common",
"pest",
"pest_derive",

View file

@ -1,6 +1,7 @@
[workspace]
members = [
"outrun-bytecode",
"outrun-common",
"outrun-compiler",
"outrun-core",

View file

@ -0,0 +1,9 @@
[package]
edition = "2021"
name = "outrun-bytecode"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
outrun-common = {path = "../outrun-common"}

132
outrun-bytecode/src/lib.rs Normal file
View file

@ -0,0 +1,132 @@
use outrun_common::{IndexedVec, IndexedVecKey, UniqueVec, UniqueVecKey};
/// A single unit of compiled bytecode.
///
/// `types` and `strings` are the module's lookup tables; the `UniqueVecKey`
/// values used throughout the module are keys into them.
#[derive(Debug, Clone)]
pub struct Module {
/// Source location recorded for the module, if one is known.
pub location: Option<SourceLocation>,
/// Key (into `types`) of the type this module is associated with.
pub associated_type: UniqueVecKey<Type>,
/// Name of the module.
pub name: String,
/// Table of types referenced by the module.
pub types: UniqueVec<Type>,
/// Table of strings (names and literals) referenced by the module.
pub strings: UniqueVec<String>,
/// Functions belonging to the module.
pub functions: IndexedVec<Function>,
/// Instruction blocks; a function's `entry` is an `IndexedVecKey` for one of these.
pub blocks: IndexedVec<Block>,
}
/// A span within a named source file.
#[derive(Debug, Clone)]
pub struct SourceLocation {
/// The source file, as a string.
pub file: String,
/// Start of the span.
/// NOTE(review): units (byte vs. character offset) are not visible here - confirm against the producer.
pub start: u32,
/// End of the span (same units as `start`).
pub end: u32,
}
/// A function defined in a bytecode [`Module`].
#[derive(Debug, Clone)]
pub struct Function {
/// Name of the function.
pub name: String,
/// Arguments as `(name, type)` pairs: a key into the module's string table
/// and a key into its type table.
pub arguments: Vec<(UniqueVecKey<String>, UniqueVecKey<Type>)>,
/// Key of the function's return type in the module's type table.
pub return_type: UniqueVecKey<Type>,
/// Key of the block where execution starts, or `None` when the function has
/// no entry block.
pub entry: Option<IndexedVecKey<Block>>,
/// Whether the function is public or private.
pub access: Access,
}
/// Visibility of a bytecode function.
///
/// This is a plain field-less enum, so it derives `Copy`, `PartialEq` and `Eq`
/// in addition to `Debug`/`Clone`; that lets callers compare and pass access
/// levels by value without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Access {
    /// The function is private.
    Private,
    /// The function is public.
    Public,
}
/// A type as stored in a module's type table.
///
/// Names are keys into the module's string table and nested types are keys
/// into its type table.
#[derive(Debug, Clone, PartialEq)]
pub enum Type {
/// The atom type.
Atom,
/// The boolean type.
Boolean,
/// The integer type.
Integer,
/// The float type.
Float,
/// The string type.
String,
/// The array type.
Array,
/// The map type.
Map,
/// A struct: its name followed by its `(field name, field type)` pairs.
Struct(
UniqueVecKey<String>,
Vec<(UniqueVecKey<String>, UniqueVecKey<Type>)>,
),
/// A protocol: an optional name plus a list of types.
/// NOTE(review): the IR lowering in this change stores the protocol's required
/// types in the list and also encodes unions as unnamed protocols - confirm
/// this is the intended meaning for consumers.
Protocol(Option<UniqueVecKey<String>>, Vec<UniqueVecKey<Type>>),
/// An externally defined type, identified by name.
External(UniqueVecKey<String>),
/// An implementation pairing two types.
/// NOTE(review): the IR lowering passes `(protocol, augmented type)` in that
/// order - confirm.
Impl(UniqueVecKey<Type>, UniqueVecKey<Type>),
}
/// A sequence of instructions together with the types of the block's arguments.
#[derive(Debug, Clone)]
pub struct Block {
/// Types of the block's arguments, as keys into the module's type table.
pub arguments: Vec<UniqueVecKey<Type>>,
/// The block's instructions, in insertion order.
pub instructions: IndexedVec<Instruction>,
}
impl Block {
pub fn is_terminated(&self) -> bool {
self.instructions
.as_slice()
.last()
.map(Instruction::is_terminal)
.unwrap_or(false)
}
pub fn push(&mut self, instruction: Instruction) -> IndexedVecKey<Instruction> {
self.instructions.insert(instruction)
}
}
impl Default for Block {
    /// Builds a block with no arguments and no instructions.
    fn default() -> Self {
        let arguments = Vec::new();
        let instructions = IndexedVec::new();
        Self {
            arguments,
            instructions,
        }
    }
}
/// A single bytecode instruction.
///
/// As the variant descriptions below state, instructions communicate through a
/// value stack: operands are popped from it and results are pushed onto it.
#[derive(Debug, Clone)]
pub enum Instruction {
/// Construct a struct instance of type by popping `n` elements off the stack and pushing the result.
Construct(UniqueVecKey<Type>, usize),
/// Create an array by popping `n` elements off the stack and pushing the result.
CreateArray(usize),
/// Create a map by popping `n` elements off the stack and pushing the result.
CreateMap(usize),
/// Retrieve a value from a variable and push it onto the stack.
GetLocal(UniqueVecKey<String>),
/// Pops a value off of the stack, indexes it, and pushes the result onto the stack.
Index(UniqueVecKey<String>),
/// Pops the receiver and arguments off the stack, and calls the receiver, pushing the return value onto the stack.
LocalCall(usize),
/// Pushes an atom literal onto the stack.
PushAtom(UniqueVecKey<String>),
/// Pushes a boolean literal onto the stack.
PushBoolean(bool),
/// Pushes a float literal onto the stack.
PushFloat(f64),
/// Pushes an integer literal onto the stack.
PushInteger(i64),
/// Pushes a string literal onto the stack.
PushString(UniqueVecKey<String>),
/// Calls a named function in a remote module.
/// NOTE(review): the fields are presumably (module type, function name, argument count) - confirm.
RemoteCall(UniqueVecKey<Type>, UniqueVecKey<String>, usize),
/// Pops a value off the stack and returns from the current function.
Return,
/// Pops a value off the stack and stores it in the local variable scope.
SetLocal(UniqueVecKey<String>),
}
impl Instruction {
pub fn is_terminal(&self) -> bool {
matches!(self, Instruction::Return)
}
}

View file

@ -1,7 +1,7 @@
use std::fmt::Debug;
use std::marker::PhantomData;
#[derive(Debug, Default)]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct IndexedVec<T: Debug>(Vec<T>);
impl<T: Debug> IndexedVec<T> {
@ -28,7 +28,7 @@ impl<T: Debug> IndexedVec<T> {
}
}
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndexedVecKey<T: Debug>(usize, PhantomData<T>);
impl<T: Debug> From<IndexedVecKey<T>> for usize {

View file

@ -1,7 +1,9 @@
use std::default::Default;
use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
#[derive(Clone)]
pub struct UniqueVec<T: Debug> {
data: Vec<T>,
cmp: fn(&T, &T) -> bool,

View file

@ -12,6 +12,7 @@ version = "0.1.0"
heck = "0.4"
lazy_static = "1.4"
miette = {version = "5.3", features = ["fancy"]}
outrun-bytecode = {path = "../outrun-bytecode"}
outrun-common = {path = "../outrun-common"}
pest = "2.4"
pest_derive = "2.4"

View file

@ -3,14 +3,14 @@ use crate::grammar::NodeValue;
use crate::span::Span;
use std::sync::Arc;
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub struct Expression {
pub r#type: Arc<Type>,
pub span: Span,
pub value: ExpressionValue,
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub enum ExpressionValue {
Array(Vec<Expression>),
Atom(String),

View file

@ -3,13 +3,13 @@ use crate::grammar::NodeKind;
use std::cell::RefCell;
use std::sync::Arc;
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Access {
Public,
Private,
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub struct Function {
pub name: String,
pub arguments: Vec<String>,

View file

@ -0,0 +1,289 @@
use crate::ir;
use crate::{Source, Span};
use outrun_bytecode as bc;
use outrun_common::{IndexedVec, IndexedVecKey, UniqueVec, UniqueVecKey};
use std::collections::HashMap;
use std::iter::once;
use std::sync::Arc;
/// Converts every user-defined type known to `context` into a bytecode module.
///
/// Returns the modules in the order the types are visited, or the first error
/// raised while converting one of them.
pub fn into_bytecode(context: &mut ir::Context) -> Result<Vec<bc::Module>, ir::Error> {
    // Snapshot the user types first: the conversion below needs `context`
    // mutably, so it cannot run while `context.types` is still being iterated.
    let mut user_types: Vec<Arc<ir::Type>> = Vec::new();
    for candidate in context.types.iter() {
        if candidate.is_user() {
            user_types.push(candidate.clone());
        }
    }

    user_types
        .into_iter()
        .map(|ty| into_module(ty, context))
        .collect()
}
/// Builds the bytecode module for a single type.
///
/// The type is encoded first, so it is entered into the fresh type table ahead
/// of anything else. Every function in `context` whose `associated_type`
/// equals `ty` is then converted and added to the module.
fn into_module(ty: Arc<ir::Type>, context: &mut ir::Context) -> Result<bc::Module, ir::Error> {
    let mut types: UniqueVec<bc::Type> = UniqueVec::default();
    let mut strings: UniqueVec<String> = UniqueVec::default();
    let associated_type = into_type(ty.clone(), &mut types, &mut strings)?;

    let name = ty.to_string();
    let location = into_source_location(ty.source(), ty.span());
    let mut module = bc::Module {
        location,
        associated_type,
        name,
        types,
        strings,
        functions: IndexedVec::new(),
        blocks: IndexedVec::new(),
    };

    // Collect the matching functions up front so `context` can be handed on
    // mutably while they are converted.
    let mut associated: Vec<Arc<ir::Function>> = Vec::new();
    for candidate in context.functions.iter() {
        if candidate.associated_type == ty {
            associated.push(candidate.clone());
        }
    }

    for fun in associated {
        into_function(fun, &mut module, context)?;
    }

    Ok(module)
}
fn into_function(
fun: Arc<ir::Function>,
module: &mut bc::Module,
context: &mut ir::Context,
) -> Result<IndexedVecKey<bc::Function>, ir::Error> {
let access = match fun.access {
ir::Access::Public => bc::Access::Public,
ir::Access::Private => bc::Access::Private,
};
let (argument_types, result) = match fun.r#type.as_ref() {
ir::Type::Function {
arguments, result, ..
} => (arguments, result),
_ => unreachable!(),
};
let mut arguments = Vec::new();
for (name, t) in fun.arguments.iter().zip(argument_types) {
let t = into_type(t.clone(), &mut module.types, &mut module.strings)?;
let name = module.strings.insert(name.clone());
arguments.push((name, t));
}
let return_type = into_type(result.clone(), &mut module.types, &mut module.strings)?;
let name = fun.name.clone();
// look for guards, to build entry block, otherwise just jump direct to the body.
let block_id = if fun.has_body() {
let block_id = into_block(fun.body.borrow().clone(), module, arguments.clone())?;
Some(block_id)
} else {
None
};
let entry = if fun.is_prototype() {
None
} else {
if fun.is_guarded() {
// build the guard block.
None
} else {
// just jump directly to the body.
block_id
}
};
let function = bc::Function {
name,
access,
arguments,
return_type,
entry,
};
let idx = module.functions.insert(function);
Ok(idx)
}
fn into_block(
body: Vec<ir::Expression>,
module: &mut bc::Module,
arguments: Vec<(UniqueVecKey<String>, UniqueVecKey<bc::Type>)>,
) -> Result<IndexedVecKey<bc::Block>, ir::Error> {
let mut block = bc::Block::default();
let mut locals = arguments.iter().cloned().collect();
collect_expressions(body, &mut block, &mut locals, module)?;
if !block.is_terminated() {
block.push(bc::Instruction::Return);
}
let block_id = module.blocks.insert(block);
Ok(block_id)
}
fn collect_expressions(
body: Vec<ir::Expression>,
block: &mut bc::Block,
locals: &mut HashMap<UniqueVecKey<String>, UniqueVecKey<bc::Type>>,
module: &mut bc::Module,
) -> Result<(), ir::Error> {
for expr in body {
match expr.value {
ir::ExpressionValue::Array(values) => {
collect_expressions(values.clone(), block, locals, module)?;
block.push(bc::Instruction::CreateArray(values.len()));
}
ir::ExpressionValue::Atom(value) => {
let value = module.strings.insert(value);
block.push(bc::Instruction::PushAtom(value));
}
ir::ExpressionValue::Boolean(value) => {
block.push(bc::Instruction::PushBoolean(value));
}
ir::ExpressionValue::Constructor(values) => {
collect_expressions(values.clone(), block, locals, module)?;
let ty = into_type(expr.r#type, &mut module.types, &mut module.strings)?;
block.push(bc::Instruction::Construct(ty, values.len()));
}
ir::ExpressionValue::Float(value) => {
block.push(bc::Instruction::PushFloat(value));
}
ir::ExpressionValue::GetLocal(name) => {
// FIXME yeah so we need to do locals.
}
ir::ExpressionValue::Index { .. } => {
// FIXME
}
ir::ExpressionValue::Integer(value) => {
block.push(bc::Instruction::PushInteger(value));
}
ir::ExpressionValue::Let { .. } => {
// FIXME
}
ir::ExpressionValue::LocalCall { .. } => {
// FIXME
}
ir::ExpressionValue::Map(values) => {
let values: Vec<ir::Expression> = values
.iter()
.flat_map(|(key, value)| once(key).chain(once(value)))
.cloned()
.collect();
collect_expressions(values.clone(), block, locals, module)?;
block.push(bc::Instruction::CreateMap(values.len()));
}
ir::ExpressionValue::RemoteCall { .. } => {
// FIXME wat.
}
ir::ExpressionValue::String(value) => {
let value = module.strings.insert(value);
block.push(bc::Instruction::PushString(value));
}
}
}
Ok(())
}
/// Combines a source and a span into a bytecode source location.
///
/// Returns `None` when the source is missing, the source has no location, or
/// the span is missing.
///
/// NOTE(review): `start` and `end` are narrowed with `as u32`, which would wrap
/// silently if the span fields are wider than 32 bits - confirm the field type.
fn into_source_location(
    source: Option<Arc<Source>>,
    span: Option<Span>,
) -> Option<bc::SourceLocation> {
    let path = source?.location.clone()?;
    let file = path.to_string_lossy().to_string();
    let span = span?;
    Some(bc::SourceLocation {
        file,
        start: span.start as u32,
        end: span.end as u32,
    })
}
fn into_type(
ty: Arc<ir::Type>,
types: &mut UniqueVec<bc::Type>,
strings: &mut UniqueVec<String>,
) -> Result<UniqueVecKey<bc::Type>, ir::Error> {
let name = strings.insert(ty.to_string());
match ty.as_ref() {
ir::Type::Any => {
let name = strings.insert("Any".to_string());
Ok(types.insert(bc::Type::Protocol(Some(name), vec![])))
}
ir::Type::Struct { fields, .. } => {
let fields = fields
.iter()
.map(|(name, field)| {
let name = strings.insert(name.clone());
let field = into_type(field.clone(), types, strings)?;
Ok((name, field))
})
.collect::<Result<Vec<(UniqueVecKey<String>, UniqueVecKey<bc::Type>)>, ir::Error>>(
)?;
Ok(types.insert(bc::Type::Struct(name, fields)))
}
ir::Type::Protocol { .. } => {
let protocol_requires = ty
.protocol_requires()
.iter()
.map(|p| into_type(p.clone(), types, strings))
.collect::<Result<Vec<UniqueVecKey<bc::Type>>, ir::Error>>()?;
Ok(types.insert(bc::Type::Protocol(Some(name), protocol_requires)))
}
ir::Type::Impl {
protocol, augments, ..
} => {
let protocol = into_type(protocol.clone(), types, strings)?;
let augments = into_type(augments.clone(), types, strings)?;
Ok(types.insert(bc::Type::Impl(protocol, augments)))
}
ir::Type::Union {
types: requires, ..
} => {
let requires = requires
.iter()
.map(|ty| into_type(ty.clone(), types, strings))
.collect::<Result<Vec<UniqueVecKey<bc::Type>>, ir::Error>>()?;
Ok(types.insert(bc::Type::Protocol(None, requires)))
}
ir::Type::Atom => Ok(types.insert(bc::Type::Atom)),
ir::Type::Boolean => Ok(types.insert(bc::Type::Boolean)),
ir::Type::Integer => Ok(types.insert(bc::Type::Integer)),
ir::Type::Float => Ok(types.insert(bc::Type::Float)),
ir::Type::String => Ok(types.insert(bc::Type::String)),
ir::Type::Array => Ok(types.insert(bc::Type::Array)),
ir::Type::Map => Ok(types.insert(bc::Type::Map)),
ir::Type::LateSelf => Ok(types.insert(bc::Type::Protocol(
Some(strings.insert("*Self".to_string())),
vec![],
))),
ir::Type::Variable { r#type, span, .. } => {
let t = r#type.borrow().target().ok_or_else(|| {
ir::Error::InternalError(
"Attempt to traverse unlinked type variable".to_string(),
*span,
)
})?;
into_type(t, types, strings)
}
ir::Type::External { name, .. } => {
let name = strings.insert(name.clone());
Ok(types.insert(bc::Type::External(name)))
}
ir::Type::Alias { target, .. } => into_type(target.clone(), types, strings),
_ => Err(ir::Error::InternalError(
"Don't encode function types".to_string(),
None,
)),
}
}

View file

@ -3,6 +3,7 @@ mod context;
mod error;
mod expression;
mod function;
mod into_bytecode;
mod notice;
mod stages;
mod r#type;
@ -12,6 +13,7 @@ pub use context::Context;
pub use error::Error;
pub use expression::{Expression, ExpressionValue};
pub use function::{Access, Function};
pub use into_bytecode::into_bytecode;
pub use notice::{Notice, NoticeKind, NoticeLevel};
pub use r#type::{Type, TypeVariable};
pub use visitor::{visit_node, visit_nodes};

View file

@ -77,6 +77,10 @@ pub enum Type {
}
impl Type {
/// True when this type is `Any`, or when it links to a type for which
/// `is_any` holds.
pub fn is_any(&self) -> bool {
    match self {
        Type::Any => true,
        other => other.predicate(|target| target.is_any()),
    }
}
pub fn is_bound(&self) -> bool {
match self {
Type::Any => true,
@ -102,22 +106,23 @@ impl Type {
&& result.is_bound()
&& arguments.iter().all(|t| t.is_bound())
}
Type::Variable { r#type, .. } => r#type.borrow().is_bound(),
Type::Variable { r#type, .. } => r#type.borrow().predicate(|t| t.is_bound()),
Type::External { .. } => true,
Type::Alias { target, .. } => target.is_bound(),
}
}
pub fn is_external(&self) -> bool {
matches!(self, Type::External { .. } | Type::Any)
matches!(self, Type::External { .. } | Type::Any { .. })
|| self.predicate(|t| t.is_external())
}
pub fn is_function(&self) -> bool {
matches!(self, Type::Function { .. })
matches!(self, Type::Function { .. }) || self.predicate(|t| t.is_function())
}
pub fn is_impl(&self) -> bool {
matches!(self, Type::Impl { .. })
matches!(self, Type::Impl { .. }) || self.predicate(|t| t.is_impl())
}
pub fn is_linkable(&self) -> bool {
@ -133,6 +138,7 @@ impl Type {
Type::Map => true,
Type::Protocol { .. } => true,
Type::Struct { .. } => true,
Type::Variable { r#type, .. } => r#type.borrow().predicate(|t| t.is_linkable()),
_ => false,
}
}
@ -145,7 +151,7 @@ impl Type {
}
pub fn is_protocol(&self) -> bool {
matches!(self, Type::Protocol { .. })
matches!(self, Type::Protocol { .. }) || self.predicate(|t| t.is_protocol())
}
pub fn is_reference(&self) -> bool {
@ -156,7 +162,7 @@ impl Type {
}
pub fn is_struct(&self) -> bool {
matches!(self, Type::Struct { .. })
matches!(self, Type::Struct { .. }) || self.predicate(|t| t.is_struct())
}
pub fn is_unbound(&self) -> bool {
@ -168,25 +174,21 @@ impl Type {
}
pub fn is_user(&self) -> bool {
matches!(
self,
Type::Struct { .. } | Type::Protocol { .. } | Type::Impl { .. }
)
self.is_struct() || self.is_protocol() || self.is_impl()
}
pub fn is_native(&self) -> bool {
match self {
Type::Any => true,
Type::Atom => true,
Type::Boolean => true,
Type::Integer => true,
Type::Float => true,
Type::String => true,
Type::Array => true,
Type::Map => true,
Type::Variable { r#type, .. } => r#type.borrow().is_native(),
_ => false,
}
matches!(
self,
Type::Any
| Type::Atom
| Type::Boolean
| Type::Integer
| Type::Float
| Type::Array
| Type::Map
| Type::String
) || self.predicate(|t| t.is_native())
}
pub fn link_to(&self, target: Arc<Type>) {
@ -218,6 +220,64 @@ impl Type {
_ => None,
}
}
/// Returns the source recorded on this type, if the variant carries one and
/// it is set. Variants without a `source` field yield `None`.
pub fn source(&self) -> Option<Arc<Source>> {
    match self {
        Type::Struct { source, .. }
        | Type::Protocol { source, .. }
        | Type::Impl { source, .. }
        | Type::Union { source, .. }
        | Type::Function { source, .. }
        | Type::Variable { source, .. }
        | Type::External { source, .. }
        | Type::Alias { source, .. } => source.clone(),
        _ => None,
    }
}
/// Returns the `(name, type)` pairs of a struct's fields.
///
/// For a type that links to another type, the linked type's fields are
/// returned; anything else yields an empty list.
pub fn struct_fields(&self) -> Vec<(String, Arc<Type>)> {
    if let Type::Struct { fields, .. } = self {
        let mut pairs = Vec::new();
        for (name, ty) in fields.iter() {
            pairs.push((name.to_owned(), ty.clone()));
        }
        return pairs;
    }
    self.apply(|target| target.struct_fields(), vec![])
}
/// Returns the types a protocol requires.
///
/// A protocol delegates to its `requires` entry (empty when there is none), a
/// union returns its member types, and a type that links to another type
/// returns the linked type's answer. Anything else yields an empty list.
pub fn protocol_requires(&self) -> Vec<Arc<Type>> {
    match self {
        Type::Protocol { requires, .. } => match requires.as_ref() {
            Some(union) => union.protocol_requires(),
            None => Vec::new(),
        },
        Type::Union { types, .. } => types.clone(),
        _ => self.apply(|target| target.protocol_requires(), vec![]),
    }
}
// Evaluate `predicate` against the type this one links to (an alias target,
// or whatever a type variable resolves to); types that link to nothing
// answer `false`.
fn predicate<F>(&self, predicate: F) -> bool
where
    F: Fn(Arc<Type>) -> bool,
{
    if let Type::Alias { target, .. } = self {
        return predicate(target.clone());
    }
    if let Type::Variable { r#type, .. } = self {
        return r#type.borrow().predicate(predicate);
    }
    false
}
// Run `function` on the type this one links to (an alias target, or whatever
// a type variable resolves to) and return its result; types that link to
// nothing return `default`.
fn apply<F, T>(&self, function: F, default: T) -> T
where
    F: Fn(Arc<Type>) -> T,
{
    if let Type::Alias { target, .. } = self {
        return function(target.clone());
    }
    if let Type::Variable { r#type, .. } = self {
        return r#type.borrow().apply(function, default);
    }
    default
}
}
impl ToString for Type {
@ -291,11 +351,23 @@ impl TypeVariable {
matches!(self, TypeVariable::Reference { .. })
}
pub fn is_bound(&self) -> bool {
pub fn predicate<F>(&self, predicate: F) -> bool
where
F: Fn(Arc<Type>) -> bool,
{
match self {
TypeVariable::Unbound { .. } => false,
TypeVariable::Reference { .. } => false,
TypeVariable::Link { target } => target.is_bound(),
TypeVariable::Link { target } => predicate(target.clone()),
_ => false,
}
}
/// Runs `function` on the link target when this variable is a `Link`;
/// otherwise returns `default` unchanged.
pub fn apply<F, T>(&self, function: F, default: T) -> T
where
    F: Fn(Arc<Type>) -> T,
{
    if let TypeVariable::Link { target } = self {
        function(target.clone())
    } else {
        default
    }
}
@ -307,13 +379,6 @@ impl TypeVariable {
matches!(self, TypeVariable::Link { .. })
}
pub fn is_native(&self) -> bool {
match self {
TypeVariable::Link { target } => target.is_native(),
_ => false,
}
}
pub fn target(&self) -> Option<Arc<Type>> {
match self {
TypeVariable::Link { target } => Some(target.clone()),

View file

@ -1,4 +1,5 @@
extern crate heck;
extern crate outrun_bytecode;
extern crate outrun_common;
extern crate pest;
extern crate rusttyc;
@ -29,6 +30,7 @@ pub use error::Error;
pub use ir::Context;
pub use ir::Type;
pub use source::Source;
pub use span::Span;
pub fn compile(context: Context, source: Arc<Source>) -> Result<Context, Error> {
let ast = grammar::parse(source.clone(), None).map_err(|e| (source.clone(), e))?;

View file

@ -47,6 +47,11 @@ impl Span {
(start, end) => (end - start) + 1,
}
}
/// Returns `true` when the span's start and end are equal.
pub fn is_empty(&self) -> bool {
    let (start, end) = (self.start, self.end);
    end == start
}
}
impl From<(usize, usize)> for Span {