First post 👍

This commit is contained in:
James Harton 2015-03-05 14:22:23 -08:00
commit a4ef374f4a
7 changed files with 303 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
/_build
/deps
erl_crash.dump
*.ez

36
README.md Normal file
View file

@ -0,0 +1,36 @@
# LexLuthor
LexLuthor is a Lexer in Elixir (say that 10 times fast) which uses macros to generate reusable lexers. Good times.
LexLuthor is a state based lexer, meaning that it keeps a state stack which you can push states on and pop states off the stack, which are used to filter the applicable rules for a given state. For example:
```elixir
defmodule StringLexer do
use LexLuthor
defrule ~r/^'/, fn(_) -> :STRING end
defrule ~r/^[^']+/, :STRING, fn(e) -> { :string, e } end
defrule ~r/^'/, :STRING, fn(_) -> nil end
end
```
Rules are defined by a regular expression, an optional state (as an atom) and an action in the form of an anonymous function.
When passed the string `'foo'`, the lexer starts in the `:default` state, so it filters for rules in the default state (the first rule, as it doesn't specify a state), then it filters the available rules by the longest matching regular expression. In this case, since we have only one rule (which happens to match) it's automatically the longest match.
Once the longest match is found, its action is executed and the return value is matched:
- If the return value is a single atom then that atom is assumed to be a state and is pushed onto the top of the state stack.
- If the return value is a two element tuple then the first element is expected to be an atom (the token name) and the second element a value for this token.
- If the return value is `nil` then the top state is popped off the state stack.
If lexing succeeds then you will receive an `:ok` tuple with the second value being a list of `LexLuthor.Token` structs.
If lexing fails then you will receive an `:error` tuple with a reason and position.
## Contributing
1. Fork it ( https://github.com/jamesotron/lex_luthor/fork )
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create a new Pull Request

24
config/config.exs Normal file
View file

@ -0,0 +1,24 @@
# This file is responsible for configuring your application
# and its dependencies with the aid of the Mix.Config module.
use Mix.Config
# NOTE(review): `Mix.Config` is deprecated in modern Elixir in favour of
# `import Config`; it is fine for the Elixir ~> 1.0 this project targets.
# This configuration is loaded before any dependency and is restricted
# to this project. If another project depends on this project, this
# file won't be loaded nor affect the parent project. For this reason,
# if you want to provide default values for your application for third-
# party users, it should be done in your mix.exs file.
# Sample configuration:
#
# config :logger, :console,
# level: :info,
# format: "$date $time [$level] $metadata$message\n",
# metadata: [:user_id]
# It is also possible to import configuration files, relative to this
# directory. For example, you can emulate configuration per environment
# by uncommenting the line below and defining dev.exs, test.exs and such.
# Configuration from the imported file will override the ones defined
# here (which is why it is important to import them last).
#
# import_config "#{Mix.env}.exs"

130
lib/lex_luthor.ex Normal file
View file

@ -0,0 +1,130 @@
defmodule LexLuthor do
  @moduledoc """
  A macro-based, state-stack lexer generator.

  `use LexLuthor` in a module and declare rules with `defrule/2` or
  `defrule/3`. Each rule pairs a regex (and an optional state atom,
  defaulting to `:default`) with an action function. The generated
  `lex/1` scans a string: the rules for the current state are filtered
  to those matching the remaining input, the longest match wins, and
  the action's return value drives the lexer:

    * an atom pushes a new state onto the state stack
    * `nil` pops the current state
    * a `{name, value}` tuple emits a `LexLuthor.Token`
  """

  defmodule State do
    # Running lexer state: position in the input, the stack of lexer
    # states (the bottom `nil` entry is treated as `:default`), and the
    # tokens accumulated so far (most recent first).
    defstruct pos: 0, states: [nil], tokens: []
  end

  defmodule Token do
    # A single emitted token: input position, name and value.
    defstruct pos: 0, name: nil, value: nil
  end

  defmacro __using__(_opts) do
    quote do
      @rules []
      import LexLuthor
      @before_compile LexLuthor
    end
  end

  defmacro __before_compile__(_env) do
    # Inject `lex/1` into the using module once all rules are gathered.
    quote do
      def lex(string) do
        LexLuthor.lex(__MODULE__, @rules, string)
      end
    end
  end

  @doc """
  Defines a rule for `state`: when `regex` is the longest match while
  `state` is on top of the stack, the anonymous function `block` is
  invoked with the matched string.
  """
  defmacro defrule(regex, state, block) do
    # Deterministic, unique name for the generated action function.
    function_name = "_action_#{inspect(regex)}_#{Atom.to_string(state)}" |> String.to_atom

    quote do
      def unquote(function_name)(e) do
        unquote(block).(e)
      end

      @rules(@rules ++ [{unquote(state), unquote(regex), unquote(function_name)}])
      {:ok, Enum.count(@rules)}
    end
  end

  @doc "Defines a rule for the `:default` state."
  defmacro defrule(regex, block) do
    quote do
      defrule unquote(regex), :default, unquote(block)
    end
  end

  @doc "Lexes `string` using `rules` whose actions live on `module`."
  def lex(module, rules, string) do
    do_lex(module, rules, string, %State{})
  end

  defp do_lex(module, rules, string, lexer) do
    [current_state | _rest] = lexer.states

    # Find the longest matching rule for the current state. Raises a
    # MatchError when no rule matches the remaining input.
    {len, value, fun} =
      rules
      |> rules_for_state(current_state)
      |> matching_rules(string)
      |> apply_matches(string)
      |> longest_match_first
      |> Enum.at(0)

    # Execute the match's action.
    result = apply(module, fun, [value])

    # Bind the result of `cond` rather than rebinding `lexer` inside
    # each branch — clause-local rebinding no longer leaks out of
    # `cond` in modern Elixir, which silently broke the original.
    lexer =
      cond do
        is_nil(result) -> pop_state(lexer)
        is_atom(result) -> push_state(lexer, result)
        match?({_token, _value}, result) -> push_token(lexer, result)
      end

    # Advance past the consumed input.
    lexer = %State{lexer | pos: lexer.pos + len}

    if String.length(string) == len do
      # Entire input consumed: emit tokens in the order they were lexed.
      Enum.reverse(lexer.tokens)
    else
      {_consumed, rest} = String.split_at(string, len)
      do_lex(module, rules, rest, lexer)
    end
  end

  # Records a `{name, value}` token at the current position.
  defp push_token(lexer, {name, value}) do
    token = %Token{pos: lexer.pos, name: name, value: value}
    %State{lexer | tokens: [token | lexer.tokens]}
  end

  # Pushes a lexer state onto the stack.
  defp push_state(lexer, state) do
    %State{lexer | states: [state | lexer.states]}
  end

  # Pops the top lexer state off the stack.
  defp pop_state(lexer) do
    [_ | states] = lexer.states
    %State{lexer | states: states}
  end

  # Rules applicable to `state`. The initial `nil` state maps to
  # `:default`; normalise before filtering (the original rebound
  # `state` inside an `if` within the fn, a binding that no longer
  # escapes the `if` in modern Elixir).
  defp rules_for_state(rules, state) do
    state = state || :default
    Enum.filter(rules, fn {rule_state, _, _} -> state == rule_state end)
  end

  # Rules whose regex matches the remaining input.
  defp matching_rules(rules, string) do
    Enum.filter(rules, fn {_, regex, _} -> Regex.match?(regex, string) end)
  end

  # Turns each matching rule into {match_length, matched_string, action}.
  defp apply_matches(rules, string) do
    Enum.map(rules, fn {_, regex, fun} ->
      [match] = Regex.run(regex, string, capture: :first)
      {String.length(match), match, fun}
    end)
  end

  # Longest match first, so Enum.at(0) picks the winner.
  defp longest_match_first(matches) do
    Enum.sort_by(matches, fn {len, _, _} -> len end, &>=/2)
  end
end

30
mix.exs Normal file
View file

@ -0,0 +1,30 @@
defmodule LexLuthor.Mixfile do
  use Mix.Project

  # Mix project definition.
  def project do
    [app: :lex_luthor,
     version: "0.0.1",
     elixir: "~> 1.0",
     # `deps()` must be called with parentheses — a bare `deps` is
     # ambiguous with a variable and rejected by modern Elixir.
     deps: deps()]
  end

  # Configuration for the OTP application
  #
  # Type `mix help compile.app` for more information
  def application do
    [applications: [:logger]]
  end

  # Dependencies can be Hex packages:
  #
  #   {:mydep, "~> 0.3.0"}
  #
  # Or git/path repositories:
  #
  #   {:mydep, git: "https://github.com/elixir-lang/mydep.git", tag: "0.1.0"}
  #
  # Type `mix help deps` for more examples and options
  defp deps do
    []
  end
end

33
test/lex_luthor_test.exs Normal file
View file

@ -0,0 +1,33 @@
defmodule LexLuthorTest do
  import TestHelpers
  use ExUnit.Case, async: true

  # Table of cases: each entry pairs an input string with the first
  # token the example lexer is expected to produce for it.
  @tests [
    { "''", generate_token(:simple_string, "") },
    { "'hello'", generate_token(:simple_string, "hello") },
    { "\"\"", generate_token(:string, "") },
    { "\"hello\"", generate_token(:string, "hello") },
    { "0", generate_token(:integer, 0) },
    { "123", generate_token(:integer, 123) },
    { "0x123", generate_token(:integer, 291) },
    { "0b1011", generate_token(:integer, 11) },
    { "0.0", generate_token(:float, 0.0) },
    { "123.456", generate_token(:float, 123.456) }
  ]

  # Unroll the table into one ExUnit test per case at compile time.
  for {source, expected} <- @tests do
    %{name: expected_name, value: expected_value} = expected

    test "String #{inspect(source)} results in token #{inspect(expected)}" do
      token = unquote(source) |> ExampleLexer.lex() |> hd()
      assert token.name == unquote(expected_name)
      assert token.value == unquote(expected_value)
    end
  end
end

46
test/test_helper.exs Normal file
View file

@ -0,0 +1,46 @@
defmodule TestHelpers do
  # Builds the expected token struct for a test-table entry.
  def generate_token(name, value), do: %LexLuthor.Token{name: name, value: value}
end
# Example lexer exercising every rule form: state pushes (atom return),
# state pops (nil return) and token emission ({name, value} return).
defmodule ExampleLexer do
use LexLuthor
# single tick strings
# Empty string is matched outright; otherwise the opening quote pushes
# the :simple_string state, content is emitted, and the closing quote
# pops back out (nil return).
defrule ~r/^''/, fn(_) -> { :simple_string, "" } end
defrule ~r/^'/, fn(_) -> :simple_string end
defrule ~r/^[^']+/, :simple_string, fn(e) -> { :simple_string, e } end
defrule ~r/^'/, :simple_string, fn(_) -> nil end
# double tick strings
# Uppercase ~R disables interpolation, so `#{` and `}` are matched
# literally — these rules model `#{...}` interpolation by pushing back
# into :default and popping at the closing brace.
defrule ~r/^""/, fn(_) -> { :string, "" } end
defrule ~r/^"/, fn(_) -> :string end
defrule ~R/^#{/, :string, fn(_) -> :default end
defrule ~R/^}/, :default, fn(_) -> nil end
defrule ~R/^[^("|#{)]+/, :string, fn(e) -> { :string, e } end
defrule ~r/^"/, :string, fn(_) -> nil end
# floats
defrule ~r/^[0-9]+\.[0-9]+/, fn(e) -> { :float, String.to_float(e) } end
# integers
# Hex and binary literals: split off the "0x"/"0b" prefix and convert
# the remainder in the matching base.
defrule ~r/^0x[0-9a-fA-F]+/, fn(e) ->
[ _ | i ] = String.split e, "x"
{ :integer, String.to_integer(Enum.at(i, 0), 16) }
end
defrule ~r/^0b[01]+/, fn(e) ->
[ _ | i ] = String.split e, "b"
{ :integer, String.to_integer(Enum.at(i, 0), 2) }
end
# Decimal integers; bare zero is handled by its own rule so the
# leading-nonzero-digit rule stays simple.
defrule ~r/^[1-9][0-9]*/, fn(e) -> { :integer, String.to_integer(e) } end
defrule ~r/^0/, fn(_) -> { :integer, 0 } end
# white space
# Token value for :ws is the run length; all newline forms emit {:nl, 1}.
defrule ~r/^[ \t]+/, fn(e) -> { :ws, String.length(e) } end
defrule ~r/^\r\n/, fn(_) -> { :nl, 1 } end
defrule ~r/^\r/, fn(_) -> { :nl, 1 } end
defrule ~r/^\n/, fn(_) -> { :nl, 1 } end
end
# Boot ExUnit so the test modules compiled from the test files run.
ExUnit.start()