First post 👍
This commit is contained in:
commit
a4ef374f4a
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
/_build
|
||||
/deps
|
||||
erl_crash.dump
|
||||
*.ez
|
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
# LexLuthor
|
||||
|
||||
LexLuthor is a lexer in Elixir (say that 10 times fast) which uses macros to generate reusable lexers. Good times.
|
||||
|
||||
LexLuthor is a state based lexer, meaning that it keeps a state stack which you can push states on and pop states off the stack, which are used to filter the applicable rules for a given state. For example:
|
||||
|
||||
```elixir
|
||||
defmodule StringLexer do
|
||||
use LexLuthor
|
||||
|
||||
defrule ~r/^'/, fn(_) -> :STRING end
|
||||
defrule ~r/^[^']+/, :STRING, fn(e) -> { :string, e } end
|
||||
defrule ~r/^'/, :STRING, fn(_) -> nil end
|
||||
end
|
||||
```
|
||||
|
||||
Rules are defined by a regular expression, an optional state (as an atom) and an action in the form of an anonymous function.
|
||||
|
||||
When passed the string `'foo'`, the lexer starts in the `:default` state, so it filters for rules in the default state (the first rule, as it doesn't specify a state), then it filters the available rules by the longest matching regular expression. In this case, since we have only one rule (which happens to match) it's automatically the longest match.
|
||||
|
||||
Once the longest match is found, its action is executed and the return value is matched:
|
||||
- If the return value is a single atom then that atom is assumed to be a state and pushed onto the top of the state stack.
|
||||
- If the return value is a two element tuple then the first element is expected to be an atom (the token name) and the second element a value for this token.
|
||||
- If the return value is `nil` then the top state is popped off the state stack.
|
||||
|
||||
If lexing succeeds then you will receive an `:ok` tuple with the second value being a list of `LexLuthor.Token` structs.
|
||||
|
||||
If lexing fails then you will receive an `:error` tuple with a reason and position.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork it ( https://github.com/jamesotron/lex_luthor/fork )
|
||||
2. Create your feature branch (`git checkout -b my-new-feature`)
|
||||
3. Commit your changes (`git commit -am 'Add some feature'`)
|
||||
4. Push to the branch (`git push origin my-new-feature`)
|
||||
5. Create a new Pull Request
|
24
config/config.exs
Normal file
24
config/config.exs
Normal file
|
@ -0,0 +1,24 @@
|
|||
# This file is responsible for configuring your application
|
||||
# and its dependencies with the aid of the Mix.Config module.
|
||||
use Mix.Config
|
||||
|
||||
# This configuration is loaded before any dependency and is restricted
|
||||
# to this project. If another project depends on this project, this
|
||||
# file won't be loaded nor affect the parent project. For this reason,
|
||||
# if you want to provide default values for your application for third-
|
||||
# party users, it should be done in your mix.exs file.
|
||||
|
||||
# Sample configuration:
|
||||
#
|
||||
# config :logger, :console,
|
||||
# level: :info,
|
||||
# format: "$date $time [$level] $metadata$message\n",
|
||||
# metadata: [:user_id]
|
||||
|
||||
# It is also possible to import configuration files, relative to this
|
||||
# directory. For example, you can emulate configuration per environment
|
||||
# by uncommenting the line below and defining dev.exs, test.exs and such.
|
||||
# Configuration from the imported file will override the ones defined
|
||||
# here (which is why it is important to import them last).
|
||||
#
|
||||
# import_config "#{Mix.env}.exs"
|
130
lib/lex_luthor.ex
Normal file
130
lib/lex_luthor.ex
Normal file
|
@ -0,0 +1,130 @@
|
|||
defmodule LexLuthor do
|
||||
|
||||
@rules []
|
||||
@action_no 0
|
||||
|
||||
defmodule State do
|
||||
defstruct pos: 0, states: [nil], tokens: []
|
||||
end
|
||||
|
||||
defmodule Token do
|
||||
defstruct pos: 0, name: nil, value: nil
|
||||
end
|
||||
|
||||
defmacro __using__(_opts) do
|
||||
quote do
|
||||
@rules []
|
||||
import LexLuthor
|
||||
@before_compile LexLuthor
|
||||
end
|
||||
end
|
||||
|
||||
defmacro __before_compile__(_env) do
|
||||
quote do
|
||||
def lex string do
|
||||
LexLuthor.lex __MODULE__, @rules, string
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defmacro defrule(regex, state, block) do
|
||||
function_name = "_action_#{inspect(regex)}_#{Atom.to_string state}" |> String.to_atom
|
||||
quote do
|
||||
def unquote(function_name)(e) do
|
||||
unquote(block).(e)
|
||||
end
|
||||
|
||||
@rules(@rules ++ [{ unquote(state), unquote(regex), unquote(function_name) }])
|
||||
{ :ok, Enum.count(@rules) }
|
||||
end
|
||||
end
|
||||
|
||||
defmacro defrule(regex, block) do
|
||||
quote do
|
||||
defrule unquote(regex), :default, unquote(block)
|
||||
end
|
||||
end
|
||||
|
||||
def lex module, rules, string do
|
||||
do_lex module, rules, string, %State{}
|
||||
end
|
||||
|
||||
defp do_lex module, rules, string, lexer do
|
||||
[ current_state | _rest ] = lexer.states
|
||||
|
||||
# Find the longest matching rule. This could
|
||||
# probably be made a whole lot less enumeratey.
|
||||
match = rules_for_state(rules, current_state)
|
||||
|> matching_rules(string)
|
||||
|> apply_matches(string)
|
||||
|> longest_match_first
|
||||
|> Enum.at(0)
|
||||
|
||||
# Execute the matches' action.
|
||||
{len, value, fun} = match
|
||||
result = apply(module, fun, [value])
|
||||
|
||||
# Modify the lexer state as needed.
|
||||
cond do
|
||||
is_nil(result) ->
|
||||
lexer = pop_state lexer
|
||||
is_atom(result) ->
|
||||
lexer = push_state lexer, result
|
||||
{ _token, _value } = result ->
|
||||
lexer = push_token lexer, result
|
||||
end
|
||||
|
||||
# Increment lexer position
|
||||
lexer = %State{ pos: lexer.pos + len, states: lexer.states, tokens: lexer.tokens }
|
||||
|
||||
# Are we at the end of the string?
|
||||
if String.length(string) == len do
|
||||
Enum.reverse lexer.tokens
|
||||
else
|
||||
{ _ , new_string } = String.split_at string, len
|
||||
do_lex module, rules, new_string, lexer
|
||||
end
|
||||
end
|
||||
|
||||
defp push_token lexer, token do
|
||||
{ tname, tvalue } = token
|
||||
token = %Token{ pos: lexer.pos, name: tname, value: tvalue }
|
||||
%State{ pos: lexer.pos, states: lexer.states, tokens: [ token | lexer.tokens ] }
|
||||
end
|
||||
|
||||
defp push_state lexer, state do
|
||||
%State{ pos: lexer.pos, states: [ state | lexer.states ], tokens: lexer.tokens }
|
||||
end
|
||||
|
||||
defp pop_state lexer do
|
||||
[ _ | states ] = lexer.states
|
||||
%State{ pos: lexer.pos, states: states, tokens: lexer.tokens }
|
||||
end
|
||||
|
||||
defp rules_for_state rules, state do
|
||||
Enum.filter rules, fn({rule_state,_,_})->
|
||||
if is_nil(state) do
|
||||
state = :default
|
||||
end
|
||||
state == rule_state
|
||||
end
|
||||
end
|
||||
|
||||
defp matching_rules rules, string do
|
||||
Enum.filter rules, fn({_,regex,_})->
|
||||
Regex.match?(regex, string)
|
||||
end
|
||||
end
|
||||
|
||||
defp apply_matches rules, string do
|
||||
Enum.map rules, fn({_,regex,fun})->
|
||||
[match] = Regex.run(regex,string, capture: :first)
|
||||
{ String.length(match), match, fun }
|
||||
end
|
||||
end
|
||||
|
||||
defp longest_match_first matches do
|
||||
Enum.sort_by matches, fn({len,_,_})-> len end, &>=/2
|
||||
end
|
||||
|
||||
end
|
30
mix.exs
Normal file
30
mix.exs
Normal file
|
@ -0,0 +1,30 @@
|
|||
defmodule LexLuthor.Mixfile do
|
||||
use Mix.Project
|
||||
|
||||
def project do
|
||||
[app: :lex_luthor,
|
||||
version: "0.0.1",
|
||||
elixir: "~> 1.0",
|
||||
deps: deps]
|
||||
end
|
||||
|
||||
# Configuration for the OTP application
|
||||
#
|
||||
# Type `mix help compile.app` for more information
|
||||
def application do
|
||||
[applications: [:logger]]
|
||||
end
|
||||
|
||||
# Dependencies can be Hex packages:
|
||||
#
|
||||
# {:mydep, "~> 0.3.0"}
|
||||
#
|
||||
# Or git/path repositories:
|
||||
#
|
||||
# {:mydep, git: "https://github.com/elixir-lang/mydep.git", tag: "0.1.0"}
|
||||
#
|
||||
# Type `mix help deps` for more examples and options
|
||||
defp deps do
|
||||
[]
|
||||
end
|
||||
end
|
33
test/lex_luthor_test.exs
Normal file
33
test/lex_luthor_test.exs
Normal file
|
@ -0,0 +1,33 @@
|
|||
defmodule LexLuthorTest do
|
||||
import TestHelpers
|
||||
use ExUnit.Case, async: true
|
||||
|
||||
@tests [
|
||||
{ "''", generate_token(:simple_string, "") },
|
||||
{ "'hello'", generate_token(:simple_string, "hello") },
|
||||
{ "\"\"", generate_token(:string, "") },
|
||||
{ "\"hello\"", generate_token(:string, "hello") },
|
||||
{ "0", generate_token(:integer, 0) },
|
||||
{ "123", generate_token(:integer, 123) },
|
||||
{ "0x123", generate_token(:integer, 291) },
|
||||
{ "0b1011", generate_token(:integer, 11) },
|
||||
{ "0.0", generate_token(:float, 0.0) },
|
||||
{ "123.456", generate_token(:float, 123.456) }
|
||||
]
|
||||
|
||||
Enum.each @tests, fn
|
||||
{ source, token } ->
|
||||
tname = Map.get token, :name
|
||||
tvalue = Map.get token, :value
|
||||
|
||||
test "String #{inspect(source)} results in token #{inspect(token)}" do
|
||||
result = Enum.at(ExampleLexer.lex(unquote(source)), 0)
|
||||
|
||||
rname = Map.get result, :name
|
||||
rvalue = Map.get result, :value
|
||||
assert rname == unquote(tname)
|
||||
assert rvalue == unquote(tvalue)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
46
test/test_helper.exs
Normal file
46
test/test_helper.exs
Normal file
|
@ -0,0 +1,46 @@
|
|||
defmodule TestHelpers do
|
||||
def generate_token(name, value) do
|
||||
%LexLuthor.Token{name: name, value: value}
|
||||
end
|
||||
end
|
||||
|
||||
defmodule ExampleLexer do
|
||||
use LexLuthor
|
||||
|
||||
# single tick strings
|
||||
defrule ~r/^''/, fn(_) -> { :simple_string, "" } end
|
||||
defrule ~r/^'/, fn(_) -> :simple_string end
|
||||
defrule ~r/^[^']+/, :simple_string, fn(e) -> { :simple_string, e } end
|
||||
defrule ~r/^'/, :simple_string, fn(_) -> nil end
|
||||
|
||||
# double tick strings
|
||||
defrule ~r/^""/, fn(_) -> { :string, "" } end
|
||||
defrule ~r/^"/, fn(_) -> :string end
|
||||
defrule ~R/^#{/, :string, fn(_) -> :default end
|
||||
defrule ~R/^}/, :default, fn(_) -> nil end
|
||||
defrule ~R/^[^("|#{)]+/, :string, fn(e) -> { :string, e } end
|
||||
defrule ~r/^"/, :string, fn(_) -> nil end
|
||||
|
||||
# floats
|
||||
defrule ~r/^[0-9]+\.[0-9]+/, fn(e) -> { :float, String.to_float(e) } end
|
||||
|
||||
# integers
|
||||
defrule ~r/^0x[0-9a-fA-F]+/, fn(e) ->
|
||||
[ _ | i ] = String.split e, "x"
|
||||
{ :integer, String.to_integer(Enum.at(i, 0), 16) }
|
||||
end
|
||||
defrule ~r/^0b[01]+/, fn(e) ->
|
||||
[ _ | i ] = String.split e, "b"
|
||||
{ :integer, String.to_integer(Enum.at(i, 0), 2) }
|
||||
end
|
||||
defrule ~r/^[1-9][0-9]*/, fn(e) -> { :integer, String.to_integer(e) } end
|
||||
defrule ~r/^0/, fn(_) -> { :integer, 0 } end
|
||||
|
||||
# white space
|
||||
defrule ~r/^[ \t]+/, fn(e) -> { :ws, String.length(e) } end
|
||||
defrule ~r/^\r\n/, fn(_) -> { :nl, 1 } end
|
||||
defrule ~r/^\r/, fn(_) -> { :nl, 1 } end
|
||||
defrule ~r/^\n/, fn(_) -> { :nl, 1 } end
|
||||
end
|
||||
|
||||
ExUnit.start()
|
Loading…
Reference in a new issue