defmodule LexLuthor do
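  @moduledoc ~S"""
  A small lexer DSL: `use LexLuthor` in a module and declare rules with
  `defrule/2` (rules for the `:default` state) or `defrule/3` (rules for a
  named state). Each rule pairs a regex with an action function, and the
  action's return value drives the lexer (see `process_result/2` below):

    * a `{name, value}` tuple - emit a `LexLuthor.Token`
    * an atom                 - push a new lexer state
    * `nil`                   - pop the current lexer state

  A minimal usage sketch - the module name, regexes and token names are
  illustrative assumptions only, not part of this file. Because each match is
  consumed from the front of the remaining input, the patterns are anchored
  with `^`:

      defmodule NumberLexer do
        use LexLuthor

        defrule ~r/^\s+/,    fn(_) -> {:whitespace, nil} end
        defrule ~r/^[0-9]+/, fn(e) -> {:number, String.to_integer(e)} end
      end

      NumberLexer.lex "1 2 3"
      # => {:ok, [%LexLuthor.Token{name: :number, value: 1, ...}, ...]}
  """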

  defmodule State do
    # Internal lexer state: the current position, line and column in the
    # input, the stack of lexer states (current state first) and the tokens
    # emitted so far.
    defstruct pos: 0, line: 1, column: 0, states: [nil], tokens: []
  end

  defmodule Token do
    # A single lexed token and the position at which it was matched.
    defstruct pos: 0, line: 1, column: 0, name: nil, value: nil
  end

  defmacro __using__(_opts) do
    # Initialise the rule table and action counter in the calling module and
    # arrange for `lex/1` to be generated before compilation.
    quote do
      @rules []
      @action_counter 0
      import LexLuthor
      @before_compile LexLuthor
    end
  end

  defmacro __before_compile__(_env) do
    quote do
      def lex string do
        LexLuthor.lex __MODULE__, @rules, string
      end
    end
  end

  defmacro defrule(regex, state, block) do
    quote do
      @action_counter(@action_counter + 1)
      action_name = "_action_#{@action_counter}" |> String.to_atom
      block = unquote(Macro.escape(block))

      # Wrap the rule's action in a uniquely named function so that it can be
      # applied by name when the rule matches.
      defaction = quote do
        def unquote(Macro.escape(action_name))(e) do
          unquote(block).(e)
        end
      end
      Module.eval_quoted __MODULE__, defaction

      @rules(@rules ++ [{ unquote(state), unquote(regex), action_name }])
      { :ok, Enum.count(@rules) }
    end
  end

  defmacro defrule(regex, block) do
    # Rules without an explicit state belong to the :default state.
    quote do
      defrule unquote(regex), :default, unquote(block)
    end
  end

  def lex module, rules, string do
    do_lex module, rules, string, %State{}
  end

  defp do_lex module, rules, string, lexer do
    [ current_state | _rest ] = lexer.states

    # Find the longest matching rule. This could
    # probably be made a whole lot less enumeratey.
    matches = rules_for_state(rules, current_state)
      |> matching_rules(string)
      |> apply_matches(string)
      |> longest_match_first

    process_matches module, rules, matches, string, lexer, Enum.count(matches)
  end
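
  # Each match is a `{match_length, matched_string, action_name}` tuple (see
  # apply_matches/2 below), sorted longest first; only the first match is
  # applied.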

  defp process_matches(_, _, _, string, _, count) when count == 0 do
    { :error, "String not in language: #{inspect string}"}
  end

  defp process_matches(module, rules, matches, string, lexer, count) when count > 0 do
    match = Enum.at matches, 0

    # Execute the match's action.
    {len, value, fun} = match
    result = apply(module, fun, [value])

    lexer = process_result result, lexer

    case lexer do
      { :error, _ } ->
        lexer
      _ ->
        fragment = String.slice string, 0, len
        line     = lexer.line + line_number_incrementor(fragment)
        column   = column_number(lexer, fragment)

        lexer = Map.merge lexer, %{pos: lexer.pos + len, line: line, column: column}

        # Are we at the end of the string?
        if String.length(string) == len do
          { :ok, Enum.reverse lexer.tokens }
        else
          { _, new_string } = String.split_at string, len
          do_lex module, rules, new_string, lexer
        end
    end
  end

  # Compute the lexer's column after consuming the matched fragment: if the
  # fragment contains a newline the column restarts at the length of its last
  # line, otherwise it advances by the fragment's length.
  defp column_number lexer, match do
    case Regex.match?(~r/[\r\n]/, match) do
      true ->
        len = match |> split_on_newlines |> List.last |> String.length
        case len do
          0 -> 1
          _ -> len
        end
      false ->
        lexer.column + String.length(match)
    end
  end

  # The number of newlines in the matched fragment.
  defp line_number_incrementor match do
    (match |> split_on_newlines |> Enum.count) - 1
  end

  def split_on_newlines string do
    # Match "\r\n" before the single-character endings so that a CRLF pair
    # counts as one line break rather than two.
    string |> String.split(~r{\r\n|\r|\n})
  end
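
  # The return value of a rule's action determines what happens next:
  #
  #   * `nil`           - pop the current lexer state
  #   * an atom         - push that atom as the new lexer state
  #   * `{name, value}` - emit a token in the current state
  #
  # Anything else is an error.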

  defp process_result(result, lexer) when is_nil(result) do
    pop_state lexer
  end

  defp process_result(result, lexer) when is_atom(result) do
    push_state lexer, result
  end

  defp process_result(result, lexer) when is_tuple(result) do
    push_token lexer, result
  end

  defp process_result(result, _) do
    { :error, "Invalid result from action: #{inspect result}"}
  end

  defp push_token lexer, token do
    { tname, tvalue } = token
    token = %Token{ pos: lexer.pos, line: lexer.line, column: lexer.column, name: tname, value: tvalue }
    Map.merge lexer, %{tokens: [token | lexer.tokens ]}
  end

  defp push_state lexer, state do
    Map.merge lexer, %{states: [state | lexer.states ]}
  end

  defp pop_state lexer do
    [ _ | states ] = lexer.states
    Map.merge lexer, %{states: states}
  end

  defp rules_for_state rules, state do
    # The initial lexer state is nil, which is treated as :default.
    state = state || :default
    Enum.filter rules, fn({rule_state, _, _}) ->
      state == rule_state
    end
  end

  defp matching_rules rules, string do
    Enum.filter rules, fn({_, regex, _}) ->
      Regex.match?(regex, string)
    end
  end

  defp apply_matches rules, string do
    Enum.map rules, fn({_, regex, fun}) ->
      [match] = Regex.run(regex, string, capture: :first)
      { String.length(match), match, fun }
    end
  end

  defp longest_match_first matches do
    Enum.sort_by matches, fn({len, _, _}) -> len end, &>=/2
  end
end