diff --git a/.formatter.exs b/.formatter.exs new file mode 100644 index 0000000..d2cda26 --- /dev/null +++ b/.formatter.exs @@ -0,0 +1,4 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/lib/lex_luthor.ex b/lib/lex_luthor.ex index 4b3d817..f91e1ee 100644 --- a/lib/lex_luthor.ex +++ b/lib/lex_luthor.ex @@ -40,8 +40,8 @@ defmodule LexLuthor do defmacro __before_compile__(_env) do quote do - def lex string do - Runner.lex __MODULE__, @rules, string + def lex(string) do + Runner.lex(__MODULE__, @rules, string) end end end @@ -53,21 +53,24 @@ defmodule LexLuthor do - `state` the lexer state in which this rule applies. - `action` the function to execute when this rule is applied. """ - @spec defrule(Regex.t, atom, (String.t -> atom | nil | {atom, any})) :: {:ok, non_neg_integer} + @spec defrule(Regex.t(), atom, (String.t() -> atom | nil | {atom, any})) :: + {:ok, non_neg_integer} defmacro defrule(regex, state, action) do quote do - @action_counter(@action_counter + 1) - action_name = "_action_#{@action_counter}" |> String.to_atom - action = unquote(Macro.escape(action)) + @action_counter @action_counter + 1 + action_name = "_action_#{@action_counter}" |> String.to_atom() + action = unquote(Macro.escape(action)) - defaction = quote do - def unquote(Macro.escape(action_name))(e) do - unquote(action).(e) + defaction = + quote do + def unquote(Macro.escape(action_name))(e) do + unquote(action).(e) + end end - end - Module.eval_quoted __MODULE__, defaction - @rules(@rules ++ [{unquote(state), unquote(regex), action_name}]) + Module.eval_quoted(__MODULE__, defaction) + + @rules @rules ++ [{unquote(state), unquote(regex), action_name}] {:ok, Enum.count(@rules)} end end @@ -80,7 +83,7 @@ defmodule LexLuthor do """ defmacro defrule(regex, action) do quote do - defrule unquote(regex), :default, unquote(action) + defrule(unquote(regex), :default, unquote(action)) end end end diff --git a/lib/lex_luthor/runner.ex 
b/lib/lex_luthor/runner.ex index 2a9f897..04407db 100644 --- a/lib/lex_luthor/runner.ex +++ b/lib/lex_luthor/runner.ex @@ -15,142 +15,146 @@ defmodule LexLuthor.Runner do - `rules` an array of rules to apply to the input string. - `string` the input string to be lexed. """ - @spec lex(atom, [{atom, Regex.t, String.t}], String.t) :: {:ok, non_neg_integer} - def lex module, rules, string do - do_lex module, rules, string, %State{} + @spec lex(atom, [{atom, Regex.t(), String.t()}], String.t()) :: {:ok, non_neg_integer} + def lex(module, rules, string) do + do_lex(module, rules, string, %State{}) end - defp do_lex module, rules, string, lexer do + defp do_lex(module, rules, string, lexer) do [current_state | _rest] = lexer.states # Find the longest matching rule. This could # probably be made a whole lot less enumeratey. - matches = rules + matches = + rules |> rules_for_state(current_state) |> matching_rules(string) |> apply_matches(string) |> longest_match_first - process_matches module, rules, matches, string, lexer, Enum.count(matches) + process_matches(module, rules, matches, string, lexer, Enum.count(matches)) end defp process_matches(_, _, _, string, _, count) when count == 0 do - {:error, "String not in language: #{inspect string}"} + {:error, "String not in language: #{inspect(string)}"} end defp process_matches(module, rules, matches, string, lexer, count) when count > 0 do - match = Enum.at matches, 0 + match = Enum.at(matches, 0) # Execute the matches' action. 
{len, value, fun} = match result = apply(module, fun, [value]) - lexer = process_result result, lexer + lexer = process_result(result, lexer) case lexer do - { :error, _ } -> + {:error, _} -> lexer + _ -> + fragment = String.slice(string, 0, len) + line = lexer.line + line_number_incrementor(fragment) + column = column_number(lexer, fragment) - fragment = String.slice string, 0, len - line = lexer.line + line_number_incrementor fragment - column = column_number lexer, fragment + lexer = Map.merge(lexer, %{pos: lexer.pos + len, line: line, column: column}) - lexer = Map.merge(lexer, %{pos: lexer.pos + len, - line: line, - column: column}) - - # Are we at the end of the string? - if String.length(string) == len do - { :ok, Enum.reverse lexer.tokens } - else - { _ , new_string } = String.split_at string, len - do_lex module, rules, new_string, lexer + case String.split_at(string, len) do + {_, ""} -> {:ok, Enum.reverse(lexer.tokens)} + {_, new_string} -> do_lex(module, rules, new_string, lexer) end end end - defp column_number lexer, match do - case Regex.match? 
~r/[\r\n]/, match do + defp column_number(lexer, match) do + case Regex.match?(~r/[\r\n]/, match) do true -> - len = match |> split_on_newlines |> List.last |> String.length + len = match |> split_on_newlines |> List.last() |> String.length() + case len do 0 -> 1 _ -> len end + false -> - lexer.column + String.length match + lexer.column + String.length(match) end end - defp line_number_incrementor match do - (match |> split_on_newlines |> Enum.count) - 1 + defp line_number_incrementor(match) do + (match |> split_on_newlines |> Enum.count()) - 1 end - defp split_on_newlines string do + defp split_on_newlines(string) do string |> String.split(~r{(\r|\n|\r\n)}) end defp process_result(result, lexer) when is_nil(result) do - pop_state lexer + pop_state(lexer) end defp process_result(result, lexer) when is_atom(result) do - push_state lexer, result + push_state(lexer, result) end defp process_result(result, lexer) when is_tuple(result) do - push_token lexer, result + push_token(lexer, result) end - defp process_result result, _ do - {:error, "Invalid result from action: #{inspect result}"} + defp process_result(result, _) do + {:error, "Invalid result from action: #{inspect(result)}"} end - defp push_token lexer, token do + defp push_token(lexer, token) do {tname, tvalue} = token - token = %Token{pos: lexer.pos, - line: lexer.line, - column: lexer.column, - name: tname, - value: tvalue} - Map.merge lexer, %{tokens: [token | lexer.tokens ]} + + token = %Token{ + pos: lexer.pos, + line: lexer.line, + column: lexer.column, + name: tname, + value: tvalue + } + + Map.merge(lexer, %{tokens: [token | lexer.tokens]}) end - defp push_state lexer, state do - Map.merge lexer, %{states: [state | lexer.states ]} + defp push_state(lexer, state) do + Map.merge(lexer, %{states: [state | lexer.states]}) end - defp pop_state lexer do - [ _ | states ] = lexer.states - Map.merge lexer, %{states: states} + defp pop_state(lexer) do + [_ | states] = lexer.states + Map.merge(lexer, %{states: 
states}) end - defp rules_for_state rules, state do - Enum.filter rules, fn({rule_state,_,_}) -> - state = if is_nil(state) do - :default - else - state - end + defp rules_for_state(rules, state) do + Enum.filter(rules, fn {rule_state, _, _} -> + state = + if is_nil(state) do + :default + else + state + end + state == rule_state - end + end) end - defp matching_rules rules, string do - Enum.filter rules, fn({_,regex,_}) -> + defp matching_rules(rules, string) do + Enum.filter(rules, fn {_, regex, _} -> Regex.match?(regex, string) - end + end) end - defp apply_matches rules, string do - Enum.map rules, fn({_,regex,fun}) -> - [match] = Regex.run(regex,string, capture: :first) - { String.length(match), match, fun } - end + defp apply_matches(rules, string) do + Enum.map(rules, fn {_, regex, fun} -> + [match] = Regex.run(regex, string, capture: :first) + {String.length(match), match, fun} + end) end - defp longest_match_first matches do - Enum.sort_by matches, fn({len,_,_}) -> len end, &>=/2 + defp longest_match_first(matches) do + Enum.sort_by(matches, fn {len, _, _} -> len end, &>=/2) end end diff --git a/mix.exs b/mix.exs index bccc7c3..e44184b 100644 --- a/mix.exs +++ b/mix.exs @@ -2,18 +2,21 @@ defmodule LexLuthor.Mixfile do use Mix.Project def project do - [app: :lex_luthor, - version: "0.1.1", - elixir: "~> 1.0", - description: "LexLuthor is a Lexer in Elixir (say that 10 times fast) which uses macros to generate a reusable lexers. Good times.", - source_url: "https://github.com/jamesotron/lex_luthor", - preferred_cli_env: [inch: :docs], - package: [ - contributors: ["James Harton"], - licenses: ["MIT"], - links: %{"Source" => "https://github.com/jamesotron/lex_luthor"} - ], - deps: deps()] + [ + app: :lex_luthor, + version: "0.1.1", + elixir: "~> 1.0", + description: + "LexLuthor is a Lexer in Elixir (say that 10 times fast) which uses macros to generate reusable lexers. 
Good times.", + source_url: "https://github.com/jamesotron/lex_luthor", + preferred_cli_env: [inch: :docs], + package: [ + contributors: ["James Harton"], + licenses: ["MIT"], + links: %{"Source" => "https://github.com/jamesotron/lex_luthor"} + ], + deps: deps() + ] end # Configuration for the OTP application @@ -35,8 +38,7 @@ defmodule LexLuthor.Mixfile do defp deps do [ {:ex_doc, ">= 0.0.0", only: :dev}, - {:inch_ex, only: :docs}, - {:credo, only: ~w(dev test)a} + {:credo, "~> 1.5", only: ~w(dev test)a} ] end end diff --git a/mix.lock b/mix.lock index afeba33..2d4593d 100644 --- a/mix.lock +++ b/mix.lock @@ -1,6 +1,10 @@ -%{"bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], []}, - "credo": {:hex, :credo, "0.7.3", "9827ab04002186af1aec014a811839a06f72aaae6cd5eed3919b248c8767dbf3", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, optional: false]}]}, - "earmark": {:hex, :earmark, "1.0.1", "2c2cd903bfdc3de3f189bd9a8d4569a075b88a8981ded9a0d95672f6e2b63141", [:mix], []}, - "ex_doc": {:hex, :ex_doc, "0.13.0", "aa2f8fe4c6136a2f7cfc0a7e06805f82530e91df00e2bff4b4362002b43ada65", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, optional: false]}]}, - "inch_ex": {:hex, :inch_ex, "0.5.6", "418357418a553baa6d04eccd1b44171936817db61f4c0840112b420b8e378e67", [:mix], [{:poison, "~> 1.5 or ~> 2.0 or ~> 3.0", [hex: :poison, optional: false]}]}, - "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], []}} +%{ + "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, + "credo": {:hex, :credo, "1.5.4", "9914180105b438e378e94a844ec3a5088ae5875626fc945b7c1462b41afc3198", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: 
:jason, repo: "hexpm", optional: false]}], "hexpm", "cf51af45eadc0a3f39ba13b56fdac415c91b34f7b7533a13dc13550277141bc4"}, + "earmark": {:hex, :earmark, "1.0.1", "2c2cd903bfdc3de3f189bd9a8d4569a075b88a8981ded9a0d95672f6e2b63141", [:mix], [], "hexpm", "db7b13d74a9edc54d3681762154d164d4a661cd27673cca80760344449877664"}, + "ex_doc": {:hex, :ex_doc, "0.13.0", "aa2f8fe4c6136a2f7cfc0a7e06805f82530e91df00e2bff4b4362002b43ada65", [:mix], [{:earmark, "~> 1.0", [hex: :earmark, repo: "hexpm", optional: false]}], "hexpm", "4b40cd154c2660d795b88f73c61b5e3679abe7215e8c20eb9040101cc4819d12"}, + "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, + "inch_ex": {:hex, :inch_ex, "0.5.6", "418357418a553baa6d04eccd1b44171936817db61f4c0840112b420b8e378e67", [:mix], [{:poison, "~> 1.5 or ~> 2.0 or ~> 3.0", [hex: :poison, repo: "hexpm", optional: false]}], "hexpm", "7123ca0450686a61416a06cd38e26af18fd0f8c1cff5214770a957c6e0724338"}, + "jason": {:hex, :jason, "1.2.2", "ba43e3f2709fd1aa1dce90aaabfd039d000469c05c56f0b8e31978e03fa39052", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "18a228f5f0058ee183f29f9eae0805c6e59d61c3b006760668d8d18ff0d12179"}, + "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm", "fec8660eb7733ee4117b85f55799fd3833eb769a6df71ccf8903e8dc5447cfce"}, +} diff --git a/test/acceptance_test.exs b/test/acceptance_test.exs index cc3efa5..b256338 100644 --- a/test/acceptance_test.exs +++ b/test/acceptance_test.exs @@ -3,50 +3,50 @@ defmodule AcceptanceTest do use ExUnit.Case, async: true @tests [ - { "''", generate_token(:simple_string, "") }, - { "'hello'", generate_token(:simple_string, "hello") }, - { "\"\"", generate_token(:string, "") }, - { "\"hello\"", generate_token(:string, "hello") }, - { "0", 
generate_token(:integer, 0) }, - { "123", generate_token(:integer, 123) }, - { "0x123", generate_token(:integer, 291) }, - { "0b1011", generate_token(:integer, 11) }, - { "0.0", generate_token(:float, 0.0) }, - { "123.456", generate_token(:float, 123.456) } + {"''", generate_token(:simple_string, "")}, + {"'hello'", generate_token(:simple_string, "hello")}, + {"\"\"", generate_token(:string, "")}, + {"\"hello\"", generate_token(:string, "hello")}, + {"0", generate_token(:integer, 0)}, + {"123", generate_token(:integer, 123)}, + {"0x123", generate_token(:integer, 291)}, + {"0b1011", generate_token(:integer, 11)}, + {"0.0", generate_token(:float, 0.0)}, + {"123.456", generate_token(:float, 123.456)} ] - Enum.each @tests, fn - { source, token } -> - tname = Map.get token, :name - tvalue = Map.get token, :value + Enum.each(@tests, fn + {source, token} -> + tname = Map.get(token, :name) + tvalue = Map.get(token, :value) test "String #{inspect(source)} results in token #{inspect(token)}" do - result = ExampleLexer.lex unquote(source) + result = ExampleLexer.lex(unquote(source)) {ok, result} = result assert ok == :ok result = Enum.at(result, 0) - rname = Map.get result, :name - rvalue = Map.get result, :value - assert rname == unquote(tname) + rname = Map.get(result, :name) + rvalue = Map.get(result, :value) + assert rname == unquote(tname) assert rvalue == unquote(tvalue) end - end + end) - test "String #{inspect "'foo'\n'bar'"} has correct line numbers" do - {ok, tokens} = ExampleLexer.lex "'foo'\n'bar'" + test "String #{inspect("'foo'\n'bar'")} has correct line numbers" do + {ok, tokens} = ExampleLexer.lex("'foo'\n'bar'") assert ok == :ok - token = List.last tokens + token = List.last(tokens) assert token.line == 2 end - test "String #{inspect "'foo'\n'bar' 'baz'"} has correct column numbers" do - {ok, tokens} = ExampleLexer.lex "'foo'\n'bar' 'baz'" + test "String #{inspect("'foo'\n'bar' 'baz'")} has correct column numbers" do + {ok, tokens} = 
ExampleLexer.lex("'foo'\n'bar' 'baz'") assert ok == :ok - token = List.last tokens + token = List.last(tokens) assert token.value == "baz" assert token.column == 8 end diff --git a/test/rejection_test.exs b/test/rejection_test.exs index 40a075f..a95ec59 100644 --- a/test/rejection_test.exs +++ b/test/rejection_test.exs @@ -2,14 +2,14 @@ defmodule RejectionTest do use ExUnit.Case, async: true test "string not in language fails" do - {status, message} = ExampleLexer.lex "{}" - assert status == :error + {status, message} = ExampleLexer.lex("{}") + assert status == :error assert message == "String not in language: \"{}\"" end test "bogus action" do - {status, message} = ExampleLexer.lex "BOGUS_ACTION" - assert status == :error + {status, message} = ExampleLexer.lex("BOGUS_ACTION") + assert status == :error assert message == "Invalid result from action: \"WAT\"" end end diff --git a/test/test_helper.exs b/test/test_helper.exs index 980d433..d816493 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -8,42 +8,44 @@ defmodule ExampleLexer do use LexLuthor # single tick strings - defrule ~r/^''/, fn(_) -> { :simple_string, "" } end - defrule ~r/^'/, fn(_) -> :simple_string end - defrule ~r/^[^']+/, :simple_string, fn(e) -> { :simple_string, e } end - defrule ~r/^'/, :simple_string, fn(_) -> nil end + defrule(~r/^''/, fn _ -> {:simple_string, ""} end) + defrule(~r/^'/, fn _ -> :simple_string end) + defrule(~r/^[^']+/, :simple_string, fn e -> {:simple_string, e} end) + defrule(~r/^'/, :simple_string, fn _ -> nil end) # double tick strings - defrule ~r/^""/, fn(_) -> { :string, "" } end - defrule ~r/^"/, fn(_) -> :string end - defrule ~R/^#{/, :string, fn(_) -> :default end - defrule ~R/^}/, :default, fn(_) -> nil end - defrule ~R/^[^("|#{)]+/, :string, fn(e) -> { :string, e } end - defrule ~r/^"/, :string, fn(_) -> nil end + defrule(~r/^""/, fn _ -> {:string, ""} end) + defrule(~r/^"/, fn _ -> :string end) + defrule(~R/^#{/, :string, fn _ -> :default end) + 
defrule(~R/^}/, :default, fn _ -> nil end) + defrule(~R/^[^("|#{)]+/, :string, fn e -> {:string, e} end) + defrule(~r/^"/, :string, fn _ -> nil end) # floats - defrule ~r/^[0-9]+\.[0-9]+/, fn(e) -> { :float, String.to_float(e) } end + defrule(~r/^[0-9]+\.[0-9]+/, fn e -> {:float, String.to_float(e)} end) # integers - defrule ~r/^0x[0-9a-fA-F]+/, fn(e) -> - [ _ | i ] = String.split e, "x" - { :integer, String.to_integer(Enum.at(i, 0), 16) } - end - defrule ~r/^0b[01]+/, fn(e) -> - [ _ | i ] = String.split e, "b" - { :integer, String.to_integer(Enum.at(i, 0), 2) } - end - defrule ~r/^[1-9][0-9]*/, fn(e) -> { :integer, String.to_integer(e) } end - defrule ~r/^0/, fn(_) -> { :integer, 0 } end + defrule(~r/^0x[0-9a-fA-F]+/, fn e -> + [_ | i] = String.split(e, "x") + {:integer, String.to_integer(Enum.at(i, 0), 16)} + end) + + defrule(~r/^0b[01]+/, fn e -> + [_ | i] = String.split(e, "b") + {:integer, String.to_integer(Enum.at(i, 0), 2)} + end) + + defrule(~r/^[1-9][0-9]*/, fn e -> {:integer, String.to_integer(e)} end) + defrule(~r/^0/, fn _ -> {:integer, 0} end) # white space - defrule ~r/^[ \t]+/, fn(e) -> { :ws, String.length(e) } end - defrule ~r/^\r\n/, fn(_) -> { :nl, 1 } end - defrule ~r/^\r/, fn(_) -> { :nl, 1 } end - defrule ~r/^\n/, fn(_) -> { :nl, 1 } end + defrule(~r/^[ \t]+/, fn e -> {:ws, String.length(e)} end) + defrule(~r/^\r\n/, fn _ -> {:nl, 1} end) + defrule(~r/^\r/, fn _ -> {:nl, 1} end) + defrule(~r/^\n/, fn _ -> {:nl, 1} end) # bogus action - defrule ~r/^BOGUS_ACTION/, fn(_) -> "WAT" end + defrule(~r/^BOGUS_ACTION/, fn _ -> "WAT" end) end ExUnit.start()