Skip to content

Commit f9816ec

Browse files
authored
Fix Floki.text/2 when document contains a "PI" tag (#696)
This is a fix for when a processing instruction is inside the document tree (like an XML declaration). Fixes #695.
1 parent 561f135 commit f9816ec

3 files changed

Lines changed: 13 additions & 0 deletions

File tree

lib/floki/deep_text.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ defmodule Floki.DeepText do
4343

4444
# A "script" node contributes no text when the fifth argument is `false`:
# the accumulator is returned unchanged and the node's children are not visited.
# NOTE(review): the fifth argument is presumably the "include script content" flag — confirm against the caller.
defp get_text({"script", _, _}, acc, _, _, false, _), do: acc
4545
# A "style" node contributes no text when the sixth argument is `false`:
# the accumulator is returned unchanged and the node's children are not visited.
# NOTE(review): the sixth argument is presumably the "include style content" flag — confirm against the caller.
defp get_text({"style", _, _}, acc, _, _, _, false), do: acc
46+
# Processing-instruction nodes (`{:pi, _, _}`, e.g. an `<?xml ... ?>` declaration)
# carry no text: return the accumulator unchanged regardless of the other arguments.
defp get_text({:pi, _, _}, acc, _, _, _, _), do: acc
4647

4748
defp get_text({"input", attrs, _}, acc, _, true, _, _) do
4849
[acc, Floki.TextExtractor.extract_input_value(attrs)]

test/floki/deep_text_test.exs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,10 @@ defmodule Floki.DeepTextTest do
106106

107107
assert Floki.DeepText.get(nodes) == "foo\nbaz"
108108
end
109+
110+
test "HTML that contains a processing instruction (<?xml ... ?>)" do
111+
nodes = [{:pi, "", [{"indica", "indica"}]}, "foo"]
112+
113+
assert Floki.DeepText.get(nodes) == "foo"
114+
end
109115
end

test/floki/flat_text_test.exs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,10 @@ defmodule Floki.FlatTextTest do
123123
assert Floki.FlatText.get([]) == ""
124124
assert Floki.FlatText.get({"div", [], []}) == ""
125125
end
126+
127+
test "HTML that contains a processing instruction (<?xml ... ?>)" do
128+
nodes = [{:pi, "", [{"indica", "indica"}]}, "foo"]
129+
130+
assert Floki.FlatText.get(nodes) == "foo"
131+
end
126132
end

0 commit comments

Comments
 (0)