# These tests are for the "special" word boundary assertions. That is,
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
# assertions for more niche use cases, but hitting those cases without these
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
# used to implement the -w/--word-regexp flag in a grep program.
#
# Notes on the fixtures below: match spans are byte offsets into the UTF-8
# haystack. Both 𝛃 and 𐆀 occupy 4 bytes; per the expectations in this file, 𝛃
# counts as a word character only when `unicode = true`, and 𐆀 never does.

# Tests for (?-u:\b{start})

[[test]]
name = "word-start-ascii-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-start-ascii-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = false

[[test]]
name = "word-start-ascii-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-ascii-060"
regex = '\b{start}'
haystack = "𝛃"
matches = []
unicode = false

[[test]]
name = "word-start-ascii-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-start-ascii-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = []
unicode = false

[[test]]
name = "word-start-ascii-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = []
unicode = false

[[test]]
name = "word-start-ascii-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[4, 4]]
unicode = false

[[test]]
name = "word-start-ascii-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = false

# Tests for (?-u:\b{end})

[[test]]
name = "word-end-ascii-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-ascii-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-ascii-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = false

[[test]]
name = "word-end-ascii-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = false

[[test]]
name = "word-end-ascii-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = false

[[test]]
name = "word-end-ascii-060"
regex = '\b{end}'
haystack = "𝛃"
matches = []
unicode = false

[[test]]
name = "word-end-ascii-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-end-ascii-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = []
unicode = false

[[test]]
name = "word-end-ascii-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = []
unicode = false

[[test]]
name = "word-end-ascii-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = false

[[test]]
name = "word-end-ascii-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[1, 1]]
unicode = false

# Tests for \b{start}

[[test]]
name = "word-start-unicode-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-start-unicode-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = true

[[test]]
name = "word-start-unicode-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-060"
regex = '\b{start}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-start-unicode-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-start-unicode-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-unicode-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true

# Tests for \b{end}

[[test]]
name = "word-end-unicode-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-unicode-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-unicode-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = true

[[test]]
name = "word-end-unicode-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = true

[[test]]
name = "word-end-unicode-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = true

[[test]]
name = "word-end-unicode-060"
regex = '\b{end}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true

[[test]]
name = "word-end-unicode-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-end-unicode-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-unicode-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = [[4, 4]]
unicode = true

[[test]]
name = "word-end-unicode-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-unicode-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true

# Tests for (?-u:\b{start-half})

[[test]]
name = "word-start-half-ascii-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = false

[[test]]
name = "word-start-half-ascii-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = false

[[test]]
name = "word-start-half-ascii-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-start-half-ascii-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false

# Same haystack as above, but with `utf8 = false` a match is expected at every
# byte offset, including those inside the 4-byte encoding of 𝛃.
[[test]]
name = "word-start-half-ascii-060-noutf8"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false

[[test]]
name = "word-start-half-ascii-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-start-half-ascii-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false

[[test]]
name = "word-start-half-ascii-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false

[[test]]
name = "word-start-half-ascii-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0], [4, 4]]
unicode = false

[[test]]
name = "word-start-half-ascii-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0], [5, 5]]
unicode = false

# Tests for (?-u:\b{end-half})

[[test]]
name = "word-end-half-ascii-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = false

[[test]]
name = "word-end-half-ascii-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = false

[[test]]
name = "word-end-half-ascii-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = false

[[test]]
name = "word-end-half-ascii-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = false

[[test]]
name = "word-end-half-ascii-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = false

[[test]]
name = "word-end-half-ascii-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false

[[test]]
name = "word-end-half-ascii-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false

[[test]]
name = "word-end-half-ascii-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false

[[test]]
name = "word-end-half-ascii-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false

[[test]]
name = "word-end-half-ascii-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[0, 0], [5, 5]]
unicode = false

[[test]]
name = "word-end-half-ascii-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[1, 1], [5, 5]]
unicode = false

# Tests for \b{start-half}

[[test]]
name = "word-start-half-unicode-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = true

[[test]]
name = "word-start-half-unicode-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = true

[[test]]
name = "word-start-half-unicode-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-start-half-unicode-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [6, 6]]
unicode = true

[[test]]
name = "word-start-half-unicode-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [8, 8]]
unicode = true

[[test]]
name = "word-start-half-unicode-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-start-half-unicode-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true

# Tests for \b{end-half}

[[test]]
name = "word-end-half-unicode-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = true

[[test]]
name = "word-end-half-unicode-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = true

[[test]]
name = "word-end-half-unicode-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = true

[[test]]
name = "word-end-half-unicode-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = true

[[test]]
name = "word-end-half-unicode-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = true

[[test]]
name = "word-end-half-unicode-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true

[[test]]
name = "word-end-half-unicode-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true

[[test]]
name = "word-end-half-unicode-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [5, 5], [6, 6]]
unicode = true

[[test]]
name = "word-end-half-unicode-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[4, 4], [8, 8]]
unicode = true

[[test]]
name = "word-end-half-unicode-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true

[[test]]
name = "word-end-half-unicode-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true

# Specialty tests.

# Since \r is special cased in the start state computation (to deal with CRLF
# mode), this test ensures that the correct start state is computed when the
# pattern starts with a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-carriage"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\rabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true

# Since \n is also special cased in the start state computation, this test
# ensures that the correct start state is computed when the pattern starts with
# a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-linefeed"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\nabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true

# Like the carriage return test above, but with a custom line terminator.
# The search is restricted to bytes 4..7 of the haystack, so it begins
# immediately after the '!' that is configured as the line terminator.
[[test]]
name = "word-start-half-ascii-customlineterm"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC!abc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
line-terminator = '!'