test = require 'tape'
ROOT = "../src"
if process.env.ROOT
ROOT = process.env.ROOT
matching = require (ROOT + '/matching')
adjacency_graphs = require (ROOT + '/adjacency_graphs')
# takes a pattern and list of prefixes/suffixes
# returns a bunch of variants of that pattern embedded
# with each possible prefix/suffix combination, including no prefix/suffix
# returns a list of triplets [variant, i, j] where [i,j] is the start/end of the pattern, inclusive
genpws = (pattern, prefixes, suffixes) ->
prefixes = prefixes.slice()
suffixes = suffixes.slice()
for lst in [prefixes, suffixes]
lst.unshift '' if '' not in lst
result = []
for prefix in prefixes
for suffix in suffixes
[i, j] = [prefix.length, prefix.length + pattern.length - 1]
result.push [prefix + pattern + suffix, i, j]
result
check_matches = (prefix, t, matches, pattern_names, patterns, ijs, props) ->
if typeof pattern_names is "string"
# shortcut: if checking for a list of the same type of patterns,
# allow passing a string 'pat' instead of array ['pat', 'pat', ...]
pattern_names = (pattern_names for i in [0...patterns.length])
is_equal_len_args = pattern_names.length == patterns.length == ijs.length
for prop, lst of props
# props is structured as: keys that points to list of values
is_equal_len_args = is_equal_len_args and (lst.length == patterns.length)
throw 'unequal argument lists to check_matches' unless is_equal_len_args
msg = "#{prefix}: matches.length == #{patterns.length}"
t.equal matches.length, patterns.length, msg
for k in [0...patterns.length]
match = matches[k]
pattern_name = pattern_names[k]
pattern = patterns[k]
[i, j] = ijs[k]
msg = "#{prefix}: matches[#{k}].pattern == '#{pattern_name}'"
t.equal match.pattern, pattern_name, msg
msg = "#{prefix}: matches[#{k}] should have [i, j] of [#{i}, #{j}]"
t.deepEqual [match.i, match.j], [i, j], msg
msg = "#{prefix}: matches[#{k}].token == '#{pattern}'"
t.equal match.token, pattern, msg
for prop_name, prop_list of props
prop_msg = prop_list[k]
prop_msg = "'#{prop_msg}'" if typeof(prop_msg) == 'string'
msg = "#{prefix}: matches[#{k}].#{prop_name} == #{prop_msg}"
t.deepEqual match[prop_name], prop_list[k], msg
test 'matching utils', (t) ->
return t.end() if matching.no_util
t.ok matching.empty([]), ".empty returns true for an empty array"
t.ok matching.empty({}), ".empty returns true for an empty object"
for obj in [
[1]
[1, 2]
[[]]
{a: 1}
{0: {}}
]
t.notOk matching.empty(obj), ".empty returns false for non-empty objects and arrays"
lst = []
matching.extend lst, []
t.deepEqual lst, [], "extending an empty list with an empty list leaves it empty"
matching.extend lst, [1]
t.deepEqual lst, [1], "extending an empty list with another makes it equal to the other"
matching.extend lst, [2, 3]
t.deepEqual lst, [1, 2, 3], "extending a list with another adds each of the other's elements"
[lst1, lst2] = [[1], [2]]
matching.extend lst1, lst2
t.deepEqual lst2, [2], "extending a list by another doesn't affect the other"
chr_map = {a: 'A', b: 'B'}
for [string, map, result] in [
['a', chr_map, 'A']
['c', chr_map, 'c']
['ab', chr_map, 'AB']
['abc', chr_map, 'ABc']
['aa', chr_map, 'AA']
['abab', chr_map, 'ABAB']
['', chr_map, '']
['', {}, '']
['abc', {}, 'abc']
]
msg = "translates '#{string}' to '#{result}' with provided charmap"
t.equal matching.translate(string, map), result, msg
for [[dividend, divisor], remainder] in [
[[ 0, 1], 0]
[[ 1, 1], 0]
[[-1, 1], 0]
[[ 5, 5], 0]
[[ 3, 5], 3]
[[-1, 5], 4]
[[-5, 5], 0]
[[ 6, 5], 1]
]
msg = "mod(#{dividend}, #{divisor}) == #{remainder}"
t.equal matching.mod(dividend, divisor), remainder, msg
t.deepEqual matching.sorted([]), [], "sorting an empty list leaves it empty"
[m1, m2, m3, m4, m5, m6] = [
{i: 5, j: 5}
{i: 6, j: 7}
{i: 2, j: 5}
{i: 0, j: 0}
{i: 2, j: 3}
{i: 0, j: 3}
]
msg = "matches are sorted on i index primary, j secondary"
t.deepEqual matching.sorted([m1, m2, m3, m4, m5, m6]), [m4, m6, m5, m3, m1, m2], msg
t.end()
test 'dictionary matching', (t) ->
dm = (pw) -> matching.dictionary_match pw, test_dicts
test_dicts =
d1:
motherboard: 1
mother: 2
board: 3
abcd: 4
cdef: 5
d2:
'z': 1
'8': 2
'99': 3
'$': 4
'asdf1234&*': 5
matches = dm 'motherboard'
patterns = ['mother', 'motherboard', 'board']
msg = "matches words that contain other words"
check_matches msg, t, matches, 'dictionary', patterns, [[0,5], [0,10], [6,10]],
matched_word: ['mother', 'motherboard', 'board']
rank: [2, 1, 3]
dictionary_name: ['d1', 'd1', 'd1']
matches = dm 'abcdef'
patterns = ['abcd', 'cdef']
msg = "matches multiple words when they overlap"
check_matches msg, t, matches, 'dictionary', patterns, [[0,3], [2,5]],
matched_word: ['abcd', 'cdef']
rank: [4, 5]
dictionary_name: ['d1', 'd1']
matches = dm 'BoaRdZ'
patterns = ['BoaRd', 'Z']
msg = "ignores uppercasing"
check_matches msg, t, matches, 'dictionary', patterns, [[0,4], [5,5]],
matched_word: ['board', 'z']
rank: [3, 1]
dictionary_name: ['d1', 'd2']
prefixes = ['q', '%%']
suffixes = ['%', 'qq']
word = 'asdf1234&*'
for [password, i, j] in genpws word, prefixes, suffixes
matches = dm password
msg = "identifies words surrounded by non-words"
check_matches msg, t, matches, 'dictionary', [word], [[i,j]],
matched_word: [word]
rank: [5]
dictionary_name: ['d2']
for name, dict of test_dicts
for word, rank of dict
continue if word is 'motherboard' # skip words that contain others
matches = dm word
msg = "matches against all words in provided dictionaries"
check_matches msg, t, matches, 'dictionary', [word], [[0, word.length - 1]],
matched_word: [word]
rank: [rank]
dictionary_name: [name]
# test the default dictionaries
matches = matching.dictionary_match 'wow'
patterns = ['wow']
ijs = [[0,2]]
msg = "default dictionaries"
check_matches msg, t, matches, 'dictionary', patterns, ijs,
matched_word: patterns
rank: [322]
dictionary_name: ['us_tv_and_film']
matching.set_user_input_dictionary ['foo', 'bar']
matches = matching.dictionary_match 'foobar'
matches = matches.filter (match) ->
match.dictionary_name == 'user_inputs'
msg = "matches with provided user input dictionary"
check_matches msg, t, matches, 'dictionary', ['foo', 'bar'], [[0, 2], [3, 5]],
matched_word: ['foo', 'bar']
rank: [1, 2]
t.end()
test 'reverse dictionary matching', (t) ->
test_dicts =
d1:
123: 1
321: 2
456: 3
654: 4
password = '0123456789'
matches = matching.reverse_dictionary_match password, test_dicts
msg = 'matches against reversed words'
check_matches msg, t, matches, 'dictionary', ['123', '456'], [[1, 3], [4, 6]],
matched_word: ['321', '654']
reversed: [true, true]
dictionary_name: ['d1', 'd1']
rank: [2, 4]
t.end()
test 'l33t matching', (t) ->
test_table =
a: ['4', '@']
c: ['(', '{', '[', '<']
g: ['6', '9']
o: ['0']
for [pw, expected] in [
[ '', {} ]
[ 'abcdefgo123578!#$&*)]}>', {} ]
[ 'a', {} ]
[ '4', {'a': ['4']} ]
[ '4@', {'a': ['4','@']} ]
[ '4({60', {'a': ['4'], 'c': ['(','{'], 'g': ['6'], 'o': ['0']} ]
]
msg = "reduces l33t table to only the substitutions that a password might be employing"
t.deepEquals matching.relevant_l33t_subtable(pw, test_table), expected, msg
for [table, subs] in [
[ {}, [{}] ]
[ {a: ['@']}, [{'@': 'a'}] ]
[ {a: ['@','4']}, [{'@': 'a'}, {'4': 'a'}] ]
[ {a: ['@','4'], c: ['(']}, [{'@': 'a', '(': 'c' }, {'4': 'a', '(': 'c'}] ]
]
msg = "enumerates the different sets of l33t substitutions a password might be using"
t.deepEquals matching.enumerate_l33t_subs(table), subs, msg
lm = (pw) -> matching.l33t_match pw, dicts, test_table
dicts =
words:
aac: 1
password: 3
paassword: 4
asdf0: 5
words2:
cgo: 1
t.deepEquals lm(''), [], "doesn't match ''"
t.deepEquals lm('password'), [], "doesn't match pure dictionary words"
for [password, pattern, word, dictionary_name, rank, ij, sub] in [
[ 'p4ssword', 'p4ssword', 'password', 'words', 3, [0,7], {'4': 'a'} ]
[ 'p@ssw0rd', 'p@ssw0rd', 'password', 'words', 3, [0,7], {'@': 'a', '0': 'o'} ]
[ 'aSdfO{G0asDfO', '{G0', 'cgo', 'words2', 1, [5, 7], {'{': 'c', '0': 'o'} ]
]
msg = "matches against common l33t substitutions"
check_matches msg, t, lm(password), 'dictionary', [pattern], [ij],
l33t: [true]
sub: [sub]
matched_word: [word]
rank: [rank]
dictionary_name: [dictionary_name]
matches = lm '@a(go{G0'
msg = "matches against overlapping l33t patterns"
check_matches msg, t, matches, 'dictionary', ['@a(', '(go', '{G0'], [[0,2], [2,4], [5,7]],
l33t: [true, true, true]
sub: [{'@': 'a', '(': 'c'}, {'(': 'c'}, {'{': 'c', '0': 'o'}]
matched_word: ['aac', 'cgo', 'cgo']
rank: [1, 1, 1]
dictionary_name: ['words', 'words2', 'words2']
msg = "doesn't match when multiple l33t substitutions are needed for the same letter"
t.deepEqual lm('p4@ssword'), [], msg
msg = "doesn't match single-character l33ted words"
matches = matching.l33t_match '4 1 @'
t.deepEqual matches, [], msg
# known issue: subsets of substitutions aren't tried.
# for long inputs, trying every subset of every possible substitution could quickly get large,
# but there might be a performant way to fix.
# (so in this example: {'4': a, '0': 'o'} is detected as a possible sub,
# but the subset {'4': 'a'} isn't tried, missing the match for asdf0.)
# TODO: consider partially fixing by trying all subsets of size 1 and maybe 2
msg = "doesn't match with subsets of possible l33t substitutions"
t.deepEqual lm('4sdf0'), [], msg
t.end()
test 'spatial matching', (t) ->
for password in ['', '/', 'qw', '*/']
msg = "doesn't match 1- and 2-character spatial patterns"
t.deepEqual matching.spatial_match(password), [], msg
# for testing, make a subgraph that contains a single keyboard
_graphs = qwerty: adjacency_graphs.qwerty
pattern = '6tfGHJ'
matches = matching.spatial_match "rz!#{pattern}%z", _graphs
msg = "matches against spatial patterns surrounded by non-spatial patterns"
check_matches msg, t, matches, 'spatial', [pattern], [[3, 3 + pattern.length - 1]],
graph: ['qwerty']
turns: [2]
shifted_count: [3]
for [pattern, keyboard, turns, shifts] in [
[ '12345', 'qwerty', 1, 0 ]
[ '@WSX', 'qwerty', 1, 4 ]
[ '6tfGHJ', 'qwerty', 2, 3 ]
[ 'hGFd', 'qwerty', 1, 2 ]
[ '/;p09876yhn', 'qwerty', 3, 0 ]
[ 'Xdr%', 'qwerty', 1, 2 ]
[ '159-', 'keypad', 1, 0 ]
[ '*84', 'keypad', 1, 0 ]
[ '/8520', 'keypad', 1, 0 ]
[ '369', 'keypad', 1, 0 ]
[ '/963.', 'mac_keypad', 1, 0 ]
[ '*-632.0214', 'mac_keypad', 9, 0 ]
[ 'aoEP%yIxkjq:', 'dvorak', 4, 5 ]
[ ';qoaOQ:Aoq;a', 'dvorak', 11, 4 ]
]
_graphs = {}
_graphs[keyboard] = adjacency_graphs[keyboard]
matches = matching.spatial_match pattern, _graphs
msg = "matches '#{pattern}' as a #{keyboard} pattern"
check_matches msg, t, matches, 'spatial', [pattern], [[0, pattern.length - 1]],
graph: [keyboard]
turns: [turns]
shifted_count: [shifts]
t.end()
test 'sequence matching', (t) ->
for password in ['', 'a', '1']
msg = "doesn't match length-#{password.length} sequences"
t.deepEqual matching.sequence_match(password), [], msg
matches = matching.sequence_match 'abcbabc'
msg = "matches overlapping patterns"
check_matches msg, t, matches, 'sequence', ['abc', 'cba', 'abc'], [[0, 2], [2, 4], [4, 6]],
ascending: [true, false, true]
prefixes = ['!', '22']
suffixes = ['!', '22']
pattern = 'jihg'
for [password, i, j] in genpws pattern, prefixes, suffixes
matches = matching.sequence_match password
msg = "matches embedded sequence patterns #{password}"
check_matches msg, t, matches, 'sequence', [pattern], [[i, j]],
sequence_name: ['lower']
ascending: [false]
for [pattern, name, is_ascending] in [
['ABC', 'upper', true]
['CBA', 'upper', false]
['PQR', 'upper', true]
['RQP', 'upper', false]
['XYZ', 'upper', true]
['ZYX', 'upper', false]
['abcd', 'lower', true]
['dcba', 'lower', false]
['jihg', 'lower', false]
['wxyz', 'lower', true]
['zxvt', 'lower', false]
['0369', 'digits', true]
['97531', 'digits', false]
]
matches = matching.sequence_match pattern
msg = "matches '#{pattern}' as a '#{name}' sequence"
check_matches msg, t, matches, 'sequence', [pattern], [[0, pattern.length - 1]],
sequence_name: [name]
ascending: [is_ascending]
t.end()
test 'repeat matching', (t) ->
for password in ['', '#']
msg = "doesn't match length-#{password.length} repeat patterns"
t.deepEqual matching.repeat_match(password), [], msg
# test single-character repeats
prefixes = ['@', 'y4@']
suffixes = ['u', 'u%7']
pattern = '&&&&&'
for [password, i, j] in genpws pattern, prefixes, suffixes
matches = matching.repeat_match password
msg = "matches embedded repeat patterns"
check_matches msg, t, matches, 'repeat', [pattern], [[i, j]],
base_token: ['&']
for length in [3, 12]
for chr in ['a', 'Z', '4', '&']
pattern = Array(length + 1).join(chr)
matches = matching.repeat_match pattern
msg = "matches repeats with base character '#{chr}'"
check_matches msg, t, matches, 'repeat', [pattern], [[0, pattern.length - 1]],
base_token: [chr]
matches = matching.repeat_match 'BBB1111aaaaa@@@@@@'
patterns = ['BBB','1111','aaaaa','@@@@@@']
msg = 'matches multiple adjacent repeats'
check_matches msg, t, matches, 'repeat', patterns, [[0, 2],[3, 6],[7, 11],[12, 17]],
base_token: ['B', '1', 'a', '@']
matches = matching.repeat_match '2818BBBbzsdf1111@*&@!aaaaaEUDA@@@@@@1729'
msg = 'matches multiple repeats with non-repeats in-between'
check_matches msg, t, matches, 'repeat', patterns, [[4, 6],[12, 15],[21, 25],[30, 35]],
base_token: ['B', '1', 'a', '@']
# test multi-character repeats
pattern = 'abab'
matches = matching.repeat_match pattern
msg = 'matches multi-character repeat pattern'
check_matches msg, t, matches, 'repeat', [pattern], [[0, pattern.length - 1]],
base_token: ['ab']
pattern = 'aabaab'
matches = matching.repeat_match pattern
msg = 'matches aabaab as a repeat instead of the aa prefix'
check_matches msg, t, matches, 'repeat', [pattern], [[0, pattern.length - 1]],
base_token: ['aab']
pattern = 'abababab'
matches = matching.repeat_match pattern
msg = 'identifies ab as repeat string, even though abab is also repeated'
check_matches msg, t, matches, 'repeat', [pattern], [[0, pattern.length - 1]],
base_token: ['ab']
t.end()
test 'regex matching', (t) ->
for [pattern, name] in [
['1922', 'recent_year']
['2017', 'recent_year']
]
matches = matching.regex_match pattern
msg = "matches #{pattern} as a #{name} pattern"
check_matches msg, t, matches, 'regex', [pattern], [[0, pattern.length - 1]],
regex_name: [name]
t.end()
test 'date matching', (t) ->
for sep in ['', ' ', '-', '/', '\\', '_', '.']
password = "13#{sep}2#{sep}1921"
matches = matching.date_match password
msg = "matches dates that use '#{sep}' as a separator"
check_matches msg, t, matches, 'date', [password], [[0, password.length - 1]],
separator: [sep]
year: [1921]
month: [2]
day: [13]
for order in ['mdy', 'dmy', 'ymd', 'ydm']
[d,m,y] = [8,8,88]
password = order
.replace 'y', y
.replace 'm', m
.replace 'd', d
matches = matching.date_match password
msg = "matches dates with '#{order}' format"
check_matches msg, t, matches, 'date', [password], [[0, password.length - 1]],
separator: ['']
year: [1988]
month: [8]
day: [8]
password = '111504'
matches = matching.date_match password
msg = "matches the date with year closest to REFERENCE_YEAR when ambiguous"
check_matches msg, t, matches, 'date', [password], [[0, password.length - 1]],
separator: ['']
year: [2004] # picks '04' -> 2004 as year, not '1504'
month: [11]
day: [15]
for [day, month, year] in [
[1, 1, 1999]
[11, 8, 2000]
[9, 12, 2005]
[22, 11, 1551]
]
password = "#{year}#{month}#{day}"
matches = matching.date_match password
msg = "matches #{password}"
check_matches msg, t, matches, 'date', [password], [[0, password.length - 1]],
separator: ['']
year: [year]
password = "#{year}.#{month}.#{day}"
matches = matching.date_match password
msg = "matches #{password}"
check_matches msg, t, matches, 'date', [password], [[0, password.length - 1]],
separator: ['.']
year: [year]
password = "02/02/02"
matches = matching.date_match password
msg = "matches zero-padded dates"
check_matches msg, t, matches, 'date', [password], [[0, password.length - 1]],
separator: ['/']
year: [2002]
month: [2]
day: [2]
prefixes = ['a', 'ab']
suffixes = ['!']
pattern = '1/1/91'
for [password, i, j] in genpws pattern, prefixes, suffixes
matches = matching.date_match password
msg = "matches embedded dates"
check_matches msg, t, matches, 'date', [pattern], [[i, j]],
year: [1991]
month: [1]
day: [1]
matches = matching.date_match '12/20/1991.12.20'
msg = "matches overlapping dates"
check_matches msg, t, matches, 'date', ['12/20/1991', '1991.12.20'], [[0, 9], [6,15]],
separator: ['/', '.']
year: [1991, 1991]
month: [12, 12]
day: [20, 20]
matches = matching.date_match '912/20/919'
msg = "matches dates padded by non-ambiguous digits"
check_matches msg, t, matches, 'date', ['12/20/91'], [[1, 8]],
separator: ['/']
year: [1991]
month: [12]
day: [20]
t.end()
test 'omnimatch', (t) ->
t.deepEquals matching.omnimatch(''), [], "doesn't match ''"
password = 'r0sebudmaelstrom11/20/91aaaa'
matches = matching.omnimatch password
for [pattern_name, [i, j]] in [
[ 'dictionary', [0, 6] ]
[ 'dictionary', [7, 15] ]
[ 'date', [16, 23] ]
[ 'repeat', [24, 27] ]
]
included = false
for match in matches
included = true if match.i == i and match.j == j and match.pattern == pattern_name
msg = "for #{password}, matches a #{pattern_name} pattern at [#{i}, #{j}]"
t.ok included, msg
t.end()