test = require 'tape'
ROOT = "../src"
if process.env.ROOT
ROOT = process.env.ROOT
scoring = require (ROOT + '/scoring')
matching = require (ROOT + '/matching')
log2 = scoring.log2
log10 = scoring.log10
nCk = scoring.nCk
EPSILON = 1e-10 # truncate to 10th decimal place
truncate_float = (float) -> Math.round(float / EPSILON) * EPSILON
approx_equal = (t, actual, expected, msg) ->
t.equal truncate_float(actual), truncate_float(expected), msg
test 'nCk', (t) ->
for [n, k, result] in [
[ 0, 0, 1 ]
[ 1, 0, 1 ]
[ 5, 0, 1 ]
[ 0, 1, 0 ]
[ 0, 5, 0 ]
[ 2, 1, 2 ]
[ 4, 2, 6 ]
[ 33, 7, 4272048 ]
]
t.equal nCk(n, k), result, "nCk(#{n}, #{k}) == #{result}"
n = 49
k = 12
t.equal nCk(n, k), nCk(n, n-k), "mirror identity"
t.equal nCk(n, k), nCk(n-1, k-1) + nCk(n-1, k), "pascal's triangle identity"
t.end()
test 'log', (t) ->
for [n, result] in [
[ 1, 0 ]
[ 2, 1 ]
[ 4, 2 ]
[ 32, 5 ]
]
t.equal log2(n), result, "log2(#{n}) == #{result}"
for [n, result] in [
[ 1, 0]
[ 10, 1]
[ 100, 2]
]
t.equal log10(n), result, "log10(#{n}) == #{result}"
n = 17
p = 4
approx_equal t, log10(n * p), log10(n) + log10(p), "product rule"
approx_equal t, log10(n / p), log10(n) - log10(p), "quotient rule"
approx_equal t, log10(Math.E), 1 / Math.log(10), "base switch rule"
approx_equal t, log10(Math.pow(n, p)), p * log10(n), "power rule"
approx_equal t, log10(n), Math.log(n) / Math.log(10), "base change rule"
t.end()
test 'search', (t) ->
m = (i, j, guesses) ->
i: i
j: j
guesses: guesses
password = '0123456789'
# for tests, set additive penalty to zero.
exclude_additive = true
msg = (s) -> "returns one bruteforce match given an empty match sequence: #{s}"
result = scoring.most_guessable_match_sequence password, []
t.equal result.sequence.length, 1, msg("result.length == 1")
m0 = result.sequence[0]
t.equal m0.pattern, 'bruteforce', msg("match.pattern == 'bruteforce'")
t.equal m0.token, password, msg("match.token == #{password}")
t.deepEqual [m0.i, m0.j], [0, 9], msg("[i, j] == [#{m0.i}, #{m0.j}]")
msg = (s) -> "returns match + bruteforce when match covers a prefix of password: #{s}"
matches = [m0] = [m(0, 5, 1)]
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.sequence.length, 2, msg("result.match.sequence.length == 2")
t.equal result.sequence[0], m0, msg("first match is the provided match object")
m1 = result.sequence[1]
t.equal m1.pattern, 'bruteforce', msg("second match is bruteforce")
t.deepEqual [m1.i, m1.j], [6, 9], msg("second match covers full suffix after first match")
msg = (s) -> "returns bruteforce + match when match covers a suffix: #{s}"
matches = [m1] = [m(3, 9, 1)]
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.sequence.length, 2, msg("result.match.sequence.length == 2")
m0 = result.sequence[0]
t.equal m0.pattern, 'bruteforce', msg("first match is bruteforce")
t.deepEqual [m0.i, m0.j], [0, 2], msg("first match covers full prefix before second match")
t.equal result.sequence[1], m1, msg("second match is the provided match object")
msg = (s) -> "returns bruteforce + match + bruteforce when match covers an infix: #{s}"
matches = [m1] = [m(1, 8, 1)]
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.sequence.length, 3, msg("result.length == 3")
t.equal result.sequence[1], m1, msg("middle match is the provided match object")
m0 = result.sequence[0]
m2 = result.sequence[2]
t.equal m0.pattern, 'bruteforce', msg("first match is bruteforce")
t.equal m2.pattern, 'bruteforce', msg("third match is bruteforce")
t.deepEqual [m0.i, m0.j], [0, 0], msg("first match covers full prefix before second match")
t.deepEqual [m2.i, m2.j], [9, 9], msg("third match covers full suffix after second match")
msg = (s) -> "chooses lower-guesses match given two matches of the same span: #{s}"
matches = [m0, m1] = [m(0, 9, 1), m(0, 9, 2)]
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.sequence.length, 1, msg("result.length == 1")
t.equal result.sequence[0], m0, msg("result.sequence[0] == m0")
# make sure ordering doesn't matter
m0.guesses = 3
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.sequence.length, 1, msg("result.length == 1")
t.equal result.sequence[0], m1, msg("result.sequence[0] == m1")
msg = (s) -> "when m0 covers m1 and m2, choose [m0] when m0 < m1 * m2 * fact(2): #{s}"
matches = [m0, m1, m2] = [m(0, 9, 3), m(0, 3, 2), m(4, 9, 1)]
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.guesses, 3, msg("total guesses == 3")
t.deepEqual result.sequence, [m0], msg("sequence is [m0]")
msg = (s) -> "when m0 covers m1 and m2, choose [m1, m2] when m0 > m1 * m2 * fact(2): #{s}"
m0.guesses = 5
result = scoring.most_guessable_match_sequence password, matches, exclude_additive
t.equal result.guesses, 4, msg("total guesses == 4")
t.deepEqual result.sequence, [m1, m2], msg("sequence is [m1, m2]")
t.end()
test 'calc_guesses', (t) ->
match =
guesses: 1
msg = "estimate_guesses returns cached guesses when available"
t.equal scoring.estimate_guesses(match, ''), 1, msg
match =
pattern: 'date'
token: '1977'
year: 1977
month: 7
day: 14
msg = "estimate_guesses delegates based on pattern"
t.equal scoring.estimate_guesses(match, '1977'), scoring.date_guesses(match), msg
t.end()
test 'repeat guesses', (t) ->
for [token, base_token, repeat_count] in [
[ 'aa', 'a', 2]
[ '999', '9', 3]
[ '$$$$', '$', 4]
[ 'abab', 'ab', 2]
[ 'batterystaplebatterystaplebatterystaple', 'batterystaple', 3]
]
base_guesses = scoring.most_guessable_match_sequence(
base_token
matching.omnimatch base_token
).guesses
match =
token: token
base_token: base_token
base_guesses: base_guesses
repeat_count: repeat_count
expected_guesses = base_guesses * repeat_count
msg = "the repeat pattern '#{token}' has guesses of #{expected_guesses}"
t.equal scoring.repeat_guesses(match), expected_guesses, msg
t.end()
test 'sequence guesses', (t) ->
for [token, ascending, guesses] in [
[ 'ab', true, 4 * 2 ] # obvious start * len-2
[ 'XYZ', true, 26 * 3 ] # base26 * len-3
[ '4567', true, 10 * 4 ] # base10 * len-4
[ '7654', false, 10 * 4 * 2 ] # base10 * len 4 * descending
[ 'ZYX', false, 4 * 3 * 2 ] # obvious start * len-3 * descending
]
match =
token: token
ascending: ascending
msg = "the sequence pattern '#{token}' has guesses of #{guesses}"
t.equal scoring.sequence_guesses(match), guesses, msg
t.end()
test 'regex guesses', (t) ->
match =
token: 'aizocdk'
regex_name: 'alpha_lower'
regex_match: ['aizocdk']
msg = "guesses of 26^7 for 7-char lowercase regex"
t.equal scoring.regex_guesses(match), Math.pow(26, 7), msg
match =
token: 'ag7C8'
regex_name: 'alphanumeric'
regex_match: ['ag7C8']
msg = "guesses of 62^5 for 5-char alphanumeric regex"
t.equal scoring.regex_guesses(match), Math.pow(2 * 26 + 10, 5), msg
match =
token: '1972'
regex_name: 'recent_year'
regex_match: ['1972']
msg = "guesses of |year - REFERENCE_YEAR| for distant year matches"
t.equal scoring.regex_guesses(match), Math.abs(scoring.REFERENCE_YEAR - 1972), msg
match =
token: '2005'
regex_name: 'recent_year'
regex_match: ['2005']
msg = "guesses of MIN_YEAR_SPACE for a year close to REFERENCE_YEAR"
t.equal scoring.regex_guesses(match), scoring.MIN_YEAR_SPACE, msg
t.end()
test 'date guesses', (t) ->
match =
token: '1123'
separator: ''
has_full_year: false
year: 1923
month: 1
day: 1
msg = "guesses for #{match.token} is 365 * distance_from_ref_year"
t.equal scoring.date_guesses(match), 365 * Math.abs(scoring.REFERENCE_YEAR - match.year), msg
match =
token: '1/1/2010'
separator: '/'
has_full_year: true
year: 2010
month: 1
day: 1
msg = "recent years assume MIN_YEAR_SPACE."
msg += " extra guesses are added for separators."
t.equal scoring.date_guesses(match), 365 * scoring.MIN_YEAR_SPACE * 4, msg
t.end()
test 'spatial guesses', (t) ->
match =
token: 'zxcvbn'
graph: 'qwerty'
turns: 1
shifted_count: 0
base_guesses = (
scoring.KEYBOARD_STARTING_POSITIONS *
scoring.KEYBOARD_AVERAGE_DEGREE *
# - 1 term because: not counting spatial patterns of length 1
# eg for length==6, multiplier is 5 for needing to try len2,len3,..,len6
(match.token.length - 1)
)
msg = "with no turns or shifts, guesses is starts * degree * (len-1)"
t.equal scoring.spatial_guesses(match), base_guesses, msg
match.guesses = null
match.token = 'ZxCvbn'
match.shifted_count = 2
shifted_guesses = base_guesses * (nCk(6, 2) + nCk(6, 1))
msg = "guesses is added for shifted keys, similar to capitals in dictionary matching"
t.equal scoring.spatial_guesses(match), shifted_guesses, msg
match.guesses = null
match.token = 'ZXCVBN'
match.shifted_count = 6
shifted_guesses = base_guesses * 2
msg = "when everything is shifted, guesses are doubled"
t.equal scoring.spatial_guesses(match), shifted_guesses, msg
match =
token: 'zxcft6yh'
graph: 'qwerty'
turns: 3
shifted_count: 0
guesses = 0
L = match.token.length
s = scoring.KEYBOARD_STARTING_POSITIONS
d = scoring.KEYBOARD_AVERAGE_DEGREE
for i in [2..L]
for j in [1..Math.min(match.turns, i-1)]
guesses += nCk(i-1, j-1) * s * Math.pow(d, j)
msg = "spatial guesses accounts for turn positions, directions and starting keys"
t.equal scoring.spatial_guesses(match), guesses, msg
t.end()
test 'dictionary_guesses', (t) ->
match =
token: 'aaaaa'
rank: 32
msg = "base guesses == the rank"
t.equal scoring.dictionary_guesses(match), 32, msg
match =
token: 'AAAaaa'
rank: 32
msg = "extra guesses are added for capitalization"
t.equal scoring.dictionary_guesses(match), 32 * scoring.uppercase_variations(match), msg
match =
token: 'aaa'
rank: 32
reversed: true
msg = "guesses are doubled when word is reversed"
t.equal scoring.dictionary_guesses(match), 32 * 2, msg
match =
token: 'aaa@@@'
rank: 32
l33t: true
sub: {'@': 'a'}
msg = "extra guesses are added for common l33t substitutions"
t.equal scoring.dictionary_guesses(match), 32 * scoring.l33t_variations(match), msg
match =
token: 'AaA@@@'
rank: 32
l33t: true
sub: {'@': 'a'}
msg = "extra guesses are added for both capitalization and common l33t substitutions"
expected = 32 * scoring.l33t_variations(match) * scoring.uppercase_variations(match)
t.equal scoring.dictionary_guesses(match), expected, msg
t.end()
test 'uppercase variants', (t) ->
for [word, variants] in [
[ '', 1 ]
[ 'a', 1 ]
[ 'A', 2 ]
[ 'abcdef', 1 ]
[ 'Abcdef', 2 ]
[ 'abcdeF', 2 ]
[ 'ABCDEF', 2 ]
[ 'aBcdef', nCk(6,1) ]
[ 'aBcDef', nCk(6,1) + nCk(6,2) ]
[ 'ABCDEf', nCk(6,1) ]
[ 'aBCDEf', nCk(6,1) + nCk(6,2) ]
[ 'ABCdef', nCk(6,1) + nCk(6,2) + nCk(6,3) ]
]
msg = "guess multiplier of #{word} is #{variants}"
t.equal scoring.uppercase_variations(token: word), variants, msg
t.end()
test 'l33t variants', (t) ->
match = l33t: false
t.equal scoring.l33t_variations(match), 1, "1 variant for non-l33t matches"
for [word, variants, sub] in [
[ '', 1, {} ]
[ 'a', 1, {} ]
[ '4', 2, {'4': 'a'} ]
[ '4pple', 2, {'4': 'a'} ]
[ 'abcet', 1, {} ]
[ '4bcet', 2, {'4': 'a'} ]
[ 'a8cet', 2, {'8': 'b'} ]
[ 'abce+', 2, {'+': 't'} ]
[ '48cet', 4, {'4': 'a', '8': 'b'} ]
[ 'a4a4aa', nCk(6, 2) + nCk(6, 1), {'4': 'a'} ]
[ '4a4a44', nCk(6, 2) + nCk(6, 1), {'4': 'a'} ]
[ 'a44att+', (nCk(4, 2) + nCk(4, 1)) * nCk(3, 1), {'4': 'a', '+': 't'} ]
]
match =
token: word
sub: sub
l33t: not matching.empty(sub)
msg = "extra l33t guesses of #{word} is #{variants}"
t.equal scoring.l33t_variations(match), variants, msg
match =
token: 'Aa44aA'
l33t: true
sub: {'4': 'a'}
variants = nCk(6, 2) + nCk(6, 1)
msg = "capitalization doesn't affect extra l33t guesses calc"
t.equal scoring.l33t_variations(match), variants, msg
t.end()