#!/usr/bin/python
import sys
import codecs
def usage():
    """Return the command-line help text for this script.

    The text contains three example invocations, each prefixed with the
    name the script was invoked as (``sys.argv[0]``).
    """
    # The format argument must be a 3-tuple. The previous expression,
    # `fmt % [sys.argv[0]] * 3`, applied `%` first with a single list
    # argument and raised TypeError ("not enough arguments").
    return '''
This script converts surname/name data from the US 1990 census into a format zxcvbn
recognizes. To use, first obtain the census files:
http://www2.census.gov/topics/genealogy/1990surnames
download dist.all.last, dist.female.first and dist.male.first
Then run:
%s dist.all.last ../data/surnames.txt
%s dist.female.first ../data/female_names.txt
%s dist.male.first ../data/male_names.txt
for each file.
''' % ((sys.argv[0],) * 3)
def main(input_filename, output_filename):
    """Write the lowercased first field of every non-blank input line.

    Both files are read and written as UTF-8. Each line of
    ``input_filename`` is split on whitespace; the first field is
    lowercased and written to ``output_filename`` followed by a newline.
    Lines containing only whitespace are skipped.

    Args:
        input_filename: path of the file to read.
        output_filename: path of the file to create or overwrite.
    """
    # Manage both handles in one `with` so the input file is closed as well.
    # The input is opened first so that a missing or unreadable input does
    # not truncate an existing output file.
    with codecs.open(input_filename, 'r', 'utf8') as infile, \
            codecs.open(output_filename, 'w', 'utf8') as outfile:
        for line in infile:
            fields = line.split()
            if fields:  # empty for blank / whitespace-only lines
                outfile.write(fields[0].lower() + '\n')
# Script entry point: expects exactly two arguments, the input and output
# file paths. With any other argument count the usage text is printed.
if __name__ == '__main__':
    if len(sys.argv) != 3:
        # Parenthesized form prints the same text on Python 2 and also
        # parses on Python 3, unlike the bare `print` statement.
        print(usage())
    else:
        main(*sys.argv[1:])
    # Exit status is 0 on both paths.
    sys.exit(0)