# Copyright 2011 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility to use a browser to visit multiple URLs.
Prerequisites:
1. The command_line package from tools/site_compare
2. Either the IE BHO or Firefox extension (or both)
Installation:
1. Build the IE BHO, or call regsvr32 on a prebuilt binary
2. Add a file called "sitecompare@google.com" to
the default Firefox profile directory under extensions, containing
the path to the Firefox extension root
Invoke with the command line arguments as documented within
the command line.
"""
import command_line
import scrapers
import socket
import time
from drivers import windowing
# Constants
# Maximum number of bytes requested from the browser socket per recv() call
# in Iterate.
MAX_URL = 1024
# TCP port on localhost that Iterate connects to after launching the browser.
PORT = 42492
def SetupIterationCommandLine(cmd):
  """Registers the flags understood by Iterate on a command.

  Declares the browser selection flags, the URL source flags (a single URL
  or a line range of a list file), the load timeout and the window size,
  together with the dependency/exclusion rules between them.

  Args:
    cmd: an object created by cmdline.AddCommand
  """
  add_flag = cmd.AddArgument
  depends_on = cmd.AddDependency
  exclusive = cmd.AddMutualExclusion
  url_sources = ("--url", "--list")

  # Which browser to drive.
  add_flag(["-b", "--browser"], "Browser to use (ie, firefox, chrome)",
           type="string", required=True)
  add_flag(["-b1v", "--browserver"], "Version of browser", metaname="VERSION")
  add_flag(["-p", "--browserpath"], "Path to browser.",
           type="string", required=False)

  # Where the URLs come from: exactly one of --url / --list.
  add_flag(["-u", "--url"], "URL to visit")
  add_flag(["-l", "--list"], "File containing list of URLs to visit",
           type="readfile")
  exclusive(list(url_sources))

  # Integer flags selecting a range of lines within the list file.
  for names, help_text in (
      (["-s", "--startline"], "First line of URL list"),
      (["-e", "--endline"], "Last line of URL list (exclusive)"),
      (["-c", "--count"], "Number of lines of URL file to use")):
    add_flag(names, help_text, type="int")

  depends_on("--startline", "--list")
  cmd.AddRequiredGroup(list(url_sources))
  for range_flag in ("--endline", "--count"):
    depends_on(range_flag, "--list")
  exclusive(["--count", "--endline"])
  depends_on("--count", "--startline")

  # Behaviour of the browser while loading.
  add_flag(["-t", "--timeout"],
           "Amount of time (seconds) to wait for browser to finish loading",
           type="int", default=300)
  add_flag(["-sz", "--size"], "Browser window size",
           default=(800, 600), type="coords")
def _ReadUrlList(path, startline, endline):
  """Returns the stripped lines [startline, endline) of a URL list file.

  Args:
    path: path of the text file holding one URL per line
    startline: 1-based number of the first line to return
    endline: 1-based number of the line to stop before, or None to read
      through the end of the file

  Returns:
    A list of the selected lines with surrounding whitespace removed. Lines
    past the end of the file are not included.
  """
  skip = max(startline - 1, 0)
  if endline is None:
    stop = None
  else:
    stop = skip + max(endline - startline, 0)
  urls = []
  with open(path, "r") as url_file:
    for index, line in enumerate(url_file):
      if stop is not None and index >= stop:
        break
      if index >= skip:
        urls.append(line.strip())
  return urls


def _ToText(data):
  """Returns socket data as a native str, decoding bytes where needed."""
  if isinstance(data, str):
    return data
  return data.decode("utf-8", "replace")


def Iterate(command, iteration_func):
  """Iterates over a list of URLs, calling a function on each.

  The browser is launched on demand and driven over a TCP socket on
  localhost:PORT: each URL is sent as one line and one line of response is
  read back. A receive timeout is reported as "<url>,hang\n" and any other
  socket failure (including the peer closing the connection) as
  "<url>,crash\n"; in both cases, and when the browser itself answers with a
  ",timeout\n" line, the browser is shut down and relaunched for the next URL.

  Args:
    command: the command line containing the iteration flags
    iteration_func: called for each URL as
      iteration_func(url, proc, wnd, result). proc is None when the browser
      was shut down while handling this URL; wnd is the window of the most
      recently launched browser.

  Raises:
    ValueError: if the browser cannot be launched or its socket cannot be
      connected to.
  """
  # pywintypes (pywin32) was referenced by this function without ever being
  # imported, which turned any EndProcess failure into a NameError. Import it
  # lazily; without pywin32 there is nothing of that type to catch.
  try:
    import pywintypes
    process_errors = (pywintypes.error,)
  except ImportError:
    process_errors = ()

  # Retrieve the browser scraper to use to invoke the browser
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  def AttachToBrowser(path, timeout):
    """Invoke the browser process and connect to the socket."""
    (proc, frame, wnd) = scraper.GetBrowser(path)

    if not wnd:
      raise ValueError("Could not invoke browser.")

    # Record the process immediately so that it is cleaned up even if the
    # connection below fails.
    Iterate.proc = proc
    Iterate.wnd = wnd

    # Try to connect the socket. If it fails, wait and try again. Do this
    # for ten seconds. A fresh socket is used for every attempt because the
    # state of a socket after a failed connect() is not portable.
    connection = None
    for _ in range(10):
      candidate = socket.socket(
          socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
      try:
        candidate.connect(("localhost", PORT))
      except socket.error:
        candidate.close()
        time.sleep(1)
        continue
      connection = candidate
      break

    if connection is None:
      raise ValueError("Could not connect to browser")
    Iterate.s = connection

    if command["--size"]:
      # Resize and reposition the frame
      windowing.MoveAndSizeWindow(frame, (0, 0), command["--size"], wnd)

    connection.settimeout(timeout)

  def DetachFromBrowser():
    """Close the socket and kill the process if necessary."""
    if Iterate.s:
      Iterate.s.close()
      Iterate.s = None

    if Iterate.proc:
      if not windowing.WaitForProcessExit(Iterate.proc, 0):
        try:
          windowing.EndProcess(Iterate.proc)
          windowing.WaitForProcessExit(Iterate.proc, 0)
        except process_errors:
          # Exception here most likely means the process died on its own
          pass
      Iterate.proc = None

  browser = command["--browserpath"] or None

  # Read the URLs from the file
  if command["--url"]:
    url_list = [command["--url"]]
  else:
    startline = command["--startline"]
    if startline is None:
      # NOTE(review): assumes an omitted flag reads back as None -- confirm
      # against command_line. Line numbers are 1-based.
      startline = 1
    if command["--count"]:
      endline = startline + command["--count"]
    else:
      # May be None, in which case the file is read through to its end.
      endline = command["--endline"]
    url_list = _ReadUrlList(command["--list"], startline, endline)

  timeout = command["--timeout"]

  # Loop through the URLs and send them through the socket
  Iterate.s = None
  Iterate.proc = None
  Iterate.wnd = None

  try:
    for url in url_list:
      # Invoke the browser if necessary
      if not Iterate.proc:
        AttachToBrowser(browser, timeout)

      # Send the URL and wait for a response. Sockets carry bytes, so
      # encode text before sending; sendall() retries partial writes.
      request = url + "\n"
      if not isinstance(request, bytes):
        request = request.encode("utf-8")
      Iterate.s.sendall(request)

      received = b""
      try:
        while b"\n" not in received:
          chunk = Iterate.s.recv(MAX_URL)
          # Workaround for an oddity: when Firefox closes
          # gracefully, somehow Python doesn't detect it.
          # (Telnet does)
          if not chunk:
            raise socket.error
          received += chunk
        response = _ToText(received)
      except socket.timeout:
        response = url + ",hang\n"
        DetachFromBrowser()
      except socket.error:
        # If there was a socket error, it's probably a crash
        response = url + ",crash\n"
        DetachFromBrowser()

      # If we received a timeout response, restart the browser
      if response.endswith(",timeout\n"):
        DetachFromBrowser()

      # Invoke the iteration function
      iteration_func(url, Iterate.proc, Iterate.wnd, response)
  finally:
    # We're done (or failed): never leave the socket or browser behind.
    DetachFromBrowser()