Bootstrap

Selenium 与 PhantomJSDriver 整合:加速网站爬取

在使用 PhantomJSDriver 时,由于每次爬取都要重新启动 client,单次爬取数据非常慢,耗时在 30 s 左右。为此我研究了源码并自行做了改造,其中的关键代码已贴在下方。


源码: http://git.oschina.net/wds/contact


package org.openqa.selenium.phantomjs;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Map;

import org.openqa.selenium.Capabilities;
import org.openqa.selenium.Platform;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.remote.DriverCommand;
import org.openqa.selenium.remote.MyHttpCommandExecutor;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.openqa.selenium.remote.Response;

import com.google.common.collect.ImmutableMap;

/**
 * A {@link PhantomJSDriver} that can adopt an existing session id instead of
 * always asking the server for a new session.
 * <p>
 * When a non-empty session id is passed to the constructor, the driver sets
 * that id on itself and only issues a {@code GET_CAPABILITIES} command to
 * populate its capabilities. When the id is {@code null} or empty it falls
 * back to the superclass session start.
 */
public class MyPhantomJSDriver extends PhantomJSDriver {
	/** Session id to adopt; {@code null} or empty means "start a new session". */
	private final String mySessionId;

	// NOTE(review): these two fields are written by the overridden
	// startSession(Capabilities, Capabilities), which is presumably invoked
	// from the superclass constructor. They must therefore NOT get field
	// initializers, which would run after super(...) and wipe the values.
	private Capabilities desiredCapabilities;
	private Capabilities requiredCapabilities;

	/** Port handed to the superclass constructor; only stored here. */
	private final int port;

	/**
	 * Creates the driver and immediately starts or adopts a session.
	 *
	 * @param mySessionId
	 *            id of the session to adopt, or {@code null}/empty to start a
	 *            new session.
	 * @param port
	 *            port passed on to the {@link PhantomJSDriver} constructor.
	 * @throws RuntimeException
	 *             rethrown unchanged if the session could not be started or
	 *             adopted; {@link #quit()} is attempted first.
	 */
	public MyPhantomJSDriver(String mySessionId, int port) {
		super(port);
		this.mySessionId = mySessionId;
		this.port = port;
		try {
			startSession();
		} catch (RuntimeException e) {
			try {
				quit();
			} catch (Exception ignored) {
				// Best-effort cleanup only: the original failure below is the
				// one the caller needs to see.
			}
			throw e;
		}
	}

	/**
	 * Adopts the configured session id, or starts a new session through the
	 * superclass when no id was configured.
	 *
	 * @throws IllegalStateException
	 *             if the capabilities reported by the server cannot be stored
	 *             on the driver.
	 */
	protected void startSession() {
		if (this.mySessionId == null || this.mySessionId.isEmpty()) {
			super.startSession(desiredCapabilities, requiredCapabilities);
			return;
		}

		super.setSessionId(this.mySessionId);

		// ImmutableMap rejects null values, so only add what is actually set.
		ImmutableMap.Builder<String, Capabilities> paramBuilder = new ImmutableMap.Builder<String, Capabilities>();
		if (desiredCapabilities != null) {
			paramBuilder.put("desiredCapabilities", desiredCapabilities);
		}
		if (requiredCapabilities != null) {
			paramBuilder.put("requiredCapabilities", requiredCapabilities);
		}
		Map<String, ?> parameters = paramBuilder.build();

		Response response = execute(DriverCommand.GET_CAPABILITIES, parameters);

		// NOTE(review): assumes the response value is a String-keyed map of
		// capabilities - confirm against the server's response format.
		@SuppressWarnings("unchecked")
		Map<String, Object> rawCapabilities = (Map<String, Object>) response.getValue();

		DesiredCapabilities returnedCapabilities = (DesiredCapabilities) super.getCapabilities();
		if (returnedCapabilities == null) {
			returnedCapabilities = new DesiredCapabilities();
		}
		for (Map.Entry<String, Object> entry : rawCapabilities.entrySet()) {
			// The platform needs parsing and is applied separately below.
			if (CapabilityType.PLATFORM.equals(entry.getKey())) {
				continue;
			}
			returnedCapabilities.setCapability(entry.getKey(), entry.getValue());
		}
		returnedCapabilities.setPlatform(parsePlatform((String) rawCapabilities.get(CapabilityType.PLATFORM)));

		installCapabilities(returnedCapabilities);
	}

	/**
	 * Records the requested capabilities without contacting the server; the
	 * actual session handling happens in {@link #startSession()}.
	 *
	 * @param desiredCapabilities
	 *            capabilities to remember as desired.
	 * @param requiredCapabilities
	 *            capabilities to remember as required; may be {@code null}.
	 */
	@Override
	protected void startSession(Capabilities desiredCapabilities, Capabilities requiredCapabilities) {
		this.desiredCapabilities = desiredCapabilities;
		this.requiredCapabilities = requiredCapabilities;
	}

	/**
	 * Converts the platform string reported by the server into a
	 * {@link Platform}.
	 *
	 * @param platformString
	 *            raw platform value; may be {@code null} or empty.
	 * @return {@link Platform#ANY} for a missing value, otherwise the parsed
	 *         platform.
	 */
	private static Platform parsePlatform(String platformString) {
		if (platformString == null || "".equals(platformString)) {
			return Platform.ANY;
		}
		try {
			return Platform.valueOf(platformString);
		} catch (IllegalArgumentException e) {
			// The server probably responded with a name matching the os.name
			// system property. Try to recover and parse this.
			return Platform.extractFromSysProperty(platformString);
		}
	}

	/**
	 * Writes {@code capabilities} into the {@code capabilities} field declared
	 * on {@link RemoteWebDriver} via reflection.
	 *
	 * @param capabilities
	 *            capabilities to store on this driver.
	 * @throws IllegalStateException
	 *             if the field is missing or cannot be written; continuing
	 *             would leave the driver without the capabilities just read.
	 */
	private void installCapabilities(Capabilities capabilities) {
		try {
			Field f = RemoteWebDriver.class.getDeclaredField("capabilities");
			f.setAccessible(true);
			f.set(this, capabilities);
		} catch (NoSuchFieldException e) {
			throw new IllegalStateException("RemoteWebDriver has no 'capabilities' field", e);
		} catch (IllegalAccessException e) {
			throw new IllegalStateException("Cannot write RemoteWebDriver 'capabilities' field", e);
		}
	}
}
/*
This file is part of the GhostDriver by Ivan De Marino <http://ivandemarino.me>.

Copyright (c) 2012-2014, Ivan De Marino <http://ivandemarino.me>
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.openqa.selenium.phantomjs;

import java.io.IOException;
import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketAddress;

import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.remote.Command;
import org.openqa.selenium.remote.DriverCommand;
import org.openqa.selenium.remote.MyHttpCommandExecutor;
import org.openqa.selenium.remote.Response;

import com.google.common.base.Throwables;

/**
 * A specialized {@link org.openqa.selenium.remote.MyHttpCommandExecutor} that
 * sends commands to a {@link PhantomJSDriverService}.
 * <p>
 * On a new-session request the service is started only if nothing is accepting
 * connections on its port yet, so an already running server is reused. The
 * service is stopped after each quit command.
 * <p>
 * NOTE: Yes, the design of this class is heavily inspired by
 * {@link org.openqa.selenium.chrome.ChromeCommandExecutor}.
 *
 * @author Ivan De Marino <http://ivandemarino.me>
 */
class PhantomJSCommandExecutor extends MyHttpCommandExecutor {

	/** Host probed to find out whether the server is already listening. */
	private static final String PROBE_HOST = "127.0.0.1";

	/** Connect timeout of the reachability probe, in milliseconds. */
	private static final int PROBE_TIMEOUT_MILLIS = 3000;

	private final PhantomJSDriverService service;

	/**
	 * Creates a new PhantomJSCommandExecutor. The PhantomJSCommandExecutor will
	 * communicate with the PhantomJS/GhostDriver through the given
	 * {@code service}.
	 *
	 * @param service
	 *            The PhantomJSDriverService to send commands to.
	 */
	PhantomJSCommandExecutor(PhantomJSDriverService service) {
		super(PhantomJSDriver.getCustomCommands(), service.getUrl());
		this.service = service;
	}

	/**
	 * Sends the {@code command} to the PhantomJS/GhostDriver server for
	 * execution. When requesting a new session, the server is started first
	 * unless its port is already accepting connections. When terminating a
	 * session, the service is stopped once the command has completed.
	 *
	 * @param command
	 *            The command to execute.
	 * @return The command response.
	 * @throws WebDriverException
	 *             If the server could not be started, has died, or the command
	 *             failed with a checked exception.
	 */
	@Override
	public Response execute(Command command) {
		// The probe result only matters for NEW_SESSION, so other commands skip
		// the extra connection attempt entirely.
		if (DriverCommand.NEW_SESSION.equals(command.getName()) && !isServerReachable()) {
			try {
				service.start();
			} catch (IOException e) {
				// Surface the real cause here rather than letting the command
				// below fail with a less specific connection error.
				throw new WebDriverException("Unable to start the PhantomJS/GhostDriver server", e);
			}
		}

		try {
			return super.execute(command);
		} catch (Throwable t) {
			Throwable rootCause = Throwables.getRootCause(t);
			if (rootCause instanceof ConnectException && "Connection refused".equals(rootCause.getMessage())
					&& !service.isRunning()) {
				throw new WebDriverException("The PhantomJS/GhostDriver server has unexpectedly died!", t);
			}
			Throwables.propagateIfPossible(t);
			throw new WebDriverException(t);
		} finally {
			if (DriverCommand.QUIT.equals(command.getName())) {
				service.stop();
			}
		}
	}

	/**
	 * Checks whether something accepts TCP connections on the service port of
	 * the local host.
	 *
	 * @return {@code true} if a connection could be opened within the probe
	 *         timeout, {@code false} otherwise.
	 */
	private boolean isServerReachable() {
		Socket socket = new Socket();
		try {
			SocketAddress remoteAddr = new InetSocketAddress(PROBE_HOST, service.getPort());
			socket.connect(remoteAddr, PROBE_TIMEOUT_MILLIS);
			return true;
		} catch (IOException e) {
			// Expected when no server is running yet; not an error.
			return false;
		} finally {
			try {
				socket.close();
			} catch (IOException ignored) {
				// Nothing useful can be done if closing the probe socket fails.
			}
		}
	}
}



;