最代码官方的gravatar头像
最代码官方2013-11-20 12:35:42

一个站长常用的类似于获取关键词排名的java工具类

通过指定关键词和站点名，可以自动抓取百度的搜索结果，从而得知自己的网站在某个搜索词下的排名，类似于 http://www.aizhan.com/siteall/zuidaima.com/ 。

一个站长常用的类似于获取关键词排名的java工具类

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

/**
 * Small command-line utility that pages through Baidu search results for a
 * keyword until the HTML of a result page contains a given site name.
 *
 * <p>The match is a plain substring test on the raw page HTML, so it reports
 * only whether the site name occurs on a page, not its exact position.
 */
public class AnyBaiduKeywordRank {

	/**
	 * Search URL template. The first placeholder receives the URL-encoded
	 * keyword, the second the value of the {@code pn} parameter (computed as
	 * {@code (page - 1) * PAGE_SIZE}; presumably the offset of the first
	 * result on the page).
	 */
	private static final String SEARCH_URL = "http://www.baidu.com/s?wd=%s&pn=%s&ie=utf-8&usm=1&rsv_page=1";

	/** Step applied to {@code pn} for each successive page. */
	private static final int PAGE_SIZE = 10;

	/**
	 * Safety cap on the number of pages requested, so that a site that never
	 * shows up cannot keep the program requesting pages forever. The value is
	 * an arbitrary limit, not something dictated by the search engine.
	 */
	private static final int MAX_PAGES = 50;

	/** Connect and read timeout, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 10000;

	/**
	 * Performs an HTTP GET and returns the response body decoded as UTF-8.
	 *
	 * <p>The body is returned verbatim, including line terminators. This
	 * method never throws: on any failure (malformed URL, non-HTTP URL,
	 * connection or read error) the problem is reported on standard error and
	 * whatever was read so far is returned, which is the empty string when
	 * nothing could be read.
	 *
	 * @param url the absolute URL to fetch
	 * @return the response body, or the part of it read before a failure
	 */
	public static String request(String url) {
		StringBuilder body = new StringBuilder();
		HttpURLConnection conn = null;
		try {
			conn = (HttpURLConnection) new URL(url).openConnection();
			conn.setRequestMethod("GET");
			conn.setConnectTimeout(TIMEOUT_MILLIS);
			conn.setReadTimeout(TIMEOUT_MILLIS);
			conn.connect();
			// try-with-resources closes the reader (and the underlying stream)
			// even when a read fails half-way through the body.
			try (BufferedReader reader = new BufferedReader(
					new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
				// Copy raw characters instead of using readLine(), which drops
				// line terminators and would glue adjacent lines together.
				char[] buffer = new char[4096];
				int count;
				while ((count = reader.read(buffer)) != -1) {
					body.append(buffer, 0, count);
				}
			}
		} catch (Exception e) {
			// Deliberately broad: callers rely on this method never throwing.
			System.err.println("Request failed for " + url + ": " + e);
		} finally {
			if (conn != null) {
				conn.disconnect();
			}
		}
		return body.toString();
	}

	/**
	 * Requests successive result pages for a fixed keyword until one of them
	 * contains the site name, a request yields no content, or
	 * {@link #MAX_PAGES} pages have been examined.
	 *
	 * @param args ignored
	 * @throws Exception if the keyword cannot be URL-encoded
	 */
	public static void main(String[] args) throws Exception {
		String keyword = URLEncoder.encode("分享", "utf-8");
		String site = "javaniu";
		for (int page = 1; page <= MAX_PAGES; page++) {
			int offset = (page - 1) * PAGE_SIZE;
			// %s with an Integer keeps the number formatting locale-independent.
			String pageUrl = String.format(SEARCH_URL, keyword, Integer.valueOf(offset));
			System.out.println("Request url " + pageUrl);
			String html = request(pageUrl);
			if (html.isEmpty()) {
				// request() returns "" on failure; the following pages would
				// most likely fail the same way, so stop instead of retrying.
				System.out.println("Empty response, giving up");
				return;
			}
			if (html.contains(site)) {
				System.out.println("Find keyword");
				return;
			}
		}
		System.out.println("Site not found in the first " + MAX_PAGES + " pages");
	}
}

原理很简单，大家可以发挥想象力，去抓取任何想要的数据。这不正是 code 的魅力么？


打赏

顶部客服微信二维码底部
>扫描二维码关注最代码为好友扫描二维码关注最代码为好友