import java.util.ArrayList;
import java.util.List;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
/**
 * Pulls numeric values out of the table cells of an HTML document using the
 * {@code org.htmlparser} library.
 */
public class ParsePage {

    /**
     * Matches any single character that is not an ASCII digit. Compiled once
     * because {@link #filter(String)} is invoked for every table cell.
     */
    private static final java.util.regex.Pattern NON_DIGITS =
            java.util.regex.Pattern.compile("[^0-9]");

    /**
     * Zero-based position, within the flattened list of table cells, of the
     * value printed between "第" and "期" in the console message.
     */
    private static final int ISSUE_CELL_INDEX = 3;

    /**
     * Zero-based position, within the flattened list of table cells, of the
     * value that {@link #parseFromString(String)} returns.
     */
    private static final int NUMBER_CELL_INDEX = 5;

    /**
     * Strips every character except the ASCII digits {@code 0-9}.
     *
     * <p>Parentheses are removed as well: inside a character class they are
     * literal characters, not grouping operators, so they must not be listed
     * among the characters to keep.
     *
     * @param character the text to clean; may be {@code null}
     * @return the digits of {@code character} in their original order, or an
     *         empty string when the input is {@code null} or has no digits
     */
    public static String filter(String character) {
        if (character == null) {
            return "";
        }
        return NON_DIGITS.matcher(character).replaceAll("");
    }

    /**
     * Collects the digit-only text of every table cell found in
     * {@code content}, prints the cells at positions 3 and 5 to standard
     * output, and returns the cell at position 5.
     *
     * <p>Cells are gathered in document order across all tables matched by
     * the parser, then indexed from zero.
     *
     * @param content the input handed to the {@code Parser} constructor
     *                (presumably HTML markup; confirm how the library treats
     *                strings that do not start with a tag)
     * @return the digits of the cell at position 5; may be empty when that
     *         cell contains no digits
     * @throws IndexOutOfBoundsException if fewer than six cells were found
     * @throws Exception if the underlying parser fails
     */
    public static String parseFromString(String content) throws Exception {
        Parser parser = new Parser(content);
        parser.setEncoding("utf-8");
        NodeFilter tableFilter = new NodeClassFilter(TableTag.class);
        NodeList tables = parser.parse(tableFilter);

        List<String> cells = new ArrayList<String>();
        for (int i = 0; i < tables.size(); ++i) {
            if (!(tables.elementAt(i) instanceof TableTag)) {
                continue;
            }
            TableTag table = (TableTag) tables.elementAt(i);
            for (TableRow row : table.getRows()) {
                for (TableColumn column : row.getColumns()) {
                    cells.add(filter(column.toPlainTextString().trim()));
                }
            }
        }

        // Fail with a message that explains the problem instead of the bare
        // index error List.get would raise on a page with too few cells.
        if (cells.size() <= NUMBER_CELL_INDEX) {
            throw new IndexOutOfBoundsException(
                    "Expected at least " + (NUMBER_CELL_INDEX + 1)
                            + " table cells but found " + cells.size());
        }

        String number = cells.get(NUMBER_CELL_INDEX);
        System.out.println("第" + cells.get(ISSUE_CELL_INDEX) + "期获取号码:" + number);
        return number;
    }
}