package com.daimac.worm.thread;
import com.daimac.worm.entity.PPTEntity;
import com.daimac.worm.entity.TagEntity;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Worker thread that crawls a range of PPT template list pages and, for every
 * entry on each list page, fetches the detail page and appends the parsed
 * {@link PPTEntity} to the list handed to the constructor.
 *
 * <p>Pages or detail URLs that cannot be processed are recorded in
 * {@link #errorPage} / {@link #errorIndex} instead of aborting the thread.
 *
 * <p>NOTE(review): entities are appended to {@code ppts} without any locking;
 * if several threads share one list the caller presumably has to supply a
 * thread-safe implementation — confirm against the call site.
 */
@EqualsAndHashCode(callSuper = true)
@Data
public class PPTThread extends Thread {
    /** Logger used to report failed fetch attempts. */
    private static final Logger LOGGER = Logger.getLogger(PPTThread.class.getName());
    /** Site root that relative detail links are appended to. */
    private static final String SITE_ROOT = "http://www.1ppt.com/";
    /** Prefix of a list page URL; the page number and ".html" are appended. */
    private static final String LIST_URL_PREFIX = "http://www.1ppt.com/moban/ppt_moban_";

    /**
     * Start of the page range. The crawl loop begins at {@code numBegin + 1},
     * so this page itself is not fetched.
     */
    private int numBegin;
    /**
     * End of the page range (inclusive).
     */
    private int numEnd;
    /**
     * Page number currently being processed. Volatile so that a thread polling
     * the getter sees progress updates.
     */
    private volatile int currWork;
    /**
     * Page numbers of list pages that could not be fetched after all retries.
     */
    private List<String> errorPage = new ArrayList<>();
    /**
     * URLs of detail (index) pages that could not be fetched or lacked a download link.
     */
    private List<String> errorIndex = new ArrayList<>();
    /**
     * Result container supplied by the caller; parsed entities are appended to it.
     */
    private List<PPTEntity> ppts;
    /**
     * Number of fetch attempts made per URL before giving up.
     */
    private int retryCount = 3;
    /**
     * Set to {@code true} once the whole page range has been processed. Volatile so
     * that a thread that observes {@code true} also observes the results written before it.
     */
    private volatile boolean complete = false;

    /**
     * @param ppts list that receives every successfully parsed entity
     */
    public PPTThread(List<PPTEntity> ppts) {
        this.ppts = ppts;
    }

    /**
     * Crawls list pages {@code numBegin + 1} through {@code numEnd} and marks the
     * thread as complete afterwards.
     */
    @Override
    public void run() {
        for (int page = this.numBegin + 1; page <= this.numEnd; page++) {
            this.currWork = page;
            crawlListPage(page);
        }
        complete = true;
    }

    /** Fetches one list page and processes every entry found under ".tplist li". */
    private void crawlListPage(int page) {
        Document listDoc = findDocument(LIST_URL_PREFIX + page + ".html");
        if (listDoc == null) {
            this.errorPage.add(String.valueOf(page));
            return;
        }
        for (Element item : listDoc.select(".tplist li")) {
            crawlDetail(item);
        }
    }

    /** Fetches the detail page linked from one list entry and stores the parsed entity. */
    private void crawlDetail(Element item) {
        Element link = item.selectFirst("a");
        if (link == null) {
            // Entry without a link: there is no detail page to visit.
            return;
        }
        String indexUrl = SITE_ROOT + link.attr("href");
        Document detail = findDocument(indexUrl);
        if (detail == null) {
            this.errorIndex.add(indexUrl);
            return;
        }
        Element download = detail.selectFirst(".downurllist a");
        if (download == null) {
            // Without a download link the entity would be incomplete; record and skip.
            this.errorIndex.add(indexUrl);
            return;
        }

        PPTEntity pptEntity = new PPTEntity();
        pptEntity.setName(attrOrEmpty(link.selectFirst("img"), "alt"));
        pptEntity.setPic(attrOrEmpty(item.selectFirst("img"), "src"));

        // PPT details: each <li> in the info list carries one labelled value.
        Elements infoUl = detail.select(".info_left ul");
        pptEntity.setSort(infoUl.select("li:eq(0) a").text());
        pptEntity.setUpdateTime(infoUl.select("li:eq(1)").text().replace("更新时间:", ""));
        pptEntity.setPptVersion(infoUl.select("li:eq(2)").text().replace("素材版本:", ""));
        pptEntity.setFileSize(infoUl.select("li:eq(4)").text().replace("文件大小:", ""));
        pptEntity.setFileType(infoUl.select("li:eq(6)").text().replace("附件类型:", ""));

        List<TagEntity> tagList = new ArrayList<>();
        for (Element tagLink : infoUl.select("li:eq(8) a")) {
            TagEntity tag = new TagEntity();
            tag.setName(tagLink.text());
            tagList.add(tag);
        }
        pptEntity.setTags(tagList);

        pptEntity.setDownUrl(download.attr("href"));
        this.ppts.add(pptEntity);
    }

    /** Returns the attribute value, or an empty string when the element itself is missing. */
    private static String attrOrEmpty(Element element, String attribute) {
        return element == null ? "" : element.attr(attribute);
    }

    /**
     * Fetches a page, retrying up to {@code retryCount} times on I/O failure.
     *
     * @param url address of the page
     * @return the parsed document, or {@code null} if every attempt failed
     *         (or {@code retryCount} is not positive)
     */
    private Document findDocument(String url) {
        for (int attempt = 1; attempt <= this.retryCount; attempt++) {
            try {
                return Jsoup.connect(url).get();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING,
                        "Fetch failed (attempt " + attempt + " of " + this.retryCount + "): " + url, e);
            }
        }
        return null;
    }
}