package test.poi;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

/**
 * Converts Word documents to HTML strings using Apache POI.
 *
 * <p>{@link #doWord(File)} handles the binary Word 2003 format through
 * {@code HWPFDocument}; {@link #doWord2007(File)} handles the Word 2007 format
 * through {@code XWPFDocument}. In both cases the embedded pictures are written
 * to a freshly named sub-directory of {@code projectPath + "/wordpic/"} and the
 * generated HTML links to them through {@code projectUrl + "wordpic/"}.
 */
public class test {

    /** Base URL prepended to picture links in the generated HTML. */
    private static final String projectUrl = "http://192.168.1.1:8080/test/";

    /** Base directory on disk under which extracted pictures are stored. */
    private static final String projectPath = "D:/test/piccache/";

    /** Encoding the Word 2003 HTML serializer is asked to produce. */
    private static final String WORD_2003_HTML_ENCODING = "GB2312";

    /** Timestamp (ms) used by the most recently generated id. */
    private static long lastTimestamp = 0;

    /** Counter distinguishing ids generated for the same timestamp. */
    private static int sequence = 0;

    /**
     * Generates an id made of a millisecond timestamp followed by a counter.
     *
     * <p>The counter restarts at 0 whenever the clock has advanced past the
     * timestamp of the previous id and is incremented otherwise, so two calls
     * never return the same string. The method is synchronized because it
     * mutates the shared timestamp/counter pair.
     *
     * @return the timestamp digits immediately followed by the counter digits
     */
    public static synchronized String getOnlyId() {
        long now = System.currentTimeMillis();
        if (now > lastTimestamp) {
            lastTimestamp = now;
            sequence = 0;
        } else {
            // Same millisecond (or the clock went backwards): keep the stored
            // timestamp and bump the counter so the id stays unique.
            sequence += 1;
        }
        return String.valueOf(lastTimestamp) + String.valueOf(sequence);
    }

    /**
     * Converts a Word 2003 ({@code .doc}) file to HTML.
     *
     * <p>Pictures are written to a new directory under
     * {@code projectPath + "/wordpic/"}; the HTML refers to them with URLs
     * under {@code projectUrl + "wordpic/"}. Failures are reported with
     * {@code printStackTrace()} and do not propagate to the caller.
     *
     * @param inFile the Word 2003 file to convert
     * @return the generated HTML, or whatever output had been produced
     *         (possibly an empty string) when the conversion failed
     */
    public static String doWord(File inFile) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        String randomName = "PIC" + getOnlyId();
        // Link used for the image src attributes in the converted HTML.
        final String baseUrl = projectUrl + "wordpic/" + randomName + "/";
        // Location where the converted images are stored.
        File pictureDir = preparePictureDir(randomName);

        try (FileInputStream in = new FileInputStream(inFile)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    return baseUrl + suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);

            List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
            if (pics != null) {
                for (Picture pic : pics) {
                    File target = new File(pictureDir, pic.suggestFullFileName());
                    try (FileOutputStream imageOut = new FileOutputStream(target)) {
                        pic.writeImageContent(imageOut);
                    } catch (FileNotFoundException e) {
                        // A picture that cannot be created on disk is skipped;
                        // the remaining pictures and the HTML are still produced.
                        e.printStackTrace();
                    }
                }
            }

            Document htmlDocument = wordToHtmlConverter.getDocument();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, WORD_2003_HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        } catch (Exception e) {
            e.printStackTrace();
        }
        // The bytes were produced in WORD_2003_HTML_ENCODING, so decode them
        // with that charset rather than the platform default.
        return decode(out, WORD_2003_HTML_ENCODING);
    }

    /**
     * Converts a Word 2007 ({@code .docx}) file to HTML.
     *
     * <p>Pictures are extracted into a new directory under
     * {@code projectPath + "/wordpic/"}; the HTML refers to them with URLs
     * under {@code projectUrl + "wordpic/"}. Failures are reported with
     * {@code printStackTrace()} and do not propagate to the caller.
     *
     * @param inFile the Word 2007 file to convert
     * @return the generated HTML, or whatever output had been produced
     *         (possibly an empty string) when the conversion failed
     */
    public static String doWord2007(File inFile) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        String randomName = "PIC" + getOnlyId();
        // Link used for the image src attributes in the converted HTML.
        final String baseUrl = projectUrl + "wordpic/" + randomName + "/";
        // Location where the converted images are stored.
        File pictureDir = preparePictureDir(randomName);

        try (FileInputStream in = new FileInputStream(inFile)) {
            XWPFDocument wordDocument = new XWPFDocument(in);
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver(baseUrl));
            options.setExtractor(new FileImageExtractor(pictureDir));
            XHTMLConverter.getInstance().convert(wordDocument, out, options);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // NOTE(review): decoded with the platform default charset, as before.
        // The encoding XHTMLConverter writes is not visible here - confirm and
        // decode explicitly if it differs from the platform default.
        return new String(out.toByteArray());
    }

    /**
     * Demo entry point: converts two hard-coded sample documents and prints
     * the resulting HTML to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File word2003 = new File("d:/test/员工思想工作总结(李海博).doc");
        File word2007 = new File("d:/test/发改委oa系统功能说明书.docx");
        // Print the HTML converted from the Word 2003 file.
        System.out.println(doWord(word2003));
        // Print the HTML converted from the Word 2007 file.
        System.out.println(doWord2007(word2007));
    }

    /**
     * Returns the picture directory for one conversion, creating it together
     * with any missing parent directories.
     */
    private static File preparePictureDir(String randomName) {
        File dir = new File(projectPath + "/wordpic/" + randomName + "/");
        if (!dir.isDirectory() && !dir.mkdirs()) {
            // Best effort: the conversion still runs, but pictures may be missing.
            System.err.println("Could not create picture directory: " + dir);
        }
        return dir;
    }

    /**
     * Decodes the buffered bytes with the given charset name, falling back to
     * the platform default charset when that name is not supported.
     */
    private static String decode(ByteArrayOutputStream out, String encoding) {
        try {
            return out.toString(encoding);
        } catch (UnsupportedEncodingException e) {
            return out.toString();
        }
    }
}
最近下载更多
微信网友_6683277771722752 LV1
2023年10月8日
whfuai LV14
2022年7月5日
yurineko LV1
2022年1月12日
x2b2d2 LV12
2021年7月10日
fei6549 LV10
2021年6月3日
2235140624 LV17
2020年11月28日
xuyongff LV24
2020年11月27日
味卜鲜码 LV6
2020年10月28日
yys956979778 LV10
2020年9月24日
zzyHenry LV2
2020年7月18日