public static String docToHtml(String basePath, String outImgPath,String outImgFolder, String fileName) throws TransformerException,IOException, ParserConfigurationException {
final String url = basePath +"wordToHtml/"+outImgFolder; System.out.println(url); HWPFDocument wordDocument = new HWPFDocument(new FileInputStream( fileName));// WordToHtmlUtils.loadDoc(new // FileInputStream(inputFile)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument()); wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { System.out.println(); return url + suggestedName; } }); wordToHtmlConverter.processDocument(wordDocument); // save pictures List<Picture> pics = wordDocument.getPicturesTable().getAllPictures(); if (pics != null) { File imgFile = new File(outImgPath); /** "I:/tomcat/apache-tomcat-7.0.62-windows-x64/apache-tomcat-7.0.62/webapps/ZYFP/wordImg" **/ if (!imgFile.exists()) { imgFile.mkdirs(); } for (int i = 0; i < pics.size(); i++) { Picture pic = (Picture) pics.get(i); try { /** * "I:/tomcat/apache-tomcat-7.0.62-windows-x64/apache-tomcat-7.0.62/webapps/ZYFP/wordImg/" * + pic.suggestFullFileName() **/ pic.writeImageContent(new FileOutputStream(outImgPath+ pic.suggestFullFileName())); } catch (FileNotFoundException e) { e.printStackTrace(); } } } Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out);TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "gb2312"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); byte[] lens = out.toByteArray(); String html = new String(lens, "gb2312"); return html; }public static String docsToHtml(String basePath, String fileUrl) throws IOException,TransformerException, ParserConfigurationException {
String data = null; String outImgPath = "I:/tomcat/apache-tomcat-7.0.62-windows-x64/apache-tomcat-7.0.62/webapps/ZYFP/wordToHtml/image/"; String outImgFolder = "image/"; String outHtmlPath = "I:/tomcat/apache-tomcat-7.0.62-windows-x64/apache-tomcat-7.0.62/webapps/ZYFP/wordToHtml/docx.html"; File file = new File(fileUrl); if (!file.exists()) { return "文件不存在"; } else { if (file.getName().endsWith(".docx")|| file.getName().endsWith(".DOCX")) { // 2007版本 data = docxToHtml(basePath,outImgPath,outHtmlPath,fileUrl); } else { // 2003版本 data = docToHtml(basePath, outImgPath, outImgFolder, fileUrl); }}
return data; }public static Map<String,String> getHtml_Css(String basePath,String filePath) {
String txt = ""; try { txt = ReqUtils.docsToHtml(basePath, filePath); } catch (IOException e) { e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } org.jsoup.nodes.Document doc = Jsoup.parse(txt); String[] styles = doc.head().select("style").html().split("\r\n"); Map<String,String> css = new HashMap<String,String>(); for(String style:styles) { String[] kv = style.split("\\{|\\}"); css.put(kv[0], kv[1]); } return css; }
/**
 * Converts a Word file to HTML and returns the body markup with the rules
 * of the head {@code <style>} elements copied into {@code style} attributes
 * of the elements they select.
 *
 * <p>The document is converted once. Every line of the style text is split
 * at {@code '{'} and {@code '}'} into a selector and its declarations, and
 * the declarations are set as the {@code style} attribute of every body
 * element matching the selector. Rules are applied in the order they appear,
 * so when several rules match the same element the later one replaces the
 * earlier one. Lines without both a selector and a declaration part are
 * ignored. Conversion failures are printed and lead to an empty result.
 *
 * @param basePath prefix of the picture URLs emitted into the HTML
 * @param filePath path of the Word file to convert
 * @return the inner HTML of the {@code <body>} with inlined styles
 */
public static String getHtml_LineStyle(String basePath, String filePath) {
    String html = "";
    try {
        html = ReqUtils.docsToHtml(basePath, filePath);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (TransformerException e) {
        e.printStackTrace();
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    }

    org.jsoup.nodes.Document doc = Jsoup.parse(html);
    Element body = doc.body();

    // Read the rules from the document that is being rewritten instead of
    // converting the Word file a second time just to obtain its stylesheet.
    String[] rules = doc.head().select("style").html().split("\\r?\\n");
    for (String rule : rules) {
        String[] kv = rule.split("\\{|\\}");
        if (kv.length < 2) {
            // Blank line or text without a "{...}" body: nothing to inline.
            continue;
        }
        String selector = kv[0].trim();
        if (selector.length() == 0) {
            // An empty selector cannot be used for a query.
            continue;
        }
        body.select(selector).attr("style", kv[1]);
    }
    return body.html();
}