滴滴滴上重點。。。
方式一:使用apache提供的工具包poi,poi使用的是4.1.2版本
    缺點:對字體樣式處理不精確;wmf公式圖片部分轉換不精確,本文檔只支持doc格式
    優點:轉換速度相對很快,本地也方便調試
方式二:使用libreoffice,使用的是7.5版本
    地址:下載 LibreOffice | LibreOffice 簡體中文官方網站 - 自由免費的辦公套件
    Linux安裝libreoffice案例:linux centos7工具安裝之 libreOffice篇 libreOffice安裝教程_centos7 安裝libreoffice_the_bog的博客-CSDN博客
    缺點:轉換速度相對慢
    優點:字體樣式十分精確,本文檔只支持doc,docx等等。轉換pdf等相關命令百度獲取
廢話不多說直接上代碼!!!
方式一代碼實現:
  相關jar包地址:文章來源:http://www.zghlxwxcb.cn/news/detail-788074.html
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>batik-codec</artifactId>
<version>1.7</version>
</dependency>
<dependency>
<groupId>net.arnx</groupId>
<artifactId>wmf2svg</artifactId>
<version>0.9.5</version>
</dependency>
package cn.hls.winner.winner_problem_manage.utils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.util.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.FileCopyUtils;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
* @author lhz
* @description TODO
* @date 2023/9/18 10:14
*/
public class Word2003Util {
private static final Logger logger = LoggerFactory.getLogger(Word2003Util.class);
/**
*
* @param multipartFile 上傳的文件
* @param htmlFile html上傳路徑
* @param htmlFileImgUrl html圖片上傳路徑
* @param wordFileUrl word上傳路徑
* @return
*/
public static String word2003ToHtml(MultipartFile multipartFile, String htmlFile, String htmlFileImgUrl, String wordFileUrl) {
// 需要判斷文件是否為doc,docx
if (multipartFile == null) {
return "word文檔上傳為空!";
}
if (multipartFile.getOriginalFilename().endsWith("docx")) {
return "word文檔格式有誤,請上傳doc格式的!";
}
logger.info("***** word2003ToHtml start file:{}", multipartFile);
//返回服務(wù)器代理地址
String htmlUrl = "";
//隨機(jī)命名html文件
String uuid = UUID.randomUUID().toString();
String htmlFileName = uuid + "." + "html";
logger.info("==== 初始化====(htmlFileName){參數(shù)} " + htmlFileName);
try {
//上傳服務(wù)器的圖片本地地址
logger.info("==== htmlFile{參數(shù)} ====" + htmlFile);
//nginx轉(zhuǎn)發(fā)后的圖片地址
logger.info("==== htmlFileImgUrl{參數(shù)} ====" + htmlFileImgUrl);
//生成網(wǎng)頁的文件夾地址
String htmlFileUrl = htmlFile + uuid + "/";
logger.info("==== htmlFileUrl{參數(shù)} ==== " + htmlFileUrl);
//上傳文件到服務(wù)器
boolean flag = upload(multipartFile, wordFileUrl, uuid);
if (!flag) {
return "word文檔上傳失??!";
}
logger.info("===== word文檔上傳成功!====");
//獲取文件名稱
String name = multipartFile.getOriginalFilename();
String suffix = name.substring(name.lastIndexOf("."));//.后綴名
String filePath = wordFileUrl + uuid + suffix;
logger.info("==== filePath ====" + filePath);
File file = new File(filePath);
// 1) 加載word文檔生成 HWPFDocument對象
InputStream inputStream = new FileInputStream(file);
HWPFDocument wordDocument = new HWPFDocument(inputStream);
WordToHtmlConverter wordToHtmlConverter =
new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
//圖片地址
String fileImg = htmlFileUrl + "images/";
File htmlFile1 = new File(htmlFileUrl);
if (!htmlFile1.exists()) {
//創(chuàng)建
if (htmlFile1.mkdirs()) {
logger.info("創(chuàng)建" + htmlFileUrl + "成功");
} else {
logger.info("創(chuàng)建" + htmlFileUrl + "成功");
}
}
//html代理地址
htmlUrl = htmlFileImgUrl + uuid + "/" + htmlFileName;
//html生成路徑
htmlFileName = htmlFileUrl + htmlFileName;
logger.info("==== htmlFileName{ html ======== 輸出地址} " + htmlFileName);
//設(shè)置圖片存放的位置
String finalFileImg = fileImg;
final int[] index = {1};
//處理圖片地址
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
File imgPath = new File(finalFileImg);
if (!imgPath.exists()) {//圖片目錄不存在則創(chuàng)建
imgPath.mkdirs();
}
String extension = pictureType.getExtension();
//隨機(jī)生成圖片名稱
suggestedName = finalFileImg + "image" + index[0] + "." + extension;
File file = new File(suggestedName);
OutputStream os = null;
try {
os = new FileOutputStream(file);
os.write(content);
os.close();
//處理wmf公式圖片
// if (extension.equals("wmf") || extension.equals("svg")) {
// if (extension.equals("wmf")) {
// String svgFile = suggestedName.substring(0,
// suggestedName.lastIndexOf(".wmf"))
// + ".svg";
// SvgToPngUtil.wmfToSvg(suggestedName, svgFile);
// }
// String suggestedNameSVG = suggestedName.substring(0, suggestedName.lastIndexOf(".")) + ".svg";
String s = SvgToPngUtil.readToString(suggestedNameSVG);
String suggestedNamePng = suggestedName.substring(0, suggestedName.lastIndexOf(".")) + ".png";
SvgToPngUtil.convertToPng(s, suggestedNamePng);
String s1 = SvgToPngUtil.GetImageStr(suggestedNameSVG);
// //刪除無用圖片
deleteFile(suggestedNameSVG, suggestedName);
// suggestedName = suggestedNameSVG;
// }
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
}
//這里可以指定word文檔中圖片的路徑。
String imgUlr = suggestedName.replace(htmlFile, htmlFileImgUrl);
index[0]++;
return imgUlr;
}
});
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
OutputStream outputStream = new FileOutputStream(htmlFileName);
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outputStream);
TransformerFactory factory = TransformerFactory.newInstance();
Transformer serializer = factory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outputStream.close();
logger.info("===== 網(wǎng)頁樣式轉(zhuǎn)換開始 ====");
String htmlContents = readHtml(htmlFileName);
FileCopyUtils.copy(htmlContents.getBytes("utf-8"), new File(htmlFileName));
logger.info("===== 網(wǎng)頁樣式轉(zhuǎn)換完成 ====");
} catch (Exception e) {
logger.error("word2003ToHtml====異常");
logger.error(e.getMessage());
throw new RuntimeException(e);
}
//
return htmlUrl;
}
//獲取網(wǎng)頁內(nèi)容
public static String readHtml(String htmlFileName) throws Exception {
StringBuilder htmlContents1 = new StringBuilder();
String htmlContents = "";
//讀圖網(wǎng)頁內(nèi)容
BufferedReader buf = new BufferedReader(
new InputStreamReader(new FileInputStream(htmlFileName), "utf-8"));
String c = "";
while ((c = buf.readLine()) != null) {
htmlContents1.append(c + "\n");
}
buf.close();
htmlContents = htmlContents1.toString();
htmlContents = htmlContents.replace("hyphenate:auto;font-family:Times New Roman;", "hyphenate:auto;font-family:宋體;").replace("vertical-align:text-bottom;", "vertical-align: middle;").replace("’","'").replace("’","'");
org.jsoup.nodes.Document document = Jsoup.parse(htmlContents);
formatHtml(document);
htmlContents = document.toString();
return htmlContents;
}
//網(wǎng)頁字體樣式
public static void formatHtml(org.jsoup.nodes.Document document) {
Elements elements = document.getAllElements();
String title = document.title();
logger.info("==== formatHtml ====title"+title);
for (Element element : elements) {
if ("main".equals(element.className())) {
continue;
}
if (title.contains("物理") || title.contains("數(shù)學(xué)") || title.contains("化學(xué)")) {
if (element.hasClass("s1")) {
element.attr("style", "font-family:Times New Roman;" + element.attr("style"));
}
}
String[] attrs = element.attr("style").split(";");
List<String> attrList = new ArrayList();
for (String attr : attrs) {
if (attr.contains("font-family")) {
attrList.add(attr);
}
}
//將<body>標(biāo)簽里的class屬性b1 b2去掉
Elements bodys = element.getElementsByTag("body");
for(Element body : bodys){
System.out.println("=======className:" + body.className() + "==========");
if("b1 b2".equals(body.className())){
body.attr("class","");
}
}
}
}
public static void deleteFile(String... imgUrl) {
for (String s : imgUrl) {
File file = new File(s);
try {
if (file.isFile()) {
// 刪除文件
if (file.delete()) {
logger.info("刪除文件成功==== 名稱為:" + file.getName());
} else {
}
} else {
}
} catch (Exception e) {
logger.error("====== 刪除圖片失敗 ======" + e.getMessage());
throw new RuntimeException();
}
}
}
/**
* @param file 文件
* @param htmlFile 文件上傳地址
* @param fileName 文件名稱
* @return
*/
public static boolean upload(MultipartFile file, String htmlFile, String fileName) {
InputStream is = null;
OutputStream os = null;
try {
File file1 = new File(htmlFile);
if (!file1.exists()) {
file1.mkdirs();
}
String name = file.getOriginalFilename();
String suffix = name.substring(name.lastIndexOf("."));//.后綴名
is = file.getInputStream();
os = new FileOutputStream(htmlFile + fileName + suffix);
//數(shù)據(jù)對拷
IOUtils.copy(is, os);
logger.info("==== 文件寫入成功!====");
} catch (IOException e) {
logger.error("===== 文件上傳失敗 ====" + e.getMessage());
return false;
} finally {
if (null != is) {
try {
is.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
if (null != os) {
try {
os.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
return true;
}
}
方式二代碼實現:文章來源地址http://www.zghlxwxcb.cn/news/detail-788074.html
package com.hls.poi.service;
import com.hls.poi.controller.WordToHtmlController;
import org.apache.poi.util.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.util.UUID;
public class LibreOfficeCommandWordService {
private static final Logger logger = LoggerFactory.getLogger(WordToHtmlController.class);
/**
* /opt/libreoffice7.5/program/soffice --headless --invisible --convert-to pdf /opt/a/1.docx --outdir /opt/a/
* –convert-to pdf 后面的 /opt/a/1.docx 為原文件路徑
* –outdir /opt/a/(轉(zhuǎn)換后文件存放目錄)
* <p>
* soffice --headless --invisible --convert-to html:HTML ffc75d91-3594-451d-a55f-a941325bc380.doc --outdir mmm
*/
//需要根據(jù)實(shí)際情況,查找LibreOffice安裝的實(shí)際目錄,
//Mac下是默認(rèn)安裝到/usr/local/bin,
//CentOS下默認(rèn)安裝在/usr/bin
private final static String sofficeDir = "/opt/libreoffice7.6/program/";
/**
* @param multipartFile 上傳的文件
* @param htmlFile html上傳路徑
* @param htmlFileImgUrl html圖片上傳路徑
* @param wordFileUrl word上傳路徑
* @param sofficeDir libreOffice安裝地址
* @throws Exception
*/
public String word2html(MultipartFile multipartFile, String htmlFile, String htmlFileImgUrl, String wordFileUrl, String sofficeDir) throws Exception {
try {
logger.info("exec command:[{}]\noutput: [{}]", "進(jìn)入word2pdf{} 方法");
// 需要判斷文件是否為doc,docx
if (multipartFile == null) {
return "word文檔上傳為空!";
}
//返回服務(wù)器代理地址
String htmlUrl = "";
//隨機(jī)命名html文件
String uuid = UUID.randomUUID().toString();
String htmlFileName = uuid + "." + "html";
logger.info("==== 初始化====(htmlFileName){參數(shù)} " + htmlFileName);
//上傳服務(wù)器的圖片本地地址
logger.info("==== htmlFile{參數(shù)} ====" + htmlFile);
//nginx轉(zhuǎn)發(fā)后的圖片地址
logger.info("==== htmlFileImgUrl{參數(shù)} ====" + htmlFileImgUrl);
//生成網(wǎng)頁的文件夾地址
String htmlFileUrl = htmlFile + uuid + "/";
logger.info("==== htmlFileUrl{參數(shù)} ==== " + htmlFileUrl);
//上傳文件到服務(wù)器
boolean flag = upload(multipartFile, wordFileUrl, uuid);
if (!flag) {
return "word文檔上傳失?。?;
}
logger.info("===== word文檔上傳成功!====");
//獲取文件名稱
String name = multipartFile.getOriginalFilename();
String suffix = name.substring(name.lastIndexOf("."));//.后綴名
//上傳后word文檔路徑 /home/winnersoft/date/tomcat/html-root/office/word/8ea8aec0-7fb5-4fbc-b73c-6f0e47b2857e.doc
String inPath = wordFileUrl + uuid + suffix;
logger.info("==== inPath ====" + inPath);
if (!new File(inPath).exists()) {
return "word文檔不存在!";
}
//圖片地址
File htmlFile1 = new File(htmlFileUrl);
if (!htmlFile1.exists()) {
//創(chuàng)建
if (htmlFile1.mkdirs()) {
logger.info("創(chuàng)建" + htmlFileUrl + "成功");
} else {
logger.info("創(chuàng)建" + htmlFileUrl + "成功");
}
}
//html代理地址 //http://172.18.222.25:82/office/html/8ea8aec0-7fb5-4fbc-b73c-6f0e47b2857e/8ea8aec0-7fb5-4fbc-b73c-6f0e47b2857e.html
htmlUrl = htmlFileImgUrl + uuid + "/" + htmlFileName;
//html生成路徑 /home/winnersoft/date/tomcat/html-root/office/html/af7ac82f-71bc-498c-8866-8bf7ef325345/
htmlFileName = htmlFileUrl;
logger.info("==== outPath{ html ======== 輸出地址} " + htmlFileName);
//設(shè)置圖片存放的位置
// String command = String.format("%s/soffice --convert-to pdf:writer_pdf_Export %s --outdir %s", sofficeDir, inPath, outPath);
String command = String.format("%s/soffice --headless --invisible --convert-to html:HTML %s --outdir %s", sofficeDir, inPath, htmlFileName);
logger.info("command==================================" + command);
String output = this.executeCommand(command);
logger.info("exec command:[{}]\noutput: [{}]", command, output);
return htmlUrl;
} catch (IOException e) {
logger.error("io異常"+e.getMessage());
throw new RuntimeException(e);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
protected String executeCommand(String command) throws IOException, InterruptedException {
logger.info("executeCommand{} 執(zhí)行轉(zhuǎn)化");
StringBuffer output = new StringBuffer();
Process p;
p = Runtime.getRuntime().exec(command);
p.waitFor();
try (
InputStreamReader inputStreamReader = new InputStreamReader(p.getInputStream(), "UTF-8");
BufferedReader reader = new BufferedReader(inputStreamReader)
) {
String line = "";
while ((line = reader.readLine()) != null) {
output.append(line + "\n");
}
}
// 銷毀子進(jìn)程
p.destroy();
return output.toString();
}
/**
* @param file 文件
* @param htmlFile 文件上傳地址
* @param fileName 文件名稱
* @return
*/
public static boolean upload(MultipartFile file, String htmlFile, String fileName) {
InputStream is = null;
OutputStream os = null;
try {
File file1 = new File(htmlFile);
if (!file1.exists()) {
file1.mkdirs();
}
String name = file.getOriginalFilename();
String suffix = name.substring(name.lastIndexOf("."));//.后綴名
is = file.getInputStream();
os = new FileOutputStream(htmlFile + fileName + suffix);
//數(shù)據(jù)對拷
IOUtils.copy(is, os);
logger.info("==== 文件寫入成功!====");
} catch (IOException e) {
logger.error("===== 文件上傳失敗 ====" + e.getMessage());
return false;
} finally {
if (null != is) {
try {
is.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
if (null != os) {
try {
os.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
return true;
}
}
到了這里,關于Java word轉為html 兩種方式的文章就介紹完了。如果您還想了解更多內容,請在右上角搜索TOY模板網以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持TOY模板網!