参考资料: jQuery-Word-Export导出word_jquery.wordexport.js下载-CSDN博客
近期又需要自己做个 Html2Doc 的解决方案,因为客户又不想要 Html2pdf 的下载了,当初还给我费尽心思解决Html转pdf时中文输出的问题(html转pdf文件下载之最合理的方法支持中文_jspdf.umd.min.js-CSDN博客)以及手机端下载pdf的问题(手机端 Android WebView 获取 blob 链接文件名并下载网页动态生成的 pdf 文件且调用外部程序打开_blob链接怎么打开-CSDN博客)
查了下,基于 IIS 的解决方案,感觉还是参考资料中的相对靠谱,测试了一下,页面上的例子正确工作,输出文件正确,之后逐步添加不同元素测试也正常,但是到加了 img 时就报错了:
问 AI 说是叫检查 jquery.word.export.js 的第 33行:
$('<canvas>').attr("id", "test_word_img_" + i).width(w).height(h).insertAfter(img_id);
看了前后代码,发现是 img 元素没有 id 造成的,这样的话,就需要修复 jquery.word.export.js,以下是修复后的代码,不管 img 有没有 id 都能正确输出了,当然,是交给通义千问去修复的:
if (typeof jQuery !== "undefined" && typeof saveAs !== "undefined") {(function ($) {$.fn.wordExport = function (fileName) {fileName = typeof fileName !== 'undefined' ? fileName : "jQuery-Word-Export";var static = {mhtml: {top: "Mime-Version: 1.0\nContent-Base: " + location.href + "\nContent-Type: Multipart/related; boundary=\"NEXT.ITEM-BOUNDARY\";type=\"text/html\"\n\n--NEXT.ITEM-BOUNDARY\nContent-Type: text/html; charset=\"utf-8\"\nContent-Location: " + location.href + "\n\n<!DOCTYPE html>\n<html>\n_html_</html>",head: "<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n<style>\n_styles_\n</style>\n</head>\n",body: "<body>_body_</body>"}};var options = {maxWidth: 624};// Clone selected element before manipulating it var markup = $(this).clone();// Remove hidden elements from the output markup.each(function () {var self = $(this);if (self.is(':hidden'))self.remove();});// Embed all images using Data URLs var images = Array();var img = markup.find('img');for (var i = 0; i < img.length; i++) {// Calculate dimensions of output image var w = Math.min(img[i].width, options.maxWidth);var h = img[i].height * (w / img[i].width);// Ensure each image has a unique idvar img_id = img[i].id || "dynamic_img_" + i;if (!img[i].id) {$(img[i]).attr("id", img_id);}$('<canvas>').attr("id", "test_word_img_" + i).width(w).height(h).insertAfter("#" + img_id);// Create canvas for converting image to data URL var canvas = document.createElement("CANVAS");canvas.width = w;canvas.height = h;// Draw image to canvas var context = canvas.getContext('2d');context.drawImage(img[i], 0, 0, w, h);// Get data URL encoding of image var uri = canvas.toDataURL("image/png");// Save encoded image to array images[i] = {type: uri.substring(uri.indexOf(":") + 1, uri.indexOf(";")),encoding: uri.substring(uri.indexOf(";") + 1, uri.indexOf(",")),location: "#" + img_id,data: uri.substring(uri.indexOf(",") + 1)};// Replace original image with canvas in markup$("#" + img_id).replaceWith(canvas);}// Prepare bottom of mhtml file with image data var mhtmlBottom = "\n";for (var i = 0; i < images.length; i++) {mhtmlBottom += "--NEXT.ITEM-BOUNDARY\n";mhtmlBottom += "Content-Location: " + images[i].location + "\n";mhtmlBottom += "Content-Type: " + images[i].type + "\n";mhtmlBottom += "Content-Transfer-Encoding: " + images[i].encoding + "\n\n";mhtmlBottom += images[i].data + "\n\n";}mhtmlBottom += "--NEXT.ITEM-BOUNDARY--";//TODO: load css from included stylesheet var styles = "";// Aggregate parts of the file together var fileContent = static.mhtml.top.replace("_html_", static.mhtml.head.replace("_styles_", styles) + static.mhtml.body.replace("_body_", markup.html())) + mhtmlBottom;// Create a Blob with the file contents var blob = new Blob([fileContent], {type: "application/msword;charset=utf-8"});saveAs(blob, fileName + ".doc");};})(jQuery);
} else {if (typeof jQuery === "undefined") {console.error("jQuery Word Export: missing dependency (jQuery)");}if (typeof saveAs === "undefined") {console.error("jQuery Word Export: missing dependency (FileSaver.js)");}
}
中间还遇到 img 标签中 src 使用其他网站的链接也会报错:
但是这是由于跨域问题,因此将图片放置到网站本地即可:
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Export to DOCX</title><script src="./js/jquery-3.3.1.min.js"></script><script src="./js/FileSaver.js"></script><script src="./js/jquery.wordexport.js"></script>
</head>
<body>
<div id="contentAA"><h1>这是一个标题</h1><h1> Your content here...</h1><p>这是段落内容。</p><img src="./img/01.png" alt="示例图片"> <table border="1"><tr><th>表头1</th><th>表头2</th></tr><tr><td>数据1</td><td>数据2</td></tr></table><img src="./img/02.png" alt="示例图片">
</div>
<hr/>
<button id="export">导出</button><script>$("#export").click(function(){$("#contentAA").wordExport();});</script>
</body>
</html>
图片也正常导出了,打开导出的文件看了一下,发现是个加了壳的 Html 文件,仔细看了下格式,发现这是 Mhtml 格式的,也即 .MHT 单个网页文件格式:
Mime-Version: 1.0
Content-Base: http://127.0.0.1/html2doc02.html
Content-Type: Multipart/related; boundary="NEXT.ITEM-BOUNDARY";type="text/html"--NEXT.ITEM-BOUNDARY
Content-Type: text/html; charset="utf-8"
Content-Location: http://127.0.0.1/html2doc02.html<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style></style>
</head>
<body><h1>这是一个标题</h1><h1> Your content here...</h1><p>这是段落内容。</p><img src="./img/01.png" alt="示例图片" id="dynamic_img_0"> <table border="1"><tbody><tr><th>表头1</th><th>表头2</th></tr><tr><td>数据1</td><td>数据2</td></tr></tbody></table><img src="./img/02.png" alt="示例图片" id="dynamic_img_1">
</body></html>
--NEXT.ITEM-BOUNDARY
Content-Location: #dynamic_img_0
Content-Type: image/png
Content-Transfer-Encoding: base64iVBORw0KGgoAAAANSUhEUgAAAFAAAAAwCAYAAACG5f33AAAAAXNSR0IArs4c6QAABRFJREFUaEPtm71rFEEUwN8Zc8ldbAykUjuttJBYSCzEgKLiByiksEghiqKi4EchEQwRjFFiAkoExUKwEKIRESwEC/FvUBS0EKKFJCTx43I5v1beyCxz72Z33ps99i5w297szHu/fd+TZIIgCKDxeBPINAB6s1MvNgAm49cAmJBfA2DNAP5+8xZ+PnoMP5+/gL+TnxLJsWTVSsidPgktPfsT7VOLlyti4K+Xr6A4eA3+fJmCYGYmNZkyLS2w/MPr1M6LOwgZLNy6A9me/c6PWgFwrnMT/J2eBqhBddM++b4uACoGU1PA+agVAGdWrQEsDDMOVZZ0dEB2x1Zo3rMLmrs2eiuO5+mnXgBKZLICTFMhibBRX6n08DEUR2+qn5tWrlAftbX3QCofddEDLI7cCOGZxDAp5c6c8oIo+ag1Bzi7eh0EpZJSNHf+HOROHGUpjYG+0NcfWwH4ZvdUAWpFFACPUmR+4DIs3L33vy0SZGId6DXt5i2bYdnYKPw4cRpQJv1I9tTvpArQVMRHWLQ+tEJu3LVZXsve3ZAfGVIfAPeb7+uH0vhEuKfEsvGl1ADS+IOBOz94ieWC5iKuwAgPLSz49i18HeG1jY1WnEktO3/lkrOmS9UCKbwoRTg0uQCp25qWR89BS5xbuyGMr2idXIhceVTYoeMs18s2F1Lx5+4t5UI+j+tM3BPP/d57KNyeY+3FsdtQHBoWx0OOPGGMlQAsc6FMhtWtcDIhR2Bqfdyim0JsG7nqdGWOPGKAPvC4mdAl8ML9BzDfd1FkfaYnmKUSJ9G55DH3Zrvw3KbusObitHrUleMsxiWwaX0+sZZaoct6XfJ4ATSFYMUf0iEkAWgqhBMbn1grgSJZy7ZAalF65GPrOyUZOkwOGFMBoO36UEWMkigUlcQke0jWegM0XdrsOyk8V4ZW+3z6HCYktK5c/4VwGECzr8v9OABdiaRqAOMOMgtVVQ+1tysIwexsqIMLHi7E0DA/NFwxPtPdg2/2pSAliSQRQHpQVPGJheqPw8fL+k5TaA48c31UzWYqw4m9URYoSSSJABbOni/rI+PS/sKDcSgODEJQKJTJncnnoRUnKwd7RXW1rS+WKOM6jLsXd521E6Eug4to3NEDTNdlEhbRrceOiIabVHiJMnUB0BQYBcK6K9uzT12yLO1cD6Wnz6wzuOzunaq8KE08idTD1ZXQghk/3KIHiHXX165udclie2hDbxsnlRWeMTM/W8FsAnRlz7qwwLIk0tQEyz++K7MCLWTcJATXxIGMKkVsBbMke9YFQFu2MpVAIc1syL1D1fvGZVKbu0qyZxxAST0pCRvWPy6iG+iaL7t9mxpemq2U5A7V10IkCkWdIaknJeexAEYJZQv6Lkhxv0cJLlGI04m46knJed4A6XjdnJJw3ZoqGyW4782dub8EimStN0DqEnpKYoLlzN44Svre3HH2tllsKgBNq9AuYbv0kTT/UYLTDsWnnJFAkaz1tkCaVW3wXLGG68K4Lkk5I43VqQCkypvjLVrqcBNLnOA+dxv6XOlEuyYApRNrn9hDrdCcG0Z9JFr/cSbaNQHItTKfMka/Q63Q50xOTF6UALmdQhKI3JhsWroLuDOJ4ATFNbZyWQJnrCXpFFwDC5s8XHj4Lqft1GdYAdLe1wWo2r9LlK322dL9rACTuIlUANt6l9tU44xq7RH5n0rVyKoopNTdFpP1oX6Nf/VKaIoNgA2ACQkkfP0f773NrWxc5+8AAAAASUVORK5CYII=--NEXT.ITEM-BOUNDARY
Content-Location: #dynamic_img_1
Content-Type: image/png
Content-Transfer-Encoding: base64iVBORw0KGgoAAAANSUhEUgAAABMAAAALCAYAAACd1bY6AAAAAXNSR0IArs4c6QAAAHRJREFUOE9j/P///38GKgFGUgxT3uSLYu1dv80ofBTD0BXDVMI0geSxsWHqMAxDtw2XAcjiGIbBXIXPMGQ/ETQM3SCQZmyacFkM9yYxLkMOU2wWY40AZIUwl+GyDNnrGEkDW4yCDMcV08gxTlI6I5S2qWoYAMYcbeBk+Zz2AAAAAElFTkSuQmCC--NEXT.ITEM-BOUNDARY--
既然如此,那么就有很多方法可以输出这种格式的文件了。不一定非得用 jQuery-Word-Export 了,而且从代码看 jquery.wordexport.js ,还可以做很多优化和自定义,现在这个只能勉强算是验证,还不能投入实用。
刚刚测试了 <img src="data:image/png;base64, 格式的图片,发现无法输出后无法在 word 2003 中显示,是因为没有处理 Src 和图片资源区 Content-Location: #dynamic_img_0 这样的设置不对造成的,按照 MHTML 格式规范,手工修改了一下下载的文件,在 Word 2003 里面就能正确显示图片了:
Mime-Version: 1.0
Content-Type: Multipart/related; boundary="NEXT.ITEM-BOUNDARY";type="text/html"--NEXT.ITEM-BOUNDARY
Content-Type: text/html; charset="utf-8"<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style></style>
</head>
<body><h1>这是一个标题</h1><p>这是段落内容。</p><img src="02.png" alt="示例图片"> <table border="1"><tbody><tr><th>表头1</th><th>表头2</th></tr><tr><td>数据1</td><td>数据2</td></tr></tbody></table><img src="01.png" alt="示例图片">
</body></html>
--NEXT.ITEM-BOUNDARY
Content-Type: image/png
Content-Transfer-Encoding: base64
Content-Location: 01.pngiVBORw0KGgoAAAANSUhEUgAAAFAAAAAwCAYAAACG5f33AAAAAXNSR0IArs4c6QAABRFJREFUaEPtm71rFEEUwN8Zc8ldbAykUjuttJBYSCzEgKLiByiksEghiqKi4EchEQwRjFFiAkoExUKwEKIRESwEC/FvUBS0EKKFJCTx43I5v1beyCxz72Z33ps99i5w297szHu/fd+TZIIgCKDxeBPINAB6s1MvNgAm49cAmJBfA2DNAP5+8xZ+PnoMP5+/gL+TnxLJsWTVSsidPgktPfsT7VOLlyti4K+Xr6A4eA3+fJmCYGYmNZkyLS2w/MPr1M6LOwgZLNy6A9me/c6PWgFwrnMT/J2eBqhBddM++b4uACoGU1PA+agVAGdWrQEsDDMOVZZ0dEB2x1Zo3rMLmrs2eiuO5+mnXgBKZLICTFMhibBRX6n08DEUR2+qn5tWrlAftbX3QCofddEDLI7cCOGZxDAp5c6c8oIo+ag1Bzi7eh0EpZJSNHf+HOROHGUpjYG+0NcfWwH4ZvdUAWpFFACPUmR+4DIs3L33vy0SZGId6DXt5i2bYdnYKPw4cRpQJv1I9tTvpArQVMRHWLQ+tEJu3LVZXsve3ZAfGVIfAPeb7+uH0vhEuKfEsvGl1ADS+IOBOz94ieWC5iKuwAgPLSz49i18HeG1jY1WnEktO3/lkrOmS9UCKbwoRTg0uQCp25qWR89BS5xbuyGMr2idXIhceVTYoeMs18s2F1Lx5+4t5UI+j+tM3BPP/d57KNyeY+3FsdtQHBoWx0OOPGGMlQAsc6FMhtWtcDIhR2Bqfdyim0JsG7nqdGWOPGKAPvC4mdAl8ML9BzDfd1FkfaYnmKUSJ9G55DH3Zrvw3KbusObitHrUleMsxiWwaX0+sZZaoct6XfJ4ATSFYMUf0iEkAWgqhBMbn1grgSJZy7ZAalF65GPrOyUZOkwOGFMBoO36UEWMkigUlcQke0jWegM0XdrsOyk8V4ZW+3z6HCYktK5c/4VwGECzr8v9OABdiaRqAOMOMgtVVQ+1tysIwexsqIMLHi7E0DA/NFwxPtPdg2/2pSAliSQRQHpQVPGJheqPw8fL+k5TaA48c31UzWYqw4m9URYoSSSJABbOni/rI+PS/sKDcSgODEJQKJTJncnnoRUnKwd7RXW1rS+WKOM6jLsXd521E6Eug4to3NEDTNdlEhbRrceOiIabVHiJMnUB0BQYBcK6K9uzT12yLO1cD6Wnz6wzuOzunaq8KE08idTD1ZXQghk/3KIHiHXX165udclie2hDbxsnlRWeMTM/W8FsAnRlz7qwwLIk0tQEyz++K7MCLWTcJATXxIGMKkVsBbMke9YFQFu2MpVAIc1syL1D1fvGZVKbu0qyZxxAST0pCRvWPy6iG+iaL7t9mxpemq2U5A7V10IkCkWdIaknJeexAEYJZQv6Lkhxv0cJLlGI04m46knJed4A6XjdnJJw3ZoqGyW4782dub8EimStN0DqEnpKYoLlzN44Svre3HH2tllsKgBNq9AuYbv0kTT/UYLTDsWnnJFAkaz1tkCaVW3wXLGG68K4Lkk5I43VqQCkypvjLVrqcBNLnOA+dxv6XOlEuyYApRNrn9hDrdCcG0Z9JFr/cSbaNQHItTKfMka/Q63Q50xOTF6UALmdQhKI3JhsWroLuDOJ4ATFNbZyWQJnrCXpFFwDC5s8XHj4Lqft1GdYAdLe1wWo2r9LlK322dL9rACTuIlUANt6l9tU44xq7RH5n0rVyKoopNTdFpP1oX6Nf/VKaIoNgA2ACQkkfP0f773NrWxc5+8AAAAASUVORK5CYII=--NEXT.ITEM-BOUNDARY
Content-Type: image/png
Content-Transfer-Encoding: base64
Content-Location: 02.pngiVBORw0KGgoAAAANSUhEUgAAABMAAAALCAYAAACd1bY6AAAAAXNSR0IArs4c6QAAAHRJREFUOE9j/P///38GKgFGUgxT3uSLYu1dv80ofBTD0BXDVMI0geSxsWHqMAxDtw2XAcjiGIbBXIXPMGQ/ETQM3SCQZmyacFkM9yYxLkMOU2wWY40AZIUwl+GyDNnrGEkDW4yCDMcV08gxTlI6I5S2qWoYAMYcbeBk+Zz2AAAAAElFTkSuQmCC--NEXT.ITEM-BOUNDARY--
下一步就是继续修正 jquery.word.export.js 让它能正确按照这个格式输出文件,并且正确处理 <img 的 Src 属性。
下面补充一下 FileSaver.js ,省得还要去 github下载,好麻烦:
/* FileSaver.js * A saveAs() FileSaver implementation. * 1.3.2 * 2016-06-16 18:25:19 * * By Eli Grey, http://eligrey.com * License: MIT * See https://github.com/eligrey/FileSaver.js/blob/master/LICENSE.md */ /*global self */
/*jslint bitwise: true, indent: 4, laxbreak: true, laxcomma: true, smarttabs: true, plusplus: true */ /*! @source http://purl.eligrey.com/github/FileSaver.js/blob/master/FileSaver.js */ var saveAs = saveAs || (function(view) { "use strict"; // IE <10 is explicitly unsupported if (typeof view === "undefined" || typeof navigator !== "undefined" && /MSIE [1-9]\./.test(navigator.userAgent)) { return; } var doc = view.document // only get URL when necessary in case Blob.js hasn't overridden it yet , get_URL = function() { return view.URL || view.webkitURL || view; } , save_link = doc.createElementNS("http://www.w3.org/1999/xhtml", "a") , can_use_save_link = "download" in save_link , click = function(node) { var event = new MouseEvent("click"); node.dispatchEvent(event); } , is_safari = /constructor/i.test(view.HTMLElement) , is_chrome_ios =/CriOS\/[\d]+/.test(navigator.userAgent) , throw_outside = function(ex) { (view.setImmediate || view.setTimeout)(function() { throw ex; }, 0); } , force_saveable_type = "application/octet-stream" // the Blob API is fundamentally broken as there is no "downloadfinished" event to subscribe to , arbitrary_revoke_timeout = 1000 * 40 // in ms , revoke = function(file) { var revoker = function() { if (typeof file === "string") { // file is an object URL get_URL().revokeObjectURL(file); } else { // file is a File file.remove(); } }; setTimeout(revoker, arbitrary_revoke_timeout); } , dispatch = function(filesaver, event_types, event) { event_types = [].concat(event_types); var i = event_types.length; while (i--) { var listener = filesaver["on" + event_types[i]]; if (typeof listener === "function") { try { listener.call(filesaver, event || filesaver); } catch (ex) { throw_outside(ex); } } } } , auto_bom = function(blob) { // prepend BOM for UTF-8 XML and text/* types (including HTML) // note: your browser will automatically convert UTF-16 U+FEFF to EF BB BF if (/^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(blob.type)) { return new Blob([String.fromCharCode(0xFEFF), blob], {type: blob.type}); } return blob; } , FileSaver = function(blob, name, no_auto_bom) { if (!no_auto_bom) { blob = auto_bom(blob); } // First try a.download, then web filesystem, then object URLs var filesaver = this , type = blob.type , force = type === force_saveable_type , object_url , dispatch_all = function() { dispatch(filesaver, "writestart progress write writeend".split(" ")); } // on any filesys errors revert to saving with object URLs , fs_error = function() { if ((is_chrome_ios || (force && is_safari)) && view.FileReader) { // Safari doesn't allow downloading of blob urls var reader = new FileReader(); reader.onloadend = function() { var url = is_chrome_ios ? reader.result : reader.result.replace(/^data:[^;]*;/, 'data:attachment/file;'); var popup = view.open(url, '_blank'); if(!popup) view.location.href = url; url=undefined; // release reference before dispatching filesaver.readyState = filesaver.DONE; dispatch_all(); }; reader.readAsDataURL(blob); filesaver.readyState = filesaver.INIT; return; } // don't create more object URLs than needed if (!object_url) { object_url = get_URL().createObjectURL(blob); } if (force) { view.location.href = object_url; } else { var opened = view.open(object_url, "_blank"); if (!opened) { // Apple does not allow window.open, see https://developer.apple.com/library/safari/documentation/Tools/Conceptual/SafariExtensionGuide/WorkingwithWindowsandTabs/WorkingwithWindowsandTabs.html view.location.href = object_url; } } filesaver.readyState = filesaver.DONE; dispatch_all(); revoke(object_url); } ; filesaver.readyState = filesaver.INIT; if (can_use_save_link) { object_url = get_URL().createObjectURL(blob); setTimeout(function() { save_link.href = object_url; save_link.download = name; click(save_link); dispatch_all(); revoke(object_url); filesaver.readyState = filesaver.DONE; }); return; } fs_error(); } , FS_proto = FileSaver.prototype , saveAs = function(blob, name, no_auto_bom) { return new FileSaver(blob, name || blob.name || "download", no_auto_bom); } ; // IE 10+ (native saveAs) if (typeof navigator !== "undefined" && navigator.msSaveOrOpenBlob) { return function(blob, name, no_auto_bom) { name = name || blob.name || "download"; if (!no_auto_bom) { blob = auto_bom(blob); } return navigator.msSaveOrOpenBlob(blob, name); }; } FS_proto.abort = function(){}; FS_proto.readyState = FS_proto.INIT = 0; FS_proto.WRITING = 1; FS_proto.DONE = 2; FS_proto.error = FS_proto.onwritestart = FS_proto.onprogress = FS_proto.onwrite = FS_proto.onabort = FS_proto.onerror = FS_proto.onwriteend = null; return saveAs;
}( typeof self !== "undefined" && self || typeof window !== "undefined" && window || this.content
));
// `self` is undefined in Firefox for Android content script context
// while `this` is nsIContentFrameMessageManager
// with an attribute `content` that corresponds to the window if (typeof module !== "undefined" && module.exports) { module.exports.saveAs = saveAs;
} else if ((typeof define !== "undefined" && define !== null) && (define.amd !== null)) { define([], function() { return saveAs; });
}