// word2md - Word to Markdown conversion tool
//
// word2md converts a Microsoft Word document to Markdown formatted text. The tool uses the
// Word Automation APIs to start an instance of Word and access the contents of the document
// being converted. The tool must be run using the cscript.exe script host and requires Word
// to be installed on the target machine. The document to convert and the output file must
// both be specified as command line arguments (word2md <inputfile> <outputfile>); the
// resulting Markdown is written to the output file as UTF-8 without a byte order mark. The
// tool recognizes the specific Word styles used in the TypeScript Language Specification.

// Host abstraction for the Windows Script Host environment. Exposes the command
// line arguments, COM object creation, console output and UTF-8 file output.
var sys = (function () {
    // Both streams are created once and reused by every writeFile call: the
    // text stream encodes characters, the binary stream receives the encoded
    // bytes and saves them to disk.
    var textStream = new ActiveXObject("ADODB.Stream");
    textStream.Type = 2 /*text*/;
    var byteStream = new ActiveXObject("ADODB.Stream");
    byteStream.Type = 1 /*binary*/;

    // Copy the host's argument collection into an ordinary array.
    var argv = [];
    var index = 0;
    while (index < WScript.Arguments.length) {
        argv[index] = WScript.Arguments.Item(index);
        index++;
    }

    // Creates a COM automation object from its type name.
    function createObject(typeName) {
        return new ActiveXObject(typeName);
    }

    // Writes a string to the script host's standard output.
    function write(s) {
        WScript.StdOut.Write(s);
    }

    // Saves `data` to `fileName` as UTF-8 without a byte order mark,
    // overwriting any existing file.
    function writeFile(fileName, data) {
        textStream.Open();
        byteStream.Open();
        try {
            // Encode the characters as UTF-8.
            textStream.Charset = "utf-8";
            textStream.WriteText(data);
            // The BOM is unwanted; start copying at offset 3 (size of BOM).
            textStream.Position = 3;
            textStream.CopyTo(byteStream);
            byteStream.SaveToFile(fileName, 2 /*overwrite*/);
        }
        finally {
            // Always release both streams so the next call can reopen them.
            byteStream.Close();
            textStream.Close();
        }
    }

    return {
        args: argv,
        createObject: createObject,
        write: write,
        writeFile: writeFile
    };
})();
/**
 * Converts an open Word document to Markdown text.
 *
 * The document is first rewritten in place through Word's find/replace so that
 * character-level formatting (code, italics, subscripts, cross references,
 * hyperlinks) is expressed as Markdown/HTML markup, then every paragraph is
 * emitted according to its paragraph style.
 *
 * NOTE: the document object is modified; callers are expected to discard the
 * changes (main closes the document without saving).
 *
 * @param doc Word Automation Document object.
 * @returns {string} The Markdown text.
 */
function convertDocumentToMarkdown(doc) {
    var result = "";
    var lastStyle;          // Style of the previously written paragraph.
    var lastInTable;        // Whether the previous paragraph was inside a table.
    var tableColumnCount;   // Paragraphs per table row (columns + end-of-row mark).
    var tableCellIndex;     // Running index of the paragraph within the current table.
    var columnAlignment = [];

    // Recursively copies the values in `properties` onto `target`; nested
    // objects (e.g. { font: { bold: true } }) are applied to the matching
    // sub-object of the target instead of replacing it.
    function setProperties(target, properties) {
        for (var name in properties) {
            if (properties.hasOwnProperty(name)) {
                var value = properties[name];
                if (typeof value === "object") {
                    setProperties(target[name], value);
                }
                else {
                    target[name] = value;
                }
            }
        }
    }

    // Runs a replace-all over the whole document. `findOptions` and
    // `replaceOptions` are applied to the Find and Replacement objects before
    // executing (formatting constraints such as style or font attributes).
    function findReplace(findText, findOptions, replaceText, replaceOptions) {
        var find = doc.range().find;
        find.clearFormatting();
        setProperties(find, findOptions);
        var replace = find.replacement;
        replace.clearFormatting();
        setProperties(replace, replaceOptions);
        // Positional arguments: forward search (true), no wrap (0), match
        // formatting (true), replace all occurrences (2).
        find.execute(findText, false, false, false, false, false, true, 0, true, replaceText, 2);
    }

    // Rewrites the display text of every hyperlink that has an address as a
    // Markdown link: [text](address).
    function fixHyperlinks() {
        var count = doc.hyperlinks.count;
        for (var i = 0; i < count; i++) {
            // Word collections are 1-based.
            var hyperlink = doc.hyperlinks.item(i + 1);
            var address = hyperlink.address;
            if (address && address.length > 0) {
                var textToDisplay = hyperlink.textToDisplay;
                hyperlink.textToDisplay = "[" + textToDisplay + "](" + address + ")";
            }
        }
    }

    // Appends text to the Markdown output.
    function write(s) {
        result += s;
    }

    // Returns a string of `count` spaces (empty when count is not positive).
    function spaces(count) {
        return count > 0 ? new Array(count + 1).join(" ") : "";
    }

    // Writes the header separator row of a Markdown table using the paragraph
    // alignment recorded for each column of the first row.
    function writeTableHeader() {
        for (var i = 0; i < tableColumnCount - 1; i++) {
            switch (columnAlignment[i]) {
                case 1:
                    write("|:---:");
                    break;
                case 2:
                    write("|---:");
                    break;
                default:
                    write("|---");
            }
        }
        write("|\n");
    }

    // Strips trailing control characters (code below 0x20), such as paragraph
    // and cell end marks, from a paragraph's text.
    function trimEndFormattingMarks(text) {
        var i = text.length;
        while (i > 0 && text.charCodeAt(i - 1) < 0x20)
            i--;
        return text.substr(0, i);
    }

    // Closes the block opened by the previous paragraph's style, if that style
    // needs a terminator.
    function writeBlockEnd() {
        switch (lastStyle) {
            case "Code":
                write("```\n\n");
                break;
            case "List Paragraph":
            case "Table":
            case "TOC":
                write("\n");
                break;
        }
    }

    // Emits the Markdown for a single paragraph based on its style.
    function writeParagraph(p) {
        var text = p.range.text;
        var style = p.style.nameLocal;
        var inTable = p.range.tables.count > 0;
        var level = 1;
        var sectionBreak = text.indexOf("\x0C") >= 0;
        text = trimEndFormattingMarks(text);
        if (inTable) {
            style = "Table";
        }
        else if (style.match(/\s\d$/)) {
            // Styles such as "Heading 2" or "TOC 3": split into base name and level.
            level = +style.substr(style.length - 1);
            style = style.substr(0, style.length - 2);
        }
        if (lastStyle && style !== lastStyle) {
            writeBlockEnd();
        }
        switch (style) {
            case "Heading":
            case "Appendix":
                var section = p.range.listFormat.listString;
                write("####".substr(0, level) + ' <a name="' + section + '"/>' + section + " " + text + "\n\n");
                break;
            case "Normal":
                if (text.length) {
                    write(text + "\n\n");
                }
                break;
            case "List Paragraph":
                // Two spaces of indentation per nesting level below the first.
                write(spaces(p.range.listFormat.listLevelNumber * 2 - 2) + "* " + text + "\n");
                break;
            case "Grammar":
                // Em spaces are written as the ASCII entity so the output does not
                // depend on the encoding of this script file. A manual line break
                // (\x0B) becomes a Markdown hard break, which requires two trailing
                // spaces before the newline.
                write("&emsp;&emsp;" + text.replace(/\s\s\s/g, "&emsp;").replace(/\x0B/g, "  \n&emsp;&emsp;&emsp;") + "\n\n");
                break;
            case "Code":
                if (lastStyle !== "Code") {
                    write("```TypeScript\n");
                }
                else {
                    write("\n");
                }
                write(text.replace(/\x0B/g, " \n") + "\n");
                break;
            case "Table":
                if (!lastInTable) {
                    // First paragraph of a table. Each row is counted as one
                    // paragraph per column plus one extra paragraph.
                    tableColumnCount = p.range.tables.item(1).columns.count + 1;
                    tableCellIndex = 0;
                }
                if (tableCellIndex < tableColumnCount) {
                    columnAlignment[tableCellIndex] = p.alignment;
                }
                write("|" + text);
                tableCellIndex++;
                if (tableCellIndex % tableColumnCount === 0) {
                    write("\n");
                    if (tableCellIndex === tableColumnCount) {
                        // The separator row goes right after the first row.
                        writeTableHeader();
                    }
                }
                break;
            case "TOC Heading":
                write("## " + text + "\n\n");
                break;
            case "TOC":
                // Entry text is tab separated; the first field is used as the
                // link target and the second as the title.
                var strings = text.split("\t");
                write(spaces(level * 2 - 2) + "* [" + strings[0] + " " + strings[1] + "](#" + strings[0] + ")\n");
                break;
        }
        if (sectionBreak) {
            write("<br/>\n\n");
        }
        lastStyle = style;
        lastInTable = inTable;
    }

    // Writes the document title followed by every paragraph.
    function writeDocument() {
        var title = doc.builtInDocumentProperties.item(1) + "";
        if (title.length) {
            write("# " + title + "\n\n");
        }
        for (var p = doc.paragraphs.first; p; p = p.next()) {
            writeParagraph(p);
        }
        writeBlockEnd();
    }

    // Escape "<" everywhere so it is not read as the start of an HTML tag,
    // then restore the literal character inside the code styles, where the
    // text is emitted verbatim.
    findReplace("<", {}, "&lt;", {});
    findReplace("&lt;", { style: "Code" }, "<", {});
    findReplace("&lt;", { style: "Code Fragment" }, "<", {});
    findReplace("&lt;", { style: "Terminal" }, "<", {});

    // Turn character formatting into markup ("^&" stands for the matched text)
    // and clear the formatting so the same run is not matched again.
    findReplace("", { font: { subscript: true } }, "<sub>^&</sub>", { font: { subscript: false } });
    findReplace("", { style: "Code Fragment" }, "`^&`", { style: -66 /* default font */ });
    findReplace("", { style: "Production" }, "*^&*", { style: -66 /* default font */ });
    findReplace("", { style: "Terminal" }, "`^&`", { style: -66 /* default font */ });
    findReplace("", { font: { bold: true, italic: true } }, "***^&***", { font: { bold: false, italic: false } });
    findReplace("", { font: { italic: true } }, "*^&*", { font: { italic: false } });

    // With field codes toggled on, rewrite "^19 REF" matches as Markdown links,
    // then toggle the field display back.
    // NOTE(review): presumably "^19" matches the field-begin mark of REF
    // cross-reference fields - confirm against the Word find syntax.
    doc.fields.toggleShowCodes();
    findReplace("^19 REF", {}, "[^&](#^&)", {});
    doc.fields.toggleShowCodes();

    fixHyperlinks();

    writeDocument();

    // Map the characters \x85, \x96 and \x97 to the ellipsis, en dash and
    // em dash.
    result = result.replace(/\x85/g, "\u2026");
    result = result.replace(/\x96/g, "\u2013");
    result = result.replace(/\x97/g, "\u2014");

    return result;
}
/**
 * Command line entry point: converts the Word document named by args[0] to
 * Markdown and writes it to the file named by args[1].
 *
 * Prints a usage message and returns when the argument count is not exactly two.
 * The document is always closed without saving and the Word instance is always
 * shut down, even when conversion or file output throws; the error is then
 * propagated to the caller.
 *
 * @param args Array of command line arguments: [inputfile, outputfile].
 */
function main(args) {
    if (args.length !== 2) {
        sys.write("Syntax: word2md <inputfile> <outputfile>\n");
        return;
    }
    var app = sys.createObject("Word.Application");
    try {
        var doc = app.documents.open(args[0]);
        try {
            sys.writeFile(args[1], convertDocumentToMarkdown(doc));
        }
        finally {
            // The conversion edits the document in place; discard those edits.
            doc.close(false);
        }
    }
    finally {
        // Never leave the automation instance of Word running.
        app.quit();
    }
}
// Script entry point: run the conversion with the arguments collected in sys.args.
main(sys.args);
//# sourceMappingURL=file:///c:/ts/scripts/word2md.js.map