Attribute VB_Name = "exportAndFormatWord" Private Sub exportWord(fileFolder As String, fileName As String) Const my_h2 = "Table of Contents" 'log file Const ForAppending = 8 logFileLocation = fileFolder & "\log.txt" Set objFSO = CreateObject("Scripting.FileSystemObject") If objFSO.FileExists(logFileLocation) Then Set out = objFSO.GetFile(logFileLocation) Set logFile = out.OpenAsTextStream(ForAppending, TristateUseDefault) Else Set logFile = objFSO.CreateTextFile(logFileLocation) End If 'Open the temporary html file generated from Sigasi. logFile.WriteLine ("Converting to .docx format.") ChangeFileOpenDirectory _ fileFolder Documents.Open fileName:=fileName, ConfirmConversions:=False, _ ReadOnly:=False, AddToRecentFiles:=False, PasswordDocument:="", _ PasswordTemplate:="", Revert:=False, WritePasswordDocument:="", _ WritePasswordTemplate:="", Format:=wdOpenFormatAuto, XMLTransform:="" Set current_doc = Documents(fileName) ' Clean up the document current_doc.Activate ' This section becomes a H2 section ChangeSpecificH3toH2 "Project files overview" ' We'll re-generate the ToC, so we get rid of the original one DeleteBetweenH2AndNextH2 "Table of Contents" ' In Word, the Design Units section looks like a duplicate of the ToC so we take it out DeleteBetweenH2AndNextH2 "Design Units" ' In Word, HTML links are broken so we remove them UnlinkAllHyperlinks_ToPlainText 'Save the .html file as .docx current_doc.SaveAs2 fileName:="documentation.docx", FileFormat:= _ wdFormatXMLDocument, LockComments:=False, Password:="", AddToRecentFiles _ :=True, WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts _ :=False, SaveNativePictureFormat:=False, SaveFormsData:=False, _ SaveAsAOCELetter:=False, CompatibilityMode:=15 logFile.WriteLine ("Creating the cover page.") 'Go to the top of the document and set the title. Selection.HomeKey Unit:=wdStory Selection.Style = current_doc.Styles("Title") 'Set the subtitle. Selection.MoveDown Unit:=wdParagraph, Count:=1 Selection.Style = current_doc.Styles("Subtitle") 'Break away the cover page. Selection.MoveDown Unit:=wdParagraph, Count:=1 Selection.InsertBreak Type:=wdPageBreak logFile.WriteLine ("Adding styling to the document.") 'Apply a quickstyle to the complete document from MS Word 2016. current_doc.ApplyQuickStyleSet2 ("Shaded") logFile.WriteLine ("Writing table of content.") 'Insert Table Of Contents Selection.InsertBreak Type:=wdPageBreak Selection.HomeKey Unit:=wdStory Selection.GoTo What:=wdGoToPage, Which:=2 With current_doc .TablesOfContents.Add Range:=Selection.Range, RightAlignPageNumbers:= _ True, UseHeadingStyles:=True, UpperHeadingLevel:=1, _ LowerHeadingLevel:=3, IncludePageNumbers:=True, AddedStyles:="", _ UseHyperlinks:=True, HidePageNumbersInWeb:=True, UseOutlineLevels:= _ True .TablesOfContents(1).TabLeader = wdTabLeaderDots .TablesOfContents.Format = wdIndexIndent End With 'Create a new word document for size manipulation of large images. Set new_doc = Documents.Add(DocumentType:=wdNewBlankDocument) page_width = current_doc.PageSetup.TextColumns.Width page_height = current_doc.PageSetup.PageHeight - current_doc.PageSetup.TopMargin - current_doc.PageSetup.BottomMargin 'Size manipulation of large images. current_doc.Activate Dim total As Integer Dim curr As Integer total = 0 curr = 0 For Each ishape In current_doc.InlineShapes total = total + 1 Next For Each ishape In current_doc.InlineShapes curr = curr + 1 logFile.WriteLine ("Resizing Image " & curr & " of " & total) ishape.Select ' Selection.Copy ' new_doc.Content.PasteAndFormat (wdPasteDefault) Set new_ishape = new_doc.InlineShapes(1) new_ishape.LockAspectRatio = msoFalse new_ishape.ScaleWidth = 100 new_ishape.ScaleHeight = 100 ishape.LockAspectRatio = msoFalse If (new_ishape.Width > page_width) Then If ((page_width / new_ishape.Width) * new_ishape.Height > page_height) Then ishape.Width = page_height / new_ishape.Height * page_width ishape.Height = page_height Else ishape.Width = page_width ishape.Height = (page_width / new_ishape.Width) * new_ishape.Height End If ElseIf (new_ishape.Height > page_height) Then ' going to be shrinking both height and width, and width is okay already, so it'll be even okayer ishape.Width = page_height / new_ishape.Height * new_ishape.Width ishape.Height = page_height Else ishape.Width = new_ishape.Width ishape.Height = new_ishape.Height End If new_ishape.Delete ishape.LockAspectRatio = msoTrue Next 'Align all images to center. logFile.WriteLine ("Aligning Images") For Each oILShp In current_doc.InlineShapes oILShp.Select Selection.ParagraphFormat.Alignment = wdAlignParagraphCenter Next 'Adding borders and alignment to tables. logFile.WriteLine ("Styling tables") Dim tbl As Table For Each tbl In current_doc.Tables tbl.Style = "Table Grid" tbl.Rows.Alignment = wdAlignRowCenter Next tbl logFile.WriteLine ("Saving the document") 'Save documentation.docx. current_doc.Save 'False positive to discard the new document created without prompt. 'new_doc.Saved = True new_doc.Close SaveChanges:=wdDoNotSaveChanges current_doc.Activate ActiveWindow.ScrollIntoView ActiveDocument.Range(0, 0), True End Sub ' Main procedure Public Sub exportAndFormatWord() Dim htmlPath As String, outDocx As String htmlPath = GetFilePath("Select the HTML file to import", "HTML Files (*.htm;*.html)|*.htm;*.html") If Len(htmlPath) = 0 Then Exit Sub ' Find the last backslash position Dim pos As Long pos = InStrRev(htmlPath, "\") Dim fileFolder As String, fileName As String If pos > 0 Then fileFolder = Left$(htmlPath, pos - 1) fileName = Mid$(htmlPath, pos + 1) Else fileFolder = "" fileName = htmlPath End If exportWord fileFolder, fileName End Sub '===== Helpers ===== Private Function GetFilePath(prompt As String, filter As String) As String With Application.FileDialog(msoFileDialogFilePicker) .title = prompt .Filters.Clear .Filters.Add "HTML", "*.htm;*.html" .AllowMultiSelect = False If .Show = -1 Then GetFilePath = .SelectedItems(1) End With End Function Private Sub ChangeSpecificH3toH2(title As String) Dim doc As Document Dim rng As Range Set doc = ActiveDocument Set rng = doc.Content With rng.Find .ClearFormatting .Style = doc.Styles(wdStyleHeading3) .Text = title .Forward = True .Wrap = wdFindStop If .Execute Then rng.Paragraphs(1).Style = wdStyleHeading2 Else MsgBox "Heading 3 not found" End If End With End Sub Private Sub UnlinkAllHyperlinks_ToPlainText() Dim fld As Field Dim s As Range Application.ScreenUpdating = False '1) Unlink HYPERLINK fields (removes clickability, keeps display text) For Each s In ActiveDocument.StoryRanges Dim r As Range: Set r = s Do For Each fld In r.Fields If fld.Type = wdFieldHyperlink Then fld.Unlink Next fld Set r = r.NextStoryRange Loop Until r Is Nothing Next s '2) Clear the "Hyperlink" character style formatting wherever it remains Dim findR As Range Set findR = ActiveDocument.Range(0, ActiveDocument.Content.End) With findR.Find .ClearFormatting .Text = "" .Forward = True .Wrap = wdFindStop .Format = True .Style = ActiveDocument.Styles(wdStyleHyperlink) Do While .Execute 'Normalize appearance to surrounding text findR.Style = wdStyleDefaultParagraphFont findR.Font.Underline = wdUnderlineNone findR.Font.Color = wdColorAutomatic findR.Collapse wdCollapseEnd Loop End With Application.ScreenUpdating = True End Sub Private Sub DeleteBetweenH2AndNextH2(TARGET_H2_TITLE As String) ' Const TARGET_H2_TITLE As String = "Table of Contents" Const IncludeH2 As Boolean = True 'True = also delete the H2 paragraph itself Const IncludeH3 As Boolean = False 'True = also delete the next H3 paragraph Dim doc As Document Dim rngH2 As Range, rngH3 As Range Dim startCut As Long, endCut As Long, startCutEnd As Long Set doc = ActiveDocument '1) Find the target H2 Set rngH2 = doc.Content With rngH2.Find .ClearFormatting .Style = doc.Styles(wdStyleHeading2) .Text = TARGET_H2_TITLE 'leave as "" to grab the next H2 regardless of text .MatchWildcards = InStr(TARGET_H2_TITLE, "*") > 0 Or InStr(TARGET_H2_TITLE, "?") > 0 .Forward = True .Wrap = wdFindStop If Not .Execute Then MsgBox "Heading 2 not found: " & TARGET_H2_TITLE, vbExclamation Exit Sub End If End With '2) Compute start of deletion (end of H2 para, unless we include it) If IncludeH2 Then startCut = rngH2.Paragraphs(1).Range.Start Else startCut = rngH2.Paragraphs(1).Range.End End If startCutEnd = rngH2.Paragraphs(1).Range.End '3) From there, find the next H2 Set rngH3 = doc.Range(Start:=startCutEnd, End:=doc.Content.End) With rngH3.Find .ClearFormatting .Style = doc.Styles(wdStyleHeading2) .Text = "" .Forward = True .Wrap = wdFindStop If .Execute Then If IncludeH3 Then endCut = rngH3.Paragraphs(1).Range.End Else endCut = rngH3.Paragraphs(1).Range.Start End If Else 'No H3 after the H2: delete to end of document endCut = doc.Content.End End If End With '4) Delete the range If endCut > startCut Then doc.Range(Start:=startCut, End:=endCut).Delete Else MsgBox "Nothing to delete between the selected headings.", vbInformation End If End Sub