@@ -25,15 +25,22 @@ const convertHTML = HTMLToVDOM({
25
25
VText,
26
26
} ) ;
27
27
28
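+ // Helper that sets the WordprocessingML lineRule="auto" attribute on the first grandchild node of an image fragment, so every image-processing path emits the same XML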
29
+ const addLineRuleToImageFragment = ( imageFragment ) => {
30
+ imageFragment
31
+ . first ( )
32
+ . first ( )
33
+ . att ( 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' , 'lineRule' , 'auto' ) ;
34
+ } ;
35
+
28
36
// Image cache to prevent duplicate downloads during the same document generation
29
37
const imageCache = new Map ( ) ;
30
38
31
-
32
39
// Track retry statistics
33
40
let retryStats = {
34
41
totalAttempts : 0 ,
35
42
successAfterRetry : 0 ,
36
- finalFailures : 0
43
+ finalFailures : 0 ,
37
44
} ;
38
45
39
46
// Function to clear the image cache (useful for testing or memory management)
@@ -44,7 +51,7 @@ export const clearImageCache = () => {
44
51
retryStats = {
45
52
totalAttempts : 0 ,
46
53
successAfterRetry : 0 ,
47
- finalFailures : 0
54
+ finalFailures : 0 ,
48
55
} ;
49
56
return cacheSize ;
50
57
} ;
@@ -53,9 +60,9 @@ export const clearImageCache = () => {
53
60
export const getImageCacheStats = ( ) => ( {
54
61
size : imageCache . size ,
55
62
urls : Array . from ( imageCache . keys ( ) ) ,
56
- successCount : Array . from ( imageCache . values ( ) ) . filter ( v => v !== null ) . length ,
57
- failureCount : Array . from ( imageCache . values ( ) ) . filter ( v => v === null ) . length ,
58
- retryStats
63
+ successCount : Array . from ( imageCache . values ( ) ) . filter ( ( v ) => v !== null ) . length ,
64
+ failureCount : Array . from ( imageCache . values ( ) ) . filter ( ( v ) => v === null ) . length ,
65
+ retryStats,
59
66
} ) ;
60
67
61
68
// Helper function for conditional verbose logging
@@ -67,22 +74,36 @@ const logVerbose = (verboseLogging, message, ...args) => {
67
74
} ;
68
75
69
76
// eslint-disable-next-line consistent-return, no-shadow
70
- export const buildImage = async ( docxDocumentInstance , vNode , maximumWidth = null , options = { } ) => {
77
+ export const buildImage = async (
78
+ docxDocumentInstance ,
79
+ vNode ,
80
+ maximumWidth = null ,
81
+ options = { }
82
+ ) => {
71
83
// Extract image processing options with defaults from constants.js
72
- const maxRetries = options . maxRetries || docxDocumentInstance . imageProcessing ?. maxRetries || defaultDocumentOptions . imageProcessing . maxRetries ;
73
- const verboseLogging = options . verboseLogging || docxDocumentInstance . imageProcessing ?. verboseLogging || defaultDocumentOptions . imageProcessing . verboseLogging ;
84
+ const maxRetries =
85
+ options . maxRetries ||
86
+ docxDocumentInstance . imageProcessing ?. maxRetries ||
87
+ defaultDocumentOptions . imageProcessing . maxRetries ;
88
+ const verboseLogging =
89
+ options . verboseLogging ||
90
+ docxDocumentInstance . imageProcessing ?. verboseLogging ||
91
+ defaultDocumentOptions . imageProcessing . verboseLogging ;
74
92
let response = null ;
75
93
let base64Uri = null ;
76
-
94
+
77
95
try {
78
96
const imageSource = vNode . properties . src ;
79
-
97
+
80
98
// Check cache first for external URLs
81
99
if ( isValidUrl ( imageSource ) && imageCache . has ( imageSource ) ) {
82
100
const cachedData = imageCache . get ( imageSource ) ;
83
101
if ( cachedData === null ) {
84
102
// Previously failed to download in this document generation, skip this image
85
- logVerbose ( verboseLogging , `[CACHE] Skipping previously failed image in this document: ${ imageSource } ` ) ;
103
+ logVerbose (
104
+ verboseLogging ,
105
+ `[CACHE] Skipping previously failed image in this document: ${ imageSource } `
106
+ ) ;
86
107
return null ;
87
108
}
88
109
logVerbose ( verboseLogging , `[CACHE] Using cached image data for: ${ imageSource } ` ) ;
@@ -93,35 +114,43 @@ export const buildImage = async (docxDocumentInstance, vNode, maximumWidth = nul
93
114
// Download and cache the image with retry mechanism
94
115
let base64String = null ;
95
116
let lastError = null ;
96
-
117
+
97
118
for ( let attempt = 1 ; attempt <= maxRetries ; attempt += 1 ) {
98
119
retryStats . totalAttempts += 1 ;
99
-
120
+
100
121
try {
101
- logVerbose ( verboseLogging , `[RETRY] Attempt ${ attempt } /${ maxRetries } for: ${ imageSource } ` ) ;
102
-
122
+ logVerbose (
123
+ verboseLogging ,
124
+ `[RETRY] Attempt ${ attempt } /${ maxRetries } for: ${ imageSource } `
125
+ ) ;
126
+
103
127
base64String = await imageToBase64 ( imageSource ) ;
104
128
if ( base64String ) {
105
129
if ( attempt > 1 ) {
106
130
retryStats . successAfterRetry += 1 ;
107
- logVerbose ( verboseLogging , `[RETRY] Success on attempt ${ attempt } for: ${ imageSource } ` ) ;
131
+ logVerbose (
132
+ verboseLogging ,
133
+ `[RETRY] Success on attempt ${ attempt } for: ${ imageSource } `
134
+ ) ;
108
135
}
109
136
break ;
110
137
}
111
138
} catch ( error ) {
112
139
lastError = error ;
113
140
// eslint-disable-next-line no-console
114
- console . warn ( `[RETRY] Attempt ${ attempt } /${ maxRetries } failed for ${ imageSource } : ${ error . message } ` ) ;
115
-
141
+ console . warn (
142
+ `[RETRY] Attempt ${ attempt } /${ maxRetries } failed for ${ imageSource } : ${ error . message } `
143
+ ) ;
144
+
116
145
// Add delay before retry (linear backoff: 500ms, 1000ms, 1500ms, etc.)
117
146
if ( attempt < maxRetries ) {
118
147
const delay = 500 * attempt ;
119
148
logVerbose ( verboseLogging , `[RETRY] Waiting ${ delay } ms before retry...` ) ;
120
- await new Promise ( resolve => setTimeout ( resolve , delay ) ) ;
149
+ await new Promise ( ( resolve ) => setTimeout ( resolve , delay ) ) ;
121
150
}
122
151
}
123
152
}
124
-
153
+
125
154
if ( ! base64String ) {
126
155
retryStats . finalFailures += 1 ;
127
156
}
@@ -139,7 +168,11 @@ export const buildImage = async (docxDocumentInstance, vNode, maximumWidth = nul
139
168
// Note: Cache is cleared between document generations, so failures can be retried in future runs
140
169
imageCache . set ( imageSource , null ) ;
141
170
// eslint-disable-next-line no-console
142
- console . error ( `[ERROR] buildImage: Failed to convert URL to base64 after ${ maxRetries } attempts: ${ lastError ?. message || 'Unknown error' } - will skip duplicates in this document` ) ;
171
+ console . error (
172
+ `[ERROR] buildImage: Failed to convert URL to base64 after ${ maxRetries } attempts: ${
173
+ lastError ?. message || 'Unknown error'
174
+ } - will skip duplicates in this document`
175
+ ) ;
143
176
}
144
177
} else {
145
178
base64Uri = decodeURIComponent ( vNode . properties . src ) ;
@@ -163,14 +196,19 @@ export const buildImage = async (docxDocumentInstance, vNode, maximumWidth = nul
163
196
// Validate response has required properties
164
197
if ( ! response . fileContent || ! response . fileNameWithExtension ) {
165
198
// eslint-disable-next-line no-console
166
- console . error ( `[ERROR] buildImage: Invalid response object for ${ vNode . properties . src } :` , response ) ;
199
+ console . error (
200
+ `[ERROR] buildImage: Invalid response object for ${ vNode . properties . src } :` ,
201
+ response
202
+ ) ;
167
203
return null ;
168
204
}
169
205
206
+ const imageBuffer = Buffer . from ( response . fileContent , 'base64' ) ;
207
+
170
208
docxDocumentInstance . zip
171
209
. folder ( 'word' )
172
210
. folder ( 'media' )
173
- . file ( response . fileNameWithExtension , Buffer . from ( response . fileContent , 'base64' ) , {
211
+ . file ( response . fileNameWithExtension , imageBuffer , {
174
212
createFolders : false ,
175
213
} ) ;
176
214
@@ -181,39 +219,43 @@ export const buildImage = async (docxDocumentInstance, vNode, maximumWidth = nul
181
219
internalRelationship
182
220
) ;
183
221
184
- const imageBuffer = Buffer . from ( response . fileContent , 'base64' ) ;
185
-
186
222
// Add validation before calling sizeOf
187
223
if ( ! imageBuffer || imageBuffer . length === 0 ) {
188
224
// eslint-disable-next-line no-console
189
225
console . error ( `[ERROR] buildImage: Empty image buffer for ${ vNode . properties . src } ` ) ;
190
226
return null ;
191
227
}
192
-
228
+
193
229
// Check if we got HTML instead of image data (common with Wikimedia errors)
194
230
const firstBytes = imageBuffer . slice ( 0 , 20 ) . toString ( 'utf8' ) ;
195
231
if ( firstBytes . startsWith ( '<!DOCTYPE' ) || firstBytes . startsWith ( '<html' ) ) {
196
232
// eslint-disable-next-line no-console
197
- console . error ( `[ERROR] buildImage: Received HTML instead of image data for ${ vNode . properties . src } ` ) ;
233
+ console . error (
234
+ `[ERROR] buildImage: Received HTML instead of image data for ${ vNode . properties . src } `
235
+ ) ;
198
236
return null ;
199
237
}
200
238
201
239
let imageProperties ;
202
- try {
240
+ try {
203
241
imageProperties = sizeOf ( imageBuffer ) ;
204
242
if ( ! imageProperties || ! imageProperties . width || ! imageProperties . height ) {
205
243
// eslint-disable-next-line no-console
206
- console . error ( `[ERROR] buildImage: Invalid image properties for ${ vNode . properties . src } :` , imageProperties ) ;
244
+ console . error (
245
+ `[ERROR] buildImage: Invalid image properties for ${ vNode . properties . src } :` ,
246
+ imageProperties
247
+ ) ;
207
248
return null ;
208
249
}
209
250
} catch ( sizeError ) {
210
251
// eslint-disable-next-line no-console
211
- console . error ( `[ERROR] buildImage: sizeOf failed for ${ vNode . properties . src } :` , sizeError . message ) ;
252
+ console . error (
253
+ `[ERROR] buildImage: sizeOf failed for ${ vNode . properties . src } :` ,
254
+ sizeError . message
255
+ ) ;
212
256
return null ;
213
257
}
214
258
215
-
216
-
217
259
const imageFragment = await xmlBuilder . buildParagraph (
218
260
vNode ,
219
261
{
@@ -434,19 +476,17 @@ async function findXMLEquivalent(docxDocumentInstance, vNode, xmlFragment, image
434
476
xmlFragment . import ( emptyParagraphFragment ) ;
435
477
}
436
478
} else if ( childVNode . tagName === 'img' ) {
437
- const imageFragment = await buildImage ( docxDocumentInstance , childVNode , null , imageOptions ) ;
479
+ const imageFragment = await buildImage (
480
+ docxDocumentInstance ,
481
+ childVNode ,
482
+ null ,
483
+ imageOptions
484
+ ) ;
438
485
if ( imageFragment ) {
439
486
// Add lineRule attribute for consistency
440
487
// Direct image processing includes this attribute, but HTML image processing was missing it
441
488
// This ensures both processing paths generate identical XML structure
442
- imageFragment
443
- . first ( )
444
- . first ( )
445
- . att (
446
- 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' ,
447
- 'lineRule' ,
448
- 'auto'
449
- ) ;
489
+ addLineRuleToImageFragment ( imageFragment ) ;
450
490
xmlFragment . import ( imageFragment ) ;
451
491
} else {
452
492
// eslint-disable-next-line no-console
@@ -484,10 +524,7 @@ async function findXMLEquivalent(docxDocumentInstance, vNode, xmlFragment, image
484
524
// Add lineRule attribute for consistency
485
525
// Direct image processing includes this attribute, but HTML image processing was missing it
486
526
// This ensures both processing paths generate identical XML structure
487
- imageFragment
488
- . first ( )
489
- . first ( )
490
- . att ( 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' , 'lineRule' , 'auto' ) ;
527
+ addLineRuleToImageFragment ( imageFragment ) ;
491
528
xmlFragment . import ( imageFragment ) ;
492
529
} else {
493
530
// eslint-disable-next-line no-console
@@ -512,7 +549,12 @@ async function findXMLEquivalent(docxDocumentInstance, vNode, xmlFragment, image
512
549
}
513
550
514
551
// eslint-disable-next-line consistent-return
515
- export async function convertVTreeToXML ( docxDocumentInstance , vTree , xmlFragment , imageOptions = null ) {
552
+ export async function convertVTreeToXML (
553
+ docxDocumentInstance ,
554
+ vTree ,
555
+ xmlFragment ,
556
+ imageOptions = null
557
+ ) {
516
558
// Use default options if not provided
517
559
if ( ! imageOptions ) {
518
560
imageOptions = docxDocumentInstance . imageProcessing || defaultDocumentOptions . imageProcessing ;
@@ -538,34 +580,46 @@ export async function convertVTreeToXML(docxDocumentInstance, vTree, xmlFragment
538
580
539
581
async function renderDocumentFile ( docxDocumentInstance ) {
540
582
// Get image processing options from document instance with centralized defaults
541
- const imageOptions = docxDocumentInstance . imageProcessing || defaultDocumentOptions . imageProcessing ;
583
+ const imageOptions =
584
+ docxDocumentInstance . imageProcessing || defaultDocumentOptions . imageProcessing ;
542
585
// Clear image cache at the start of each document generation to allow retrying failed URLs in new documents
543
586
const previousCacheSize = clearImageCache ( ) ;
544
587
if ( previousCacheSize > 0 && imageOptions . verboseLogging ) {
545
588
// eslint-disable-next-line no-console
546
- console . log ( `[CACHE] Cleared previous cache (${ previousCacheSize } images) for new document generation` ) ;
589
+ console . log (
590
+ `[CACHE] Cleared previous cache (${ previousCacheSize } images) for new document generation`
591
+ ) ;
547
592
}
548
-
593
+
549
594
const vTree = convertHTML ( docxDocumentInstance . htmlString ) ;
550
595
551
596
const xmlFragment = fragment ( { namespaceAlias : { w : namespaces . w } } ) ;
552
597
553
- const populatedXmlFragment = await convertVTreeToXML ( docxDocumentInstance , vTree , xmlFragment , imageOptions ) ;
598
+ const populatedXmlFragment = await convertVTreeToXML (
599
+ docxDocumentInstance ,
600
+ vTree ,
601
+ xmlFragment ,
602
+ imageOptions
603
+ ) ;
554
604
555
605
// Log cache statistics at the end of document generation
556
606
const cacheStats = getImageCacheStats ( ) ;
557
- if ( ( cacheStats . size > 0 || cacheStats . retryStats . totalAttempts > 0 ) && imageOptions . verboseLogging ) {
607
+ if (
608
+ ( cacheStats . size > 0 || cacheStats . retryStats . totalAttempts > 0 ) &&
609
+ imageOptions . verboseLogging
610
+ ) {
558
611
// eslint-disable-next-line no-console
559
612
console . log ( `[CACHE] Image processing statistics:` , {
560
613
totalImages : cacheStats . size ,
561
614
successful : cacheStats . successCount ,
562
615
failed : cacheStats . failureCount ,
563
- cacheHitRatio : cacheStats . size > 1 ? 'Cache prevented duplicate downloads' : 'No duplicates found' ,
616
+ cacheHitRatio :
617
+ cacheStats . size > 1 ? 'Cache prevented duplicate downloads' : 'No duplicates found' ,
564
618
retries : {
565
619
totalAttempts : cacheStats . retryStats . totalAttempts ,
566
620
successAfterRetry : cacheStats . retryStats . successAfterRetry ,
567
- finalFailures : cacheStats . retryStats . finalFailures
568
- }
621
+ finalFailures : cacheStats . retryStats . finalFailures ,
622
+ } ,
569
623
} ) ;
570
624
}
571
625
0 commit comments