@@ -69,7 +69,7 @@ function isEndOfHTMLComment(word: string): boolean {
69
69
}
70
70
71
71
// Added head and style (for style tags inside the body)
72
- const atomicTagsRegExp = / ^ < ( i f r a m e | o b j e c t | m a t h | s v g | s c r i p t | v i d e o | h e a d | s t y l e | a ) / ;
72
+ const atomicTagsRegExp = / ^ < ( i f r a m e | o b j e c t | m a t h | s v g | s c r i p t | v i d e o | h e a d | s t y l e | a ) $ / ;
73
73
74
74
/**
75
75
* Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
@@ -264,7 +264,7 @@ export function htmlToTokens(html: string): Token[] {
264
264
const char = html [ charIdx ] as string ;
265
265
switch ( mode ) {
266
266
case 'tag' : {
267
- const atomicTag = isStartOfAtomicTag ( currentWord ) ;
267
+ const atomicTag = ( ' ' === char || '/' === char || '>' === char ) ? isStartOfAtomicTag ( currentWord ) : false ;
268
268
const styleTag = isStartOfStyleTag ( currentWord + char ) ;
269
269
const latestStyleTag = currentStyleTags . length && currentStyleTags [ currentStyleTags . length - 1 ] ;
270
270
const endOfStyleTag = isEndOfTag ( char ) && latestStyleTag && isEndOfStyleTag ( currentWord , latestStyleTag ) ;
@@ -313,7 +313,7 @@ export function htmlToTokens(html: string): Token[] {
313
313
break ;
314
314
}
315
315
case 'atomic_tag' :
316
- if ( isEndOfTag ( char ) && isEndOfAtomicTag ( currentWord , currentAtomicTag ) ) {
316
+ if ( isEndOfTag ( char ) && ( isImage ( currentWord + '>' ) || isEndOfAtomicTag ( currentWord , currentAtomicTag ) ) ) {
317
317
currentWord += '>' ;
318
318
words . push ( createToken ( currentWord , currentStyleTags , currentTableTags ) ) ;
319
319
currentWord = '' ;
@@ -445,6 +445,16 @@ function getKeyForToken(token: string){
445
445
return token ;
446
446
}
447
447
448
/**
 * Checks whether a token is a complete `<img>` tag that carries a quoted `src` attribute.
 *
 * The token must start with `<img`, contain `src='…'` or `src="…"`, and end with `>`.
 * Attributes may be spread over several lines.
 *
 * @param token Text of the tag to inspect, including its closing `>`.
 * @returns The regular-expression match (capture group 1 holds the `src` value) when the
 *   token is such an image tag, otherwise `null`. Callers may use the result as a truthy flag.
 */
function isImage(token: string): RegExpExecArray | null {
  // `[\s\S]*` is used instead of `.*` because `.` does not match line terminators, which
  // would make tags with attributes on separate lines go unrecognised.
  return /^<img[\s\S]*src=['"]([^"']*)['"][\s\S]*>$/.exec(token);
}
457
+
448
458
/**
 * Builds the map key for a token: its `key` followed by the JSON serialisation of its
 * `styles` and then of its `tableTags`.
 */
const tokenMapKey = (token: Token) => {
  const serializedStyles = JSON.stringify(token.styles);
  const serializedTableTags = JSON.stringify(token.tableTags);
  return token.key + serializedStyles + serializedTableTags;
};
449
459
450
460
/**
@@ -639,16 +649,16 @@ function getFullMatch(segment: Segment, beforeStart: number, afterStart: number,
639
649
}
640
650
}
641
651
642
- // Extend the current match as far foward as it can go, without overflowing beforeTokens or
652
+ // Extend the current match as far forward as it can go, without overflowing beforeTokens or
643
653
// afterTokens.
644
654
let searching = true ;
645
655
let currentLength = 1 ;
646
656
let beforeIndex = beforeStart + currentLength ;
647
657
let afterIndex = afterStart + currentLength ;
648
658
649
659
while ( searching && beforeIndex < beforeTokens . length && afterIndex < afterTokens . length ) {
650
- const beforeWord = beforeTokens [ beforeIndex ] ?. key ;
651
- const afterWord = afterTokens [ afterIndex ] ?. key ;
660
+ const beforeWord = getTextToCompare ( beforeIndex , beforeTokens ) ;
661
+ const afterWord = getTextToCompare ( afterIndex , afterTokens ) ;
652
662
const beforeStyle = JSON . stringify ( beforeTokens [ beforeIndex ] ?. styles ) ;
653
663
const afterStyle = JSON . stringify ( afterTokens [ afterIndex ] ?. styles ) ;
654
664
if ( beforeWord === afterWord && beforeStyle === afterStyle ) {
@@ -675,6 +685,16 @@ function getFullMatch(segment: Segment, beforeStart: number, afterStart: number,
675
685
676
686
return makeMatch ( beforeStart , afterStart , currentLength , segment ) ;
677
687
}
688
+
689
/**
 * Returns the text by which the token at `index` is compared.
 *
 * When `isStartOfAtomicTag` reports a truthy result for the token's `key`, the token's
 * `string` property is returned; otherwise its `key` is returned.
 *
 * @param index Position of the token inside `tokens`.
 * @param tokens Token list to read from (parameter type kept as-is for existing callers).
 * @returns The token's `string` or `key`, as described above.
 * @throws Error when `tokens` has no element at `index`.
 */
function getTextToCompare(index: number, tokens: any[]): string {
  const token = tokens[index];
  if (!token) {
    // Report the list length instead of interpolating the array itself, which would
    // render every token as "[object Object]" and produce a long, useless message.
    throw new Error(
      `Expected tokens (length ${tokens.length}) to have an element at position ${index}`
    );
  }
  return isStartOfAtomicTag(token.key) ? token.string : token.key;
}
697
+
678
698
type Segment = {
679
699
beforeTokens : Token [ ] ;
680
700
afterTokens : Token [ ] ;
0 commit comments