pegmalibrary
diff --git a/‎README.md
Lines changed: 5 additions & 2 deletions b/‎README.md
Lines changed: 5 additions & 2 deletions
diff --git a/‎dist/htmldiff.js
Lines changed: 28 additions & 11 deletions b/‎dist/htmldiff.js
Lines changed: 28 additions & 11 deletions
diff --git a/‎dist/htmldiff.js.map
Lines changed: 1 addition & 1 deletion b/‎dist/htmldiff.js.map
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/htmldiff.ts
Lines changed: 26 additions & 6 deletions b/‎src/htmldiff.ts
Lines changed: 26 additions & 6 deletions
diff --git a/‎test/diff.spec.js
Lines changed: 12 additions & 0 deletions b/‎test/diff.spec.js
Lines changed: 12 additions & 0 deletions
@@ -53,7 +53,10 @@ of these three parameters it will be ignored:
   not be compared - the entire tag should be treated as one token. This is useful for tags 
   where it does not make sense to insert `<ins>` and `<del>` tags. If not used, the default 
   list will be used:
-  `iframe,object,math,svg,script,video,head,style`.
+  `iframe,object,math,svg,script,video,head,style`.  
+  The tags specified here are matched as prefixes ('begins with'). So if tag 'i' is added,
+  `<i>` tags will be treated as atomic, and so will `<img>`. If you wish to exclude the
+  `<img>` tag when configuring `<i>`, specify it as 'i(?!mg)'.
 
 
 ### Example
@@ -75,7 +78,7 @@ Result:
 ## Development
 * `npm install` to install dependencies
 * `npm run lint` to ESLint the TypeScript
-* `npm run make` to compile the TypeScript
+* `npm run build` to transpile the TypeScript to JavaScript
 * `npm run test` to run the tests
 
 ## Credits
 
@@ -69,7 +69,7 @@ function isEndOfHTMLComment(word: string): boolean {
 }
 
 // Added head and style (for style tags inside the body)
-const atomicTagsRegExp = /^<(iframe|object|math|svg|script|video|head|style|a)/;
+const atomicTagsRegExp = /^<(iframe|object|math|svg|script|video|head|style|a)$/;
 
 /**
  * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
@@ -264,7 +264,7 @@ export function htmlToTokens(html: string): Token[] {
     const char = html[charIdx] as string;
     switch (mode){
       case 'tag': {
-        const atomicTag = isStartOfAtomicTag(currentWord);
+        const atomicTag = (' ' === char || '/' === char || '>' === char) ? isStartOfAtomicTag(currentWord) : false;
         const styleTag = isStartOfStyleTag(currentWord + char);
         const latestStyleTag = currentStyleTags.length && currentStyleTags[currentStyleTags.length - 1];
         const endOfStyleTag = isEndOfTag(char) && latestStyleTag && isEndOfStyleTag(currentWord, latestStyleTag);
@@ -313,7 +313,7 @@ export function htmlToTokens(html: string): Token[] {
         break;
       }
       case 'atomic_tag':
-        if (isEndOfTag(char) && isEndOfAtomicTag(currentWord, currentAtomicTag)){
+        if (isEndOfTag(char) && (isImage(currentWord + '>') || isEndOfAtomicTag(currentWord, currentAtomicTag))){
           currentWord += '>';
           words.push(createToken(currentWord, currentStyleTags, currentTableTags));
           currentWord = '';
@@ -445,6 +445,16 @@ function getKeyForToken(token: string){
   return token;
 }
 
+/**
+ * Checks whether a token starts with `<img`, has a quoted `src` value and ends with `>`.
+ *
+ * @param token The token text to test.
+ * @returns The regex match (group 1 is the quoted `src` text) or `null` when there is no match.
+ */
+function isImage(token: string) {
+  return /^<img.*src=['"]([^"']*)['"].*>$/.exec(token);
+}
+
 const tokenMapKey = (token: Token) => token.key + JSON.stringify(token.styles) + JSON.stringify(token.tableTags);
 
 /**
@@ -639,16 +649,16 @@ function getFullMatch(segment: Segment, beforeStart: number, afterStart: number,
     }
   }
 
-  // Extend the current match as far foward as it can go, without overflowing beforeTokens or
+  // Extend the current match as far forward as it can go, without overflowing beforeTokens or
   // afterTokens.
   let searching = true;
   let currentLength = 1;
   let beforeIndex = beforeStart + currentLength;
   let afterIndex = afterStart + currentLength;
 
   while (searching && beforeIndex < beforeTokens.length && afterIndex < afterTokens.length){
-    const beforeWord = beforeTokens[beforeIndex]?.key;
-    const afterWord = afterTokens[afterIndex]?.key;
+    const beforeWord = getTextToCompare(beforeIndex, beforeTokens);
+    const afterWord = getTextToCompare(afterIndex, afterTokens);
     const beforeStyle = JSON.stringify(beforeTokens[beforeIndex]?.styles);
     const afterStyle = JSON.stringify(afterTokens[afterIndex]?.styles);
     if (beforeWord === afterWord && beforeStyle === afterStyle){
@@ -675,6 +685,16 @@ function getFullMatch(segment: Segment, beforeStart: number, afterStart: number,
 
   return makeMatch(beforeStart, afterStart, currentLength, segment);
 }
+/**
+ * Returns the text by which the token at `index` is compared against another token.
+ *
+ * When `isStartOfAtomicTag` reports a truthy result for the token's `key`, the
+ * token's `string` property is returned; otherwise its `key` is returned.
+ *
+ * NOTE(review): the error message interpolates the whole `tokens` array, so
+ * object tokens would presumably render as `[object Object]` - consider
+ * reporting `tokens.length` instead.
+ *
+ * @param index Position of the token within `tokens`.
+ * @param tokens The token list to read from.
+ * @returns The token's `string` or `key` value, as described above.
+ * @throws Error when `tokens` has no truthy element at `index`.
+ */
+function getTextToCompare(index: number, tokens: any[]): string {
+  const token = tokens[index];
+  if (!token) {
+    throw Error(`Expected ${tokens} to have an element at position ${index}`);
+  }
+  const key = !!isStartOfAtomicTag(token.key) ? 'string' : 'key';
+  return token[key];
+}
+
 type Segment = {
   beforeTokens: Token[];
   afterTokens: Token[];
 
@@ -225,4 +225,16 @@ describe('Diff', function(){
     });
   });
 
+  describe('processing tags', function(){
+    it('should detect atomic tag correctly', function() { // `abb` begins with the atomic tag name `a`; the expected output diffs its contents rather than treating the tag as one token.
+      res = diff(
+          'Some <abb class=" my-abb">Text</abb> within <embb class=" my-embb">custom tags</embb>',
+          'Some <abb class=" my-abb"> other Text</abb> within <embb class=" my-embb">the same tags</embb>'
+      );
+      expect(res).to.equal(
+          'Some <abb class=" my-abb"><ins data-operation-index="1"> other </ins>Text</abb> within <embb class=" my-embb"><del data-operation-index="3">custom</del><ins data-operation-index="3">the same</ins> tags</embb>'
+      );
+    });
+  });
+
 });