@@ -46,27 +46,50 @@ export function tokenizeHTML(htmlContent) {
4646 // const pattern = /<\?(?:php|=)[\s\S]*?\?>/gs;
4747 // const pattern =
4848 // /(?<before>(?:[^\s]|\s|^)\s*)(?<php><\?(?:php|=).*?(?:\?>|$))(?<after>(?:\s*)[^\s]|$)/gs;
49+ // const pattern =
50+ // /((?:[^\s]|\s|^)\s*)(<\?(?:php|=).*?(?:\?>|$))((?:\s*)[^\s]|$)/gms;
51+ // // const pattern = /([^\s]+)\s*(<\?(?:php|=).*?(?:\?>|$))\s*([^\s]*)/gms;
52+ // const pattern =
53+ // /([^\s]?\s*)?(<\?(?:php|=).*?(?:\?>|$))((?:\s*)[^\s]|$)/gms;
4954 const pattern =
50- / ( ( ?:[ ^ \s ] | \s | ^ ) \s * ) ( < \? (?: p h p | = ) .* ?(?: \? > | $ ) ) ( ( ?:\s * ) [ ^ \s ] | $ ) / gs ;
55+ / (?< = ( ( ?:[ ^ \s ] | \s | ^ ) \s * ) ) ( < \? (?: p h p | = ) .* ?\? > ) (? = ( ( ?:\s * ) [ ^ \s ] | $ ) ) / gms ;
5156
52- const tokenizedHTML = htmlContent . replace (
57+ let tokenizedHTML = htmlContent . replace (
5358 pattern ,
5459 ( string , before , phpCodeBlock , after , offset ) => {
5560 const start = [ ">" , "" ] . includes ( before . trim ( ) ) ? "<" : "_" ;
5661 const end = [ "<" , "" ] . includes ( after . trim ( ) ) ? " />" : "___" ;
5762
58- console . log ( { offset, string, before, phpCodeBlock, after, offset } ) ;
63+ // end-pad the token to the length of the span, up to 80 characters
64+ const codeLength = Math . min ( phpCodeBlock . length , 80 - end . length ) ;
65+ const token =
66+ `${ start } php_${ tokenCount ++ } __` . padEnd ( codeLength , "_" ) + end ;
67+ phpCodeBlocks [ token ] = phpCodeBlock ;
68+
69+ return token ;
70+ } ,
71+ ) ;
72+
73+ /**
74+ * Special-case follow-up for open-ended PHP tags at the end of the document
75+ * TODO: Merge this back up into a single pattern
76+ */
77+ tokenizedHTML = tokenizedHTML . replace (
78+ / (?< = ( (?: [ ^ \s ] | \s | ^ ) \s * ) ) ( < \? (?: p h p | = ) .* $ ) / gms,
79+
80+ ( string , before , phpCodeBlock , offset ) => {
81+ const start = [ ">" , "" ] . includes ( before . trim ( ) ) ? "<" : "_" ;
82+ const end = start === "<" ? " />" : "___" ;
5983
60- // end-pad the token to the lengh of the span, up to 80 characters
6184 const codeLength = Math . min ( phpCodeBlock . length , 80 - end . length ) ;
6285 const token =
6386 `${ start } php_${ tokenCount ++ } __` . padEnd ( codeLength , "_" ) + end ;
6487 phpCodeBlocks [ token ] = phpCodeBlock ;
65- return `${ before } ${ token } ${ after } ` ;
88+
89+ return token ;
6690 } ,
6791 ) ;
6892
69- console . log ( { tokenizedHTML, phpCodeBlocks } ) ;
7093 return { tokenizedHTML, phpCodeBlocks } ;
7194}
7295
0 commit comments