"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const boundaries_1 = require("./boundaries");

// Result codes returned by GraphemerHelper.shouldBreak.
// @type {BreakType}
const NotBreak = 0;
const BreakStart = 1;
const Break = 2;
const BreakLastRegional = 3;
const BreakPenultimateRegional = 4;

/**
 * True when `unit` lies in the UTF-16 high (leading) surrogate range.
 * @param unit {number}
 * @returns {boolean}
 */
const isHighSurrogate = (unit) => unit >= 0xd800 && unit <= 0xdbff;

/**
 * True when `unit` lies in the UTF-16 low (trailing) surrogate range.
 * @param unit {number}
 * @returns {boolean}
 */
const isLowSurrogate = (unit) => unit >= 0xdc00 && unit <= 0xdfff;

/**
 * Combines a high/low surrogate pair into the code point it encodes.
 * @param lead {number} high surrogate code unit
 * @param trail {number} low surrogate code unit
 * @returns {number}
 */
const combineSurrogates = (lead, trail) =>
    (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000;

class GraphemerHelper {
    /**
     * Tells whether a surrogate pair starts at {pos}: the code unit at {pos}
     * must be a high surrogate and the code unit at {pos + 1} a low surrogate.
     * @param str {string}
     * @param pos {number}
     * @returns {boolean}
     */
    static isSurrogate(str, pos) {
        const lead = str.charCodeAt(pos);
        const trail = str.charCodeAt(pos + 1);
        return isHighSurrogate(lead) && isLowSurrogate(trail);
    }

    /**
     * Stand-in for String.prototype.codePointAt.
     * Reads the Unicode code point at {idx} of a UTF-16 string. Unlike the
     * native method, pointing at the low half of a valid pair also yields the
     * combined code point. An unpaired surrogate is returned unchanged.
     * @param str {string}
     * @param idx {number} code unit index; treated as 0 when undefined
     * @returns {number}
     */
    static codePointAt(str, idx) {
        const index = idx === undefined ? 0 : idx;
        const unit = str.charCodeAt(index);

        // High surrogate with at least one code unit after it.
        if (isHighSurrogate(unit) && index < str.length - 1) {
            const trail = str.charCodeAt(index + 1);
            return isLowSurrogate(trail) ? combineSurrogates(unit, trail) : unit;
        }

        // Low surrogate with at least one code unit before it.
        if (isLowSurrogate(unit) && index >= 1) {
            const lead = str.charCodeAt(index - 1);
            return isHighSurrogate(lead) ? combineSurrogates(lead, unit) : unit;
        }

        // Single-unit code point, or a surrogate half with no partner.
        return unit;
    }

    /**
     * Decides whether a grapheme cluster break is allowed before the last of
     * the given break classes, following the UAX #29 3.1.1 Grapheme Cluster
     * Boundary Rules for extended grapheme clusters.
     * @param start {number} break class of the first code point
     * @param mid {Array} break classes between start and end
     * @param end {number} break class of the code point being tested
     * @param startEmoji {number} emoji property of the first code point
     * @param midEmoji {Array} emoji properties between start and end
     * @param endEmoji {number} emoji property of the code point being tested
     * @returns {number} one of NotBreak (0), BreakStart (1), Break (2),
     *   BreakLastRegional (3) or BreakPenultimateRegional (4)
     */
    static shouldBreak(start, mid, end, startEmoji, midEmoji, endEmoji) {
        const CB = boundaries_1.CLUSTER_BREAK;
        const pictographic = boundaries_1.EXTENDED_PICTOGRAPHIC;

        const isRegional = (cls) => cls === CB.REGIONAL_INDICATOR;
        const isExtend = (cls) => cls === CB.EXTEND;
        const oneOf = (cls, candidates) => candidates.indexOf(cls) !== -1;

        const all = [start].concat(mid, [end]);
        const allEmoji = [startEmoji].concat(midEmoji, [endEmoji]);
        const previous = all[all.length - 2];
        const next = end;
        const nextEmoji = endEmoji;

        // Lookahead terminator for the regional indicator rules GB12/GB13.
        // Applies when the last RI is not the first entry, everything between
        // the first entry and that RI is also RI, and the class just before
        // `next` is neither Prepend nor RI. The parity of the RI count then
        // selects which regional break code is reported.
        const lastRegional = all.lastIndexOf(CB.REGIONAL_INDICATOR);
        if (lastRegional > 0 &&
            !oneOf(previous, [CB.PREPEND, CB.REGIONAL_INDICATOR]) &&
            all.slice(1, lastRegional).every(isRegional)) {
            const regionalCount = all.reduce(
                (count, cls) => (isRegional(cls) ? count + 1 : count),
                0
            );
            return regionalCount % 2 === 1
                ? BreakLastRegional
                : BreakPenultimateRegional;
        }

        // GB3. CR × LF
        if (previous === CB.CR && next === CB.LF) {
            return NotBreak;
        }

        // GB4. (Control|CR|LF) ÷
        // GB5. ÷ (Control|CR|LF)
        const hardBreaks = [CB.CONTROL, CB.CR, CB.LF];
        if (oneOf(previous, hardBreaks) || oneOf(next, hardBreaks)) {
            return BreakStart;
        }

        // GB6. L × (L|V|LV|LVT)
        if (previous === CB.L && oneOf(next, [CB.L, CB.V, CB.LV, CB.LVT])) {
            return NotBreak;
        }

        // GB7. (LV|V) × (V|T)
        if (oneOf(previous, [CB.LV, CB.V]) && oneOf(next, [CB.V, CB.T])) {
            return NotBreak;
        }

        // GB8. (LVT|T) × T
        if (oneOf(previous, [CB.LVT, CB.T]) && next === CB.T) {
            return NotBreak;
        }

        // GB9. × (Extend|ZWJ)
        if (oneOf(next, [CB.EXTEND, CB.ZWJ])) {
            return NotBreak;
        }

        // GB9a. × SpacingMark
        if (next === CB.SPACINGMARK) {
            return NotBreak;
        }

        // GB9b. Prepend ×
        if (previous === CB.PREPEND) {
            return NotBreak;
        }

        // GB11. \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
        // The cheap checks on the ZWJ and the following pictograph run first;
        // only then is the nearest earlier pictograph looked up and the run
        // between it and the ZWJ verified to be Extend only.
        if (previous === CB.ZWJ && nextEmoji === pictographic) {
            const pictographicIndex = allEmoji
                .slice(0, -1)
                .lastIndexOf(pictographic);
            if (pictographicIndex !== -1 &&
                all.slice(pictographicIndex + 1, -2).every(isExtend)) {
                return NotBreak;
            }
        }

        // GB12. ^ (RI RI)* RI × RI
        // GB13. [^RI] (RI RI)* RI × RI
        // A regional indicator anywhere in `mid` forces a break here.
        if (mid.indexOf(CB.REGIONAL_INDICATOR) !== -1) {
            return Break;
        }
        if (isRegional(previous) && isRegional(next)) {
            return NotBreak;
        }

        // GB999. Any ÷ Any
        return BreakStart;
    }
}
exports.default = GraphemerHelper;