// @orchidjs/unicode-variants@1.1.2 downloaded from https://ga.jspm.io/npm:@orchidjs/unicode-variants@1.1.2/dist/esm/index.js
import {
  setToPattern,
  escape_regex,
  arrayToPattern,
  sequencePattern,
} from "./regex.js";
import { allSubstrings } from "./strings.js";

/**
 * Default code point ranges scanned when building the unicode map.
 * Each entry is an inclusive `[min, max]` pair.
 */
const code_points = [[0, 65535]];

/** Character class (as regex source) of marks stripped while folding. */
const accent_pat = "[̀-ͯ·ʾʼ]";

/**
 * Folded string -> regex source produced by `setToPattern`.
 * Stays `undefined` until `initialize()` has run; exported as a live binding.
 */
let unicode_map;

/**
 * Regex anchored at the start of a string, built by `generateMap` from the
 * folded keys that are longer than one character.
 */
let multi_char_reg;

/** Folded strings longer than this are skipped by `generator`. */
const max_char_length = 3;

/** Single character -> latin replacement, filled from `latin_condensed`. */
const latin_convert = {};

/** Latin replacement -> string of characters that should fold to it. */
const latin_condensed = {
  "/": "⁄∕",
  0: "߀",
  a: "ⱥɐɑ",
  aa: "ꜳ",
  ae: "æǽǣ",
  ao: "ꜵ",
  au: "ꜷ",
  av: "ꜹꜻ",
  ay: "ꜽ",
  b: "ƀɓƃ",
  c: "ꜿƈȼↄ",
  d: "đɗɖᴅƌꮷԁɦ",
  e: "ɛǝᴇɇ",
  f: "ꝼƒ",
  g: "ǥɠꞡᵹꝿɢ",
  h: "ħⱨⱶɥ",
  i: "ɨı",
  j: "ɉȷ",
  k: "ƙⱪꝁꝃꝅꞣ",
  l: "łƚɫⱡꝉꝇꞁɭ",
  m: "ɱɯϻ",
  n: "ꞥƞɲꞑᴎлԉ",
  o: "øǿɔɵꝋꝍᴑ",
  oe: "œ",
  oi: "ƣ",
  oo: "ꝏ",
  ou: "ȣ",
  p: "ƥᵽꝑꝓꝕρ",
  q: "ꝗꝙɋ",
  r: "ɍɽꝛꞧꞃ",
  s: "ßȿꞩꞅʂ",
  t: "ŧƭʈⱦꞇ",
  th: "þ",
  tz: "ꜩ",
  u: "ʉ",
  v: "ʋꝟʌ",
  vy: "ꝡ",
  w: "ⱳ",
  y: "ƴɏỿ",
  z: "ƶȥɀⱬꝣ",
  hv: "ƕ",
};

// Expand the condensed table into a per-character lookup.
for (let latin in latin_condensed) {
  const unicode = latin_condensed[latin] || "";
  for (let i = 0; i < unicode.length; i++) {
    const char = unicode.substring(i, i + 1);
    latin_convert[char] = latin;
  }
}

/** Matches any character listed in `latin_convert`, or anything in `accent_pat`. */
const convert_pat = new RegExp(
  Object.keys(latin_convert).join("|") + "|" + accent_pat,
  "gu"
);

/**
 * Build `unicode_map` once. Later calls are no-ops, so the ranges passed to
 * the first call are the ones that stick.
 *
 * @param {Array<[number, number]>} [_code_points] inclusive ranges; defaults to `code_points`
 */
const initialize = (_code_points) => {
  if (unicode_map !== undefined) {
    return;
  }
  unicode_map = generateMap(_code_points || code_points);
};

/**
 * Thin wrapper around `String.prototype.normalize`.
 *
 * @param {string} str
 * @param {string} [form="NFKD"] normalization form passed through unchanged
 * @returns {string}
 */
const normalize = (str, form = "NFKD") => str.normalize(form);

/**
 * Fold a string by applying `_asciifold` to each element produced by
 * `Array.from(str)` and concatenating the results.
 *
 * @param {string} str
 * @returns {string}
 */
const asciifold = (str) =>
  Array.from(str).reduce(
    /**
     * @param {string} result
     * @param {string} char
     */
    (result, char) => result + _asciifold(char),
    ""
  );

/**
 * Fold a string in one pass: NFKD-normalize, lowercase, replace every
 * `convert_pat` match with its `latin_convert` entry (or drop it when there is
 * none), then recompose with NFC.
 *
 * @param {string} str
 * @returns {string}
 */
const _asciifold = (str) => {
  const folded = normalize(str)
    .toLowerCase()
    .replace(
      convert_pat,
      (/** @type {string} */ char) => latin_convert[char] || ""
    );
  return normalize(folded, "NFC");
};

/**
 * Walk every value in the given inclusive ranges and yield the ones whose
 * folded form differs from their lowercase form.
 *
 * Values are converted with `String.fromCharCode`, i.e. they are treated as
 * UTF-16 code units.
 *
 * @param {Array<[number, number]>} ranges
 * @yields {{folded: string, composed: string, code_point: number}}
 */
function* generator(ranges) {
  for (const [range_min, range_max] of ranges) {
    for (let i = range_min; i <= range_max; i++) {
      const composed = String.fromCharCode(i);
      const folded = asciifold(composed);

      // Folding changed nothing beyond case: not a variant.
      if (folded === composed.toLowerCase()) {
        continue;
      }
      // Skip folds longer than max_char_length.
      if (folded.length > max_char_length) {
        continue;
      }
      // Skip characters that fold away entirely.
      if (folded.length === 0) {
        continue;
      }

      yield { folded: folded, composed: composed, code_point: i };
    }
  }
}

/**
 * Group the variants produced by `generator` by folded form.
 *
 * @param {Array<[number, number]>} _code_points
 * @returns {Object<string, Set<string>>} folded string -> set of regex-escaped variants
 */
const generateSets = (_code_points) => {
  const unicode_sets = {};

  const addMatching = (folded, to_add) => {
    /** @type {Set<string>} */
    const folded_set = unicode_sets[folded] || new Set();

    // Only add the candidate when the set's current pattern does not already
    // match it (case-insensitively). setToPattern comes from ./regex.js.
    const existing = new RegExp("^" + setToPattern(folded_set) + "$", "iu");
    if (to_add.match(existing)) {
      return;
    }

    folded_set.add(escape_regex(to_add));
    unicode_sets[folded] = folded_set;
  };

  for (const value of generator(_code_points)) {
    addMatching(value.folded, value.folded);
    addMatching(value.folded, value.composed);
  }

  return unicode_sets;
};

/**
 * Build the folded-string -> pattern map for the given ranges.
 *
 * Side effect: replaces the module-level `multi_char_reg` with a regex built
 * from the multi-character folded keys, longest first.
 *
 * @param {Array<[number, number]>} _code_points
 * @returns {Object<string, string>}
 */
const generateMap = (_code_points) => {
  const unicode_sets = generateSets(_code_points);
  const map = {};
  const multi_char = [];

  for (const folded in unicode_sets) {
    const set = unicode_sets[folded];
    if (set) {
      map[folded] = setToPattern(set);
    }
    if (folded.length > 1) {
      multi_char.push(escape_regex(folded));
    }
  }

  // Longest keys first.
  multi_char.sort((a, b) => b.length - a.length);

  const multi_char_patt = arrayToPattern(multi_char);
  multi_char_reg = new RegExp("^" + multi_char_patt, "u");

  return map;
};

/**
 * Replace each string that has a `unicode_map` entry with that entry and join
 * the result with `sequencePattern` (from ./regex.js).
 *
 * Initializes the map lazily, so this exported helper can be called before
 * `initialize()` / `getPattern()` without throwing on an undefined map.
 *
 * @param {string[]} strings
 * @param {number} [min_replacement=1] minimum total length of replaced strings
 * @returns {string} the joined pattern, or "" when fewer than
 *   `min_replacement` characters were replaced
 */
const mapSequence = (strings, min_replacement = 1) => {
  initialize();

  let chars_replaced = 0;
  const mapped = strings.map((str) => {
    if (unicode_map[str]) {
      chars_replaced += str.length;
    }
    return unicode_map[str] || str;
  });

  if (chars_replaced >= min_replacement) {
    return sequencePattern(mapped);
  }
  return "";
};

/**
 * Map every entry returned by `allSubstrings(str)` (from ./strings.js) through
 * `mapSequence` and combine the results with `arrayToPattern`.
 *
 * @param {string} str
 * @param {number} [min_replacement=1] raised to at least `str.length - 1`
 * @returns {string}
 */
const substringsToPattern = (str, min_replacement = 1) => {
  const required = Math.max(min_replacement, str.length - 1);
  return arrayToPattern(
    allSubstrings(str).map((sub_pat) => mapSequence(sub_pat, required))
  );
};

/**
 * Convert a list of sequences into one pattern.
 *
 * @param {Sequence[]} sequences
 * @param {boolean} [all=true] when false, the last part of each sequence is left out
 * @returns {string}
 */
const sequencesToPattern = (sequences, all = true) => {
  const min_replacement = sequences.length > 1 ? 1 : 0;

  return arrayToPattern(
    sequences.map((sequence) => {
      const seq = [];
      const len = all ? sequence.length() : sequence.length() - 1;
      for (let j = 0; j < len; j++) {
        seq.push(
          substringsToPattern(sequence.substrs[j] || "", min_replacement)
        );
      }
      return sequencePattern(seq);
    })
  );
};

/**
 * Report whether `sequences` already holds a sequence that covers the same
 * span and text as `needle_seq` and has no multi-character part overlapping a
 * multi-character part of `needle_seq` in a different place.
 *
 * @param {Sequence} needle_seq
 * @param {Sequence[]} sequences
 * @returns {boolean}
 */
const inSequences = (needle_seq, sequences) => {
  for (const seq of sequences) {
    if (seq.start !== needle_seq.start || seq.end !== needle_seq.end) {
      continue;
    }
    if (seq.substrs.join("") !== needle_seq.substrs.join("")) {
      continue;
    }

    const needle_parts = needle_seq.parts;

    // True when `part` partially overlaps one of the needle's parts.
    const overlapsNeedle = (part) => {
      for (const needle_part of needle_parts) {
        if (
          needle_part.start === part.start &&
          needle_part.substr === part.substr
        ) {
          return false;
        }
        // Single-character parts are not checked for overlap.
        if (part.length === 1 || needle_part.length === 1) {
          continue;
        }
        if (part.start < needle_part.start && part.end > needle_part.start) {
          return true;
        }
        if (needle_part.start < part.start && needle_part.end > part.start) {
          return true;
        }
      }
      return false;
    };

    const conflicting = seq.parts.filter(overlapsNeedle);
    if (conflicting.length > 0) {
      continue;
    }
    return true;
  }
  return false;
};

/**
 * Ordered list of parts (`{start, end, length, substr}`) together with the
 * span they cover.
 */
class Sequence {
  parts;
  substrs;
  start;
  end;

  constructor() {
    this.parts = [];
    this.substrs = [];
    this.start = 0;
    this.end = 0;
  }

  /**
   * Append a part and widen the covered span. Falsy parts are ignored.
   *
   * @param {{start: number, end: number, length: number, substr: string}|undefined} part
   */
  add(part) {
    if (!part) {
      return;
    }
    this.parts.push(part);
    this.substrs.push(part.substr);
    this.start = Math.min(part.start, this.start);
    this.end = Math.max(part.end, this.end);
  }

  /** @returns the most recently added part, or `undefined` when empty */
  last() {
    return this.parts[this.parts.length - 1];
  }

  /** @returns {number} number of parts */
  length() {
    return this.parts.length;
  }

  /**
   * Copy this sequence, truncating its last part so that it ends at `position`.
   *
   * @param {number} position
   * @param {{substr: string}} last_piece part whose `substr` supplies the truncated text
   * @returns {Sequence}
   */
  clone(position, last_piece) {
    const copy = new Sequence();

    // Deep copy so the clone does not share part objects with this sequence.
    const parts = JSON.parse(JSON.stringify(this.parts));
    const last_part = parts.pop();
    for (const part of parts) {
      copy.add(part);
    }

    const last_substr = last_piece.substr.substring(
      0,
      position - last_part.start
    );
    const clone_last_len = last_substr.length;
    copy.add({
      start: last_part.start,
      end: last_part.start + clone_last_len,
      length: clone_last_len,
      substr: last_substr,
    });

    return copy;
  }
}

/**
 * Build a regex source string for `str` from its folded form, using
 * `unicode_map` and `multi_char_reg`.
 *
 * Initializes the unicode map with the default ranges if that has not
 * happened yet.
 *
 * @param {string} str
 * @returns {string}
 */
const getPattern = (str) => {
  initialize();

  const folded = asciifold(str);
  let pattern = "";
  let sequences = [new Sequence()];

  for (let i = 0; i < folded.length; i++) {
    const rest = folded.substring(i);
    const match = rest.match(multi_char_reg);
    const char = folded.substring(i, i + 1);
    // An empty match is falsy and is therefore handled like "no match" below.
    const match_str = match ? match[0] : null;

    // Clones created because a multi-char match starts inside an earlier part.
    let overlapping = [];
    // Which branch each sequence took on this iteration: "1", "2" or "3".
    const added_types = new Set();

    for (const sequence of sequences) {
      const last_piece = sequence.last();
      const can_append =
        !last_piece || last_piece.length === 1 || last_piece.end <= i;

      if (can_append) {
        if (match_str) {
          const len = match_str.length;
          sequence.add({
            start: i,
            end: i + len,
            length: len,
            substr: match_str,
          });
          added_types.add("1");
        } else {
          sequence.add({ start: i, end: i + 1, length: 1, substr: char });
          added_types.add("2");
        }
      } else if (match_str) {
        const clone = sequence.clone(i, last_piece);
        const len = match_str.length;
        clone.add({ start: i, end: i + len, length: len, substr: match_str });
        overlapping.push(clone);
      } else {
        added_types.add("3");
      }
    }

    if (overlapping.length > 0) {
      // Shortest clones first; only keep ones not already represented.
      overlapping = overlapping.sort((a, b) => a.length() - b.length());
      for (const clone of overlapping) {
        if (inSequences(clone, sequences)) {
          continue;
        }
        sequences.push(clone);
      }
      continue;
    }

    // Every sequence took the same appending branch: emit everything except
    // the last part and restart from a single sequence holding that part.
    if (i > 0 && added_types.size === 1 && !added_types.has("3")) {
      pattern += sequencesToPattern(sequences, false);
      const new_seq = new Sequence();
      const old_seq = sequences[0];
      if (old_seq) {
        new_seq.add(old_seq.last());
      }
      sequences = [new_seq];
    }
  }

  pattern += sequencesToPattern(sequences, true);
  return pattern;
};

export {
  _asciifold,
  asciifold,
  code_points,
  escape_regex,
  generateMap,
  generateSets,
  generator,
  getPattern,
  initialize,
  mapSequence,
  normalize,
  substringsToPattern,
  unicode_map,
};