Spaces:
Paused
Paused
/**
 * Dashycode!
 *
 * Encodes a string in a restricted string containing only alphanumeric
 * characters and dashes.
 *
 * (The name is a riff on Punycode, which is what I originally wanted
 * to use for this purpose, but it turns out Punycode does not work on
 * arbitrary strings.)
 *
 * @author Guangcong Luo <guangcongluo@gmail.com>
 * @license MIT
 */
/**
 * Alphabet of the coded bitstream: 32 characters, so each character's index
 * carries exactly 5 bits. The characters `0`, `1`, `l` and `o` are not part
 * of the alphabet.
 */
const CODE_MAP = '23456789abcdefghijkmnpqrstuvwxyz';

/**
 * The 32 ASCII punctuation characters that are stored in the bitstream as a
 * 5-bit index into this string.
 */
const UNSAFE_MAP = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~';
// chunk types:
// 0b00 = 0x0 = block separator ("wait until next dash")
// 0b01 = 0x1 = capitalize
//   0b1_01 = 0x5 = specify capitalization of next 8 alphabetic chars of safe part (8 more bits)
//   0b0_01 = 0x1 = short for 0b00000001_1_01 (capitalize first letter of next 8 alpha chars of safe part)
// 0b10 = 0x2 = ASCII in UNSAFE_MAP (5 more bits)
// 0b11 = 0x3 = other
//   0b0_11 = 0x3 = space
//   0b1_11 = 0x7 = UTF-16 (16 more bits)
/**
 * State of a Dashycode bitstream.
 *
 * A given stream object is used either for writing or for reading, never
 * for both at the same time.
 */
interface DashyStream {
	/**
	 * Bits that have been written but not yet turned into a coded character,
	 * or bits that have been decoded but not yet consumed. The lowest bit is
	 * the oldest one.
	 */
	buf: number;
	/**
	 * Number of meaningful bits currently held in `buf`. While reading this
	 * can drop below zero once the coded text is exhausted.
	 */
	bufLength: number;
	/**
	 * Coded text: characters already emitted (write stream) or characters
	 * still waiting to be decoded (read stream).
	 */
	codeBuf: string;
}
/**
 * Appends `writeBufLength` bits taken from `writeBuf` to a write stream.
 *
 * The new bits are placed above the bits already pending in the stream;
 * every complete group of 5 pending bits is then emitted as one `CODE_MAP`
 * character, lowest group first.
 *
 * @param stream stream to append to (mutated in place)
 * @param writeBufLength number of bits being written
 * @param writeBuf the bits to write, lowest bit first
 */
function streamWrite(stream: DashyStream, writeBufLength: number, writeBuf: number) {
	let pendingBits = stream.buf + (writeBuf << stream.bufLength);
	let pendingLength = stream.bufLength + writeBufLength;
	let emitted = '';

	// drain complete 5-bit groups; fewer than 5 bits stay behind in the stream
	for (; pendingLength >= 5; pendingLength -= 5) {
		emitted += CODE_MAP.charAt(pendingBits & 0x1F);
		pendingBits >>= 5;
	}

	stream.codeBuf += emitted;
	stream.buf = pendingBits;
	stream.bufLength = pendingLength;
}
/**
 * Returns the coded text of a write stream, including the bits that are
 * still pending in `stream.buf`.
 *
 * Trailing `2` characters (all-zero 5-bit groups) are dropped from the
 * result. The stream itself is not modified.
 *
 * @param stream write stream to read the code from
 * @returns the coded text without trailing `2`s
 */
function streamGetCode(stream: DashyStream) {
	const withPendingBits = stream.codeBuf + CODE_MAP.charAt(stream.buf);
	return withPendingBits.replace(/2+$/, '');
}
/**
 * Returns the next bits of a read stream without consuming them.
 *
 * Coded characters are decoded from the front of `stream.codeBuf` into
 * `stream.buf` until at least `readLength` bits are buffered or the coded
 * text runs out; decoded characters are removed from `codeBuf`.
 *
 * @param stream read stream (its buffers are refilled in place)
 * @param readLength number of bits wanted
 * @param readMask mask applied to the buffered bits; defaults to the lowest
 *   `readLength` bits
 * @returns the buffered bits selected by `readMask`
 * @throws Error if a character outside `CODE_MAP` is encountered
 */
function streamPeek(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	for (;;) {
		const hasEnoughBits = stream.bufLength >= readLength;
		if (hasEnoughBits || stream.codeBuf.length === 0) break;

		const fiveBits = CODE_MAP.indexOf(stream.codeBuf.charAt(0));
		if (fiveBits === -1) throw new Error("Invalid character in coded buffer");

		// the decoded group goes above the bits that are already buffered
		stream.buf += fiveBits << stream.bufLength;
		stream.bufLength += 5;
		stream.codeBuf = stream.codeBuf.slice(1);
	}
	return stream.buf & readMask;
}
/**
 * Reads and consumes the next `readLength` bits of a read stream.
 *
 * @param stream read stream (mutated in place)
 * @param readLength number of bits to consume
 * @param readMask mask applied to the buffered bits; defaults to the lowest
 *   `readLength` bits
 * @returns the bits selected by `readMask`, as they were before consuming
 * @throws Error if `streamPeek` meets a character outside `CODE_MAP`
 */
function streamRead(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	const value = streamPeek(stream, readLength, readMask);
	// bufLength is allowed to become negative here: past its end, a stream
	// behaves as an endless run of 0 bits
	stream.bufLength -= readLength;
	stream.buf >>= readLength;
	return value;
}
/**
 * Encodes an arbitrary string as Dashycode.
 *
 * The result has the form `safePart--unsafePart`, or just `safePart` when
 * nothing had to be written to the unsafe stream:
 * - `safePart` holds the input's ASCII letters and digits, with one `-` for
 *   every run of other characters between them;
 * - `unsafePart` is the coded bitstream describing those other characters
 *   and, unless `allowCaps` is set, which letters were uppercase.
 *
 * Special forms: the empty string encodes to `0--0`; an input without any
 * letters or digits gets the safe part `0` and an unsafe part prefixed with
 * `0`; an input that starts with a non-alphanumeric character gets a `2`
 * appended to the unsafe part.
 *
 * @param str string to encode
 * @param allowCaps when true, uppercase letters are copied to the safe part
 *   unchanged instead of being lowercased and recorded in the bitstream
 * @returns the Dashycode form of `str`
 * @throws Error if an internal invariant of the encoder is violated
 */
export function encode(str: string, allowCaps = false): string {
	if (!str) return '0--0';

	const isLower = (code: number) => 97 <= code && code <= 122; // a-z
	const isUpper = (code: number) => 65 <= code && code <= 90; // A-Z
	const isDigit = (code: number) => 48 <= code && code <= 57; // 0-9
	const isAlphanumeric = (code: number) => isLower(code) || isUpper(code) || isDigit(code);

	let safePart = '';
	const unsafeStream: DashyStream = {
		codeBuf: '',
		buf: 0x0,
		bufLength: 0,
	};
	// whether the previous character went into the safe part
	let isSafe = true;
	// number of letters seen since the last capitalization chunk boundary
	let alphaIndex = 0;
	// pending capitalization chunk (0x5 tag + one bit per letter), 0 if none
	let capBuffer = 0x0;

	// runs one step past the last character so that a pending
	// capitalization chunk still gets flushed
	for (let i = 0; i <= str.length; i++) {
		const atEnd = i === str.length;
		// -1 and NaN fail every range check below, so they count as unsafe
		const curCharCode = atEnd ? -1 : str.charCodeAt(i);
		const isLetter = isLower(curCharCode) || isUpper(curCharCode);
		const isSafeChar = isLetter || isDigit(curCharCode);

		if (capBuffer && (!isSafeChar || alphaIndex >= 8 || atEnd)) {
			// flush cap buffer
			if (capBuffer === 0xD) {
				// only the first letter is capitalized: use the short form
				streamWrite(unsafeStream, 3, 0x1);
			} else {
				streamWrite(unsafeStream, 11, capBuffer);
			}
			alphaIndex -= 8;
			capBuffer = 0x0;
		}
		if (atEnd) break;

		if (isSafeChar) {
			if (alphaIndex < 0) throw new Error("alphaIndex should be non-negative here");
			if (!isSafe) {
				if (capBuffer) throw new Error("capBuffer shouldn't exist here");
				// close the current unsafe block
				streamWrite(unsafeStream, 2, 0x0);
				isSafe = true;
			}
			if (isUpper(curCharCode) && !allowCaps) {
				safePart += String.fromCharCode(curCharCode + 32);
				// emit an all-lowercase chunk for every 8 letters passed
				// without a capital, so chunks stay aligned for the decoder
				while (alphaIndex >= 8) {
					if (capBuffer) throw new Error("capBuffer shouldn't exist here");
					alphaIndex -= 8;
					streamWrite(unsafeStream, 11, 0x5);
				}
				if (!capBuffer) capBuffer = 0x5;
				capBuffer += 1 << (alphaIndex + 3);
			} else {
				safePart += str.charAt(i);
			}
			if (isLetter) alphaIndex++;
			continue;
		}

		if (capBuffer) throw new Error("capBuffer shouldn't exist here");
		alphaIndex = 0;

		if (isSafe && curCharCode === 32) { // space
			// a single space between two safe runs is a dash with an
			// empty unsafe block
			if (isAlphanumeric(str.charCodeAt(i + 1))) {
				safePart += '-';
				streamWrite(unsafeStream, 2, 0x0);
				continue;
			}
		}
		if (isSafe) {
			safePart += '-';
			isSafe = false;
		}

		if (curCharCode === 32) { // space
			streamWrite(unsafeStream, 3, 0x3);
			continue;
		}
		const unsafeMapIndex = UNSAFE_MAP.indexOf(str.charAt(i));
		if (unsafeMapIndex >= 0) {
			streamWrite(unsafeStream, 7, (unsafeMapIndex << 2) + 0x2);
		} else {
			streamWrite(unsafeStream, 19, (curCharCode << 3) + 0x7);
		}
	}

	let unsafePart = streamGetCode(unsafeStream);
	if (safePart.startsWith('-')) {
		// input starts with an unsafe block: flag it with a trailing `2`
		safePart = safePart.slice(1);
		unsafePart = `${unsafePart}2`;
	}
	if (safePart.endsWith('-')) {
		safePart = safePart.slice(0, -1);
	}
	if (!safePart) {
		safePart = '0';
		unsafePart = `0${unsafePart}`;
		if (unsafePart.endsWith('2')) unsafePart = unsafePart.slice(0, -1);
	}
	if (!unsafePart) return safePart;
	return `${safePart}--${unsafePart}`;
}
/**
 * Decodes a Dashycode string back into the original string.
 *
 * Input without a `--` has no unsafe part and is decoded by turning every
 * dash into a space. Otherwise the text before the last `--` is the safe
 * part and the text after it is the coded bitstream; each dash in the safe
 * part is replaced by the next block of characters read from the stream,
 * and lowercase letters are capitalized as directed by the stream.
 *
 * @param codedStr Dashycode text
 * @returns the decoded string
 * @throws Error "Invalid Dashycode" if the `0` (no safe chars) marker is
 *   present but the string does not start with `0--`
 * @throws Error "Invalid capitalization token" if a capitalization chunk
 *   shows up inside an unsafe block
 * @throws Error if the bitstream contains a character outside `CODE_MAP`
 */
export function decode(codedStr: string) {
	let separatorIndex = codedStr.lastIndexOf('--');
	if (separatorIndex < 0) {
		// the general decoder below could handle this too; this shortcut
		// should be faster
		return codedStr.replace(/-/g, ' ');
	}

	if (codedStr.charAt(separatorIndex + 2) === '0') {
		// "no safe chars" form: `0--0<stream>`
		if (separatorIndex !== 1 || !codedStr.startsWith('0')) {
			throw new Error("Invalid Dashycode");
		}
		separatorIndex -= 1;
		codedStr = '--' + codedStr.slice(4);
	}
	if (codedStr.endsWith('2')) {
		// "starts with an unsafe block" flag: turn it into a leading dash
		codedStr = '-' + codedStr.slice(0, -1);
		separatorIndex += 1;
	}

	const unsafeStream: DashyStream = {
		codeBuf: codedStr.slice(separatorIndex + 2),
		buf: 0x0,
		bufLength: 0,
	};

	/**
	 * Capitalization state:
	 * 1    : the next lowercase letter has to check the stream for a
	 *        capitalization chunk
	 * 0    : every remaining letter of this safe run stays lowercase
	 * other: a 1 marker bit above n bits giving the capitalization of the
	 *        next n letters
	 */
	let capBuffer = 1;
	let decoded = '';

	// the first dash of the final `--` is processed as well: it stands for
	// the unsafe block that ends the string
	for (let i = 0; i <= separatorIndex; i++) {
		const symbol = codedStr.charAt(i);

		if (symbol === '-') {
			capBuffer = 1;
			// pull out the next unsafe block
			let blockIsEmpty = true;
			for (;;) {
				const chunkType = streamRead(unsafeStream, 2, 0x3);
				if (chunkType === 0x0) break; // go back to parsing safe chars
				if (chunkType === 0x1) throw new Error("Invalid capitalization token");

				if (chunkType === 0x2) {
					decoded += UNSAFE_MAP.charAt(streamRead(unsafeStream, 5, 0x1F));
				} else if (streamRead(unsafeStream, 1, 0x1)) {
					decoded += String.fromCharCode(streamRead(unsafeStream, 16, 0xFFFF));
				} else {
					decoded += ' ';
				}
				blockIsEmpty = false;
			}
			// an empty block between two safe runs is a single space
			if (blockIsEmpty && i !== separatorIndex) decoded += ' ';
			continue;
		}

		// safe char
		const symbolCode = codedStr.charCodeAt(i);
		if (symbolCode < 97 || symbolCode > 122) {
			// not a-z: copied through unchanged
			decoded += symbol;
			continue;
		}

		if (capBuffer === 1) {
			capBuffer = 0;
			if (streamPeek(unsafeStream, 2, 0x3) === 0x1) {
				const capToken = streamRead(unsafeStream, 3, 0x7);
				if (capToken === 0x5) {
					capBuffer = streamRead(unsafeStream, 8, 0xFF) + 0x100;
				} else if (capToken === 0x1) {
					capBuffer = 0x101;
				}
			}
		}
		const shouldCapitalize = (capBuffer & 0x1) !== 0;
		capBuffer >>= 1;
		decoded += shouldCapitalize ? String.fromCharCode(symbolCode - 32) : symbol;
	}
	return decoded;
}
/**
 * Renders a coded bitstream (the unsafe part of a Dashycode string) as
 * human-readable text, one space-prefixed entry per chunk.
 *
 * Each entry is prepended to the output, so the first chunk of the stream
 * ends up rightmost. A leading `0` and a trailing `2` in `codeBuf` are
 * stripped first and reported as `[no safe chars]` / `[start unsafe]`.
 *
 * @param codeBuf coded bitstream text
 * @param translate when true, chunks are labelled with names (`|`,
 *   `capfirst`, `-cap`, `-ascii`, `space`, `-utf`); when false, with their
 *   bit patterns
 * @returns the visualization string
 * @throws Error if `codeBuf` contains a character outside `CODE_MAP`
 */
export function vizStream(codeBuf: string, translate = true) {
	let spacedStream = '';
	if (codeBuf.startsWith('0')) {
		codeBuf = codeBuf.slice(1);
		spacedStream = ' [no safe chars]' + spacedStream;
	}
	if (codeBuf.endsWith('2')) {
		codeBuf = codeBuf.slice(0, -1);
		spacedStream = ' [start unsafe]' + spacedStream;
	}

	const stream: DashyStream = {
		codeBuf,
		buf: 0x0,
		bufLength: 0,
	};

	// consumes `width` bits and formats them as zero-padded binary
	const readBinary = (width: number) => streamRead(stream, width).toString(2).padStart(width, '0');

	while (stream.bufLength > 0 || stream.codeBuf) {
		const chunkType = streamRead(stream, 2);
		let entry: string;
		if (chunkType === 0x0) {
			entry = translate ? ' |' : ' 00';
		} else if (chunkType === 0x1) {
			if (streamRead(stream, 1)) {
				entry = ' ' + readBinary(8) + (translate ? '-cap' : '_1_01');
			} else {
				entry = translate ? ' capfirst' : ' 0_01';
			}
		} else if (chunkType === 0x2) {
			entry = ' ' + readBinary(5) + (translate ? '-ascii' : '_10');
		} else if (streamRead(stream, 1)) {
			// chunk type 0x3 with the extra bit set
			entry = ' ' + readBinary(16) + (translate ? '-utf' : '_1_11');
		} else {
			entry = translate ? ' space' : ' 0_11';
		}
		spacedStream = entry + spacedStream;
	}
	return spacedStream;
}