/**
 * Caret/M-decode operation
 *
 * Reverses the notation produced by tools such as `cat -v`:
 *   - "^X"   (X in "@".."_")  -> control byte 0x00..0x1f
 *   - "^?"                    -> 0x7f
 *   - "M-c"  (c printable)    -> byte 0xa0..0xfe
 *   - "M-^X" (X in "@".."_")  -> byte 0x80..0x9f
 *   - "M-^?"                  -> 0xff
 *
 * The notation is ambiguous by nature: a literal "^_" or "M-a" in the text is
 * indistinguishable from an encoded byte, and is always decoded as the byte.
 *
 * https://gist.githubusercontent.com/JaHIY/3c91bbf7bea5661e6abfbd1349ee81a2/raw/c7b480e9ff24bcb8f5287a8a8a2dcb9bf5628506/decode_m_notation.cpp
 */
class CaretMdecode extends Operation {

    /**
     * CaretMdecode constructor
     */
    constructor() {
        super();

        this.name = "Caret/M-decode";
        this.module = "Default";
        this.description = "Decodes caret or M-encoded strings, i.e. ^M turns into a carriage return, M-^] turns into 0x9d. Sources such as `cat -v`.\n\nPlease be aware that when using `cat -v` ^_ (caret-underscore) will not be encoded, but represents a valid encoding (namely that of 0x1f).";
        this.infoURL = "https://en.wikipedia.org/wiki/Caret_notation";
        this.inputType = "string";
        this.outputType = "byteArray";
        this.args = [];
    }

    /**
     * Decodes caret / meta notation into raw byte values.
     *
     * The input is scanned once with a small state machine. `pending` holds the
     * literal characters of an escape that has been started but not completed
     * ("", "^", "M", "M-" or "M-^"). When the next character cannot continue
     * the escape, the pending characters are emitted and that character is
     * examined again from the initial state, so that it may itself start a new
     * escape (e.g. the "^M" in "M^M").
     *
     * @param {string} input - text containing caret / meta notation
     * @param {Object[]} args - unused; the operation takes no arguments
     * @returns {byteArray} decoded values, one array entry per decoded character
     */
    run(input, args) {
        const CARET = 94; // "^"
        const HYPHEN = 45; // "-"
        const UPPER_M = 77; // "M"
        const QUESTION = 63; // "?"
        // "M-^" is itself the meta form of "^": 94 + 128.
        const META_CARET = 222;

        const bytes = [];
        let pending = "";
        let i = 0;

        while (i < input.length) {
            const charCode = input.charCodeAt(i);

            if (pending === "M-^") {
                pending = "";
                if (charCode >= 64 && charCode <= 95) {
                    // "M-^@".."M-^_" -> 0x80..0x9f
                    bytes.push(charCode + 64);
                } else if (charCode === QUESTION) {
                    bytes.push(255);
                } else {
                    // Not a caret target, so "M-^" was the byte 0xde on its own.
                    // Do not advance: re-examine this character from scratch.
                    bytes.push(META_CARET);
                    continue;
                }
            } else if (pending === "M-") {
                if (charCode === CARET) {
                    pending = "M-^";
                } else if (charCode >= 32 && charCode <= 126) {
                    // "M- ".."M-~" -> 0xa0..0xfe
                    bytes.push(charCode + 128);
                    pending = "";
                } else {
                    // "M-" followed by a non-printable: keep it as literal text.
                    bytes.push(UPPER_M, HYPHEN);
                    pending = "";
                    continue;
                }
            } else if (pending === "M") {
                if (charCode === HYPHEN) {
                    pending = "M-";
                } else {
                    // A plain "M"; the current character may begin a new escape.
                    bytes.push(UPPER_M);
                    pending = "";
                    continue;
                }
            } else if (pending === "^") {
                pending = "";
                if (charCode >= 64 && charCode <= 95) {
                    // "^@".."^_" -> 0x00..0x1f. Characters above "_" are not
                    // caret targets and stay literal.
                    bytes.push(charCode - 64);
                } else if (charCode === QUESTION) {
                    bytes.push(127);
                } else {
                    bytes.push(CARET);
                    continue;
                }
            } else if (charCode === UPPER_M) {
                pending = "M";
            } else if (charCode === CARET) {
                pending = "^";
            } else {
                bytes.push(charCode);
            }

            i++;
        }

        // Input ended in the middle of an escape: emit what was seen instead of
        // dropping it.
        if (pending === "M-^") {
            bytes.push(META_CARET);
        } else {
            for (let j = 0; j < pending.length; j++) {
                bytes.push(pending.charCodeAt(j));
            }
        }

        return bytes;
    }

}
/*
 * Caret/M-decode tests.
 *
 * Registers the recipe tests for the "Caret/M-decode" operation.
 */
TestRegister.addTests([
    {
        // Empty input must produce empty output.
        name: "Caret/M-decode: nothing",
        input: "",
        expectedOutput: "",
        recipeConfig: [
            {
                op: "Caret/M-decode",
                args: [],
            },
        ],
    },
    {
        /*
         * Tests the full range: the notation for every byte 0x00..0xff in order.
         *
         * Two spots are ambiguous in the notation itself and therefore do not
         * decode back to the original byte:
         *   - the printable pair "^_" (\x5e\x5f) decodes as \x1f;
         *   - "M-^" (\xde) directly followed by "M-_" (\xdf) is read as
         *     "M-^M" (\x8d) followed by the literal "-_" (\x2d\x5f).
         */
        name: "Caret/M-decode: Full set",
        input: "^@^A^B^C^D^E^F^G^H^I^J^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\\^]^^^_ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^KM-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\\M-^]M-^^M-^_M- M-!M-\"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-GM-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[M-\\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-oM-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?",
        expectedOutput: "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x1f\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\x8d\x2d\x5f\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
        recipeConfig: [
            {
                op: "Caret/M-decode",
                args: [],
            },
        ],
    },
]);