From 92a0118d6ea695c09b0083e732bb29f5c5e73c03 Mon Sep 17 00:00:00 2001 From: apainintheneck Date: Mon, 26 Feb 2024 23:27:00 -0800 Subject: [PATCH 1/6] Merge in support for escape sequences and the ground work for UTF8 This is the first batch of changes from an upstream PR that provides UTF8 support. As a side effect, it means that we now will have the correct support for escape sequences in single and multiline mode. We previously merged in a smaller change which provided support for single line mode with escape sequences by fixing the cursor position but this is more comprehensive. The next commit will add the UTF8 library. Author: https://github.com/yhirose PR: https://github.com/antirez/linenoise/pull/187 --- ext/linenoise.c | 242 ++++++++++++++++++++++++++++++++++-------------- ext/linenoise.h | 11 ++- 2 files changed, 184 insertions(+), 69 deletions(-) diff --git a/ext/linenoise.c b/ext/linenoise.c index 5c81f4b..684210f 100644 --- a/ext/linenoise.c +++ b/ext/linenoise.c @@ -122,6 +122,7 @@ #define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 #define LINENOISE_MAX_LINE 4096 +#define UNUSED(x) (void)(x) static char *unsupported_term[] = {"dumb","cons25","emacs",NULL}; static linenoiseCompletionCallback *completionCallback = NULL; static linenoiseHintsCallback *hintsCallback = NULL; @@ -177,7 +178,7 @@ FILE *lndebug_fp = NULL; lndebug_fp = fopen("/tmp/lndebug.txt","a"); \ fprintf(lndebug_fp, \ "[%d %d %d] p: %d, rows: %d, rpos: %d, max: %d, oldmax: %d\n", \ - (int)l->len,(int)l->pos,(int)l->oldpos,plen,rows,rpos, \ + (int)l->len,(int)l->pos,(int)l->oldcolpos,plen,rows,rpos, \ (int)l->oldrows,old_rows); \ } \ fprintf(lndebug_fp, ", " __VA_ARGS__); \ @@ -187,6 +188,85 @@ FILE *lndebug_fp = NULL; #define lndebug(...) #endif +/* ========================== Encoding functions ============================= */ +/* Get byte length and column length of the previous character */ +static size_t defaultPrevCharLen(const char *buf, size_t buf_len, size_t pos, size_t *col_len) { + UNUSED(buf); UNUSED(buf_len); UNUSED(pos); + if (col_len != NULL) *col_len = 1; + return 1; +} + +/* Get byte length and column length of the next character */ +static size_t defaultNextCharLen(const char *buf, size_t buf_len, size_t pos, size_t *col_len) { + UNUSED(buf); UNUSED(buf_len); UNUSED(pos); + if (col_len != NULL) *col_len = 1; + return 1; +} + +/* Read bytes of the next character */ +static size_t defaultReadCode(int fd, char *buf, size_t buf_len, int* c) { + if (buf_len < 1) return -1; + int nread = read(fd,&buf[0],1); + if (nread == 1) *c = buf[0]; + return nread; +} + +/* Set default encoding functions */ +static linenoisePrevCharLen *prevCharLen = defaultPrevCharLen; +static linenoiseNextCharLen *nextCharLen = defaultNextCharLen; +static linenoiseReadCode *readCode = defaultReadCode; + +/* Set used defined encoding functions */ +void linenoiseSetEncodingFunctions( + linenoisePrevCharLen *prevCharLenFunc, + linenoiseNextCharLen *nextCharLenFunc, + linenoiseReadCode *readCodeFunc) { + prevCharLen = prevCharLenFunc; + nextCharLen = nextCharLenFunc; + readCode = readCodeFunc; +} + +/* Get column length from begining of buffer to current byte position */ +static size_t columnPos(const char *buf, size_t buf_len, size_t pos) { + size_t ret = 0; + size_t off = 0; + while (off < pos) { + size_t col_len; + size_t len = nextCharLen(buf,buf_len,off,&col_len); + off += len; + ret += col_len; + } + return ret; +} + +/* Get column length from begining of buffer to current byte position for multiline mode*/ +static size_t columnPosForMultiLine(const char *buf, size_t buf_len, size_t pos, size_t cols, size_t ini_pos) { + size_t ret = 0; + size_t colwid = ini_pos; + + size_t off = 0; + while (off < buf_len) { + size_t col_len; + size_t len = nextCharLen(buf,buf_len,off,&col_len); + + int dif = (int)(colwid + col_len) - (int)cols; + if (dif > 0) { + ret += dif; + colwid = col_len; + } else if (dif == 0) { + colwid = 0; + } else { + colwid += col_len; + } + + if (off >= pos) break; + off += len; + ret += col_len; + } + + return ret; +} + /* ======================= Low level terminal handling ====================== */ /* Enable "mask mode". When it is enabled, instead of the input that @@ -321,29 +401,6 @@ static int getColumns(int ifd, int ofd) { return 80; } -/* Get the length of the string ignoring escape-sequences */ - static int strlenPerceived(const char* str) { - int len = 0; - if (str) { - int escaping = 0; - while(*str) { - if (escaping) { /* was terminating char reached? */ - if(*str >= 0x40 && *str <= 0x7E) - escaping = 0; - } - else if(*str == '\x1b') { - escaping = 1; - if (str[1] == '[') str++; - } - else { - len++; - } - str++; - } - } - return len; - } - /* Clear the screen. Used to handle ctrl+l */ void linenoiseClearScreen(void) { if (write(STDOUT_FILENO,"\x1b[H\x1b[2J",7) <= 0) { @@ -532,14 +589,15 @@ static void abFree(struct abuf *ab) { /* Helper of refreshSingleLine() and refreshMultiLine() to show hints * to the right of the prompt. */ -void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) { +void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int pcollen) { char seq[64]; - if (hintsCallback && plen+l->len < l->cols) { + size_t collen = pcollen+columnPos(l->buf,l->len,l->len); + if (hintsCallback && collen < l->cols) { int color = -1, bold = 0; char *hint = hintsCallback(l->buf,&color,&bold); if (hint) { int hintlen = strlen(hint); - int hintmaxlen = l->cols-(plen+l->len); + int hintmaxlen = l->cols-collen; if (hintlen > hintmaxlen) hintlen = hintmaxlen; if (bold == 1 && color == -1) color = 37; if (color != -1 || bold != 0) @@ -556,6 +614,41 @@ void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) { } } +/* Check if text is an ANSI escape sequence + */ +static int isAnsiEscape(const char *buf, size_t buf_len, size_t* len) { + if (buf_len > 2 && !memcmp("\033[", buf, 2)) { + size_t off = 2; + while (off < buf_len) { + switch (buf[off++]) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'J': case 'K': + case 'S': case 'T': case 'f': case 'm': + *len = off; + return 1; + } + } + } + return 0; +} + +/* Get column length of prompt text + */ +static size_t promptTextColumnLen(const char *prompt, size_t plen) { + char buf[LINENOISE_MAX_LINE]; + size_t buf_len = 0; + size_t off = 0; + while (off < plen) { + size_t len; + if (isAnsiEscape(prompt + off, plen - off, &len)) { + off += len; + continue; + } + buf[buf_len++] = prompt[off++]; + } + return columnPos(buf,buf_len,buf_len); +} + /* Single line low level line refresh. * * Rewrite the currently edited line accordingly to the buffer content, @@ -565,20 +658,21 @@ void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) { * prompt, just write it, or both. */ static void refreshSingleLine(struct linenoiseState *l, int flags) { char seq[64]; - size_t plen = strlen(l->prompt); + size_t pcollen = promptTextColumnLen(l->prompt,strlen(l->prompt)); int fd = l->ofd; char *buf = l->buf; size_t len = l->len; size_t pos = l->pos; struct abuf ab; - while((plen+pos) >= l->cols) { - buf++; - len--; - pos--; + while((pcollen+columnPos(buf,len,pos)) >= l->cols) { + int chlen = nextCharLen(buf,len,0,NULL); + buf += chlen; + len -= chlen; + pos -= chlen; } - while (plen+len > l->cols) { - len--; + while (pcollen+columnPos(buf,len,len) > l->cols) { + len -= prevCharLen(buf,len,len,NULL); } abInit(&ab); @@ -595,7 +689,7 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) { abAppend(&ab,buf,len); } /* Show hits if any. */ - refreshShowHints(&ab,l,plen); + refreshShowHints(&ab,l,pcollen); } /* Erase to right */ @@ -604,7 +698,7 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) { if (flags & REFRESH_WRITE) { /* Move cursor to original position. */ - snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(pos+strlenPerceived(l->prompt))); + snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(columnPos(buf,len,pos)+pcollen)); abAppend(&ab,seq,strlen(seq)); } @@ -621,9 +715,11 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) { * prompt, just write it, or both. */ static void refreshMultiLine(struct linenoiseState *l, int flags) { char seq[64]; - int plen = strlen(l->prompt); - int rows = (plen+l->len+l->cols-1)/l->cols; /* rows used by current buf. */ - int rpos = (plen+l->oldpos+l->cols)/l->cols; /* cursor relative row. */ + size_t pcollen = promptTextColumnLen(l->prompt,strlen(l->prompt)); + int colpos = columnPosForMultiLine(l->buf, l->len, l->len, l->cols, pcollen); + int colpos2; /* cursor column position. */ + int rows = (pcollen+colpos+l->cols-1)/l->cols; /* rows used by current buf. */ + int rpos = (pcollen+l->oldcolpos+l->cols)/l->cols; /* cursor relative row. */ int rpos2; /* rpos after refresh. */ int col; /* colum position, zero-based. */ int old_rows = l->oldrows; @@ -658,6 +754,9 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { abAppend(&ab,seq,strlen(seq)); } + /* Get column length to cursor position */ + colpos2 = columnPosForMultiLine(l->buf,l->len,l->pos,l->cols,pcollen); + if (flags & REFRESH_WRITE) { /* Write the prompt and the current buffer content */ abAppend(&ab,l->prompt,strlen(l->prompt)); @@ -669,13 +768,13 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { } /* Show hits if any. */ - refreshShowHints(&ab,l,plen); + refreshShowHints(&ab,l,pcollen); /* If we are at the very end of the screen with our prompt, we need to * emit a newline and move the prompt to the first column. */ if (l->pos && l->pos == l->len && - (l->pos+plen) % l->cols == 0) + (colpos2+pcollen) % l->cols == 0) { lndebug(""); abAppend(&ab,"\n",1); @@ -686,7 +785,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { } /* Move cursor to right position. */ - rpos2 = (plen+l->pos+l->cols)/l->cols; /* Current cursor relative row */ + rpos2 = (pcollen+colpos2+l->cols)/l->cols; /* Current cursor relative row */ lndebug("rpos2 %d", rpos2); /* Go up till we reach the expected positon. */ @@ -697,7 +796,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { } /* Set column. */ - col = (plen+(int)l->pos) % (int)l->cols; + col = (pcollen+colpos2) % l->cols; lndebug("set col %d", 1+col); if (col) snprintf(seq,64,"\r\x1b[%dC", col); @@ -707,7 +806,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) { } lndebug("\n"); - l->oldpos = l->pos; + l->oldcolpos = colpos2; if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */ abFree(&ab); @@ -747,26 +846,30 @@ void linenoiseShow(struct linenoiseState *l) { /* Insert the character 'c' at cursor current position. * * On error writing to the terminal -1 is returned, otherwise 0. */ -int linenoiseEditInsert(struct linenoiseState *l, char c) { - if (l->len < l->buflen) { +int linenoiseEditInsert(struct linenoiseState *l, const char *cbuf, int clen) { + if (l->len+clen <= l->buflen) { if (l->len == l->pos) { - l->buf[l->pos] = c; - l->pos++; - l->len++; + memcpy(&l->buf[l->pos],cbuf,clen); + l->pos+=clen; + l->len+=clen;; l->buf[l->len] = '\0'; - if ((!mlmode && l->plen+l->len < l->cols && !hintsCallback)) { + if ((!mlmode && promptTextColumnLen(l->prompt,l->plen)+columnPos(l->buf,l->len,l->len) < l->cols && !hintsCallback)) { /* Avoid a full update of the line in the * trivial case. */ - char d = (maskmode==1) ? '*' : c; - if (write(l->ofd,&d,1) == -1) return -1; + if (maskmode == 1) { + static const char d = '*'; + if (write(l->ofd,&d,1) == -1) return -1; + } else { + if (write(l->ofd,cbuf,clen) == -1) return -1; + } } else { refreshLine(l); } } else { - memmove(l->buf+l->pos+1,l->buf+l->pos,l->len-l->pos); - l->buf[l->pos] = c; - l->len++; - l->pos++; + memmove(l->buf+l->pos+clen,l->buf+l->pos,l->len-l->pos); + memcpy(&l->buf[l->pos],cbuf,clen); + l->pos+=clen; + l->len+=clen; l->buf[l->len] = '\0'; refreshLine(l); } @@ -777,7 +880,7 @@ int linenoiseEditInsert(struct linenoiseState *l, char c) { /* Move cursor on the left. */ void linenoiseEditMoveLeft(struct linenoiseState *l) { if (l->pos > 0) { - l->pos--; + l->pos -= prevCharLen(l->buf,l->len,l->pos,NULL); refreshLine(l); } } @@ -785,7 +888,7 @@ void linenoiseEditMoveLeft(struct linenoiseState *l) { /* Move cursor on the right. */ void linenoiseEditMoveRight(struct linenoiseState *l) { if (l->pos != l->len) { - l->pos++; + l->pos += nextCharLen(l->buf,l->len,l->pos,NULL); refreshLine(l); } } @@ -836,8 +939,9 @@ void linenoiseEditHistoryNext(struct linenoiseState *l, int dir) { * position. Basically this is what happens with the "Delete" keyboard key. */ void linenoiseEditDelete(struct linenoiseState *l) { if (l->len > 0 && l->pos < l->len) { - memmove(l->buf+l->pos,l->buf+l->pos+1,l->len-l->pos-1); - l->len--; + int chlen = nextCharLen(l->buf,l->len,l->pos,NULL); + memmove(l->buf+l->pos,l->buf+l->pos+chlen,l->len-l->pos-chlen); + l->len-=chlen; l->buf[l->len] = '\0'; refreshLine(l); } @@ -846,9 +950,10 @@ void linenoiseEditDelete(struct linenoiseState *l) { /* Backspace implementation. */ void linenoiseEditBackspace(struct linenoiseState *l) { if (l->pos > 0 && l->len > 0) { - memmove(l->buf+l->pos-1,l->buf+l->pos,l->len-l->pos); - l->pos--; - l->len--; + int chlen = prevCharLen(l->buf,l->len,l->pos,NULL); + memmove(l->buf+l->pos-chlen,l->buf+l->pos,l->len-l->pos); + l->pos-=chlen; + l->len-=chlen; l->buf[l->len] = '\0'; refreshLine(l); } @@ -904,7 +1009,7 @@ int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, ch l->buflen = buflen; l->prompt = prompt; l->plen = strlen(prompt); - l->oldpos = l->pos = 0; + l->oldcolpos = l->pos = 0; l->len = 0; l->cols = getColumns(stdin_fd, stdout_fd); l->oldrows = 0; @@ -955,11 +1060,12 @@ char *linenoiseEditFeed(struct linenoiseState *l) { * count limits. */ if (!isatty(l->ifd)) return linenoiseNoTTY(); - char c; + int c; int nread; + char cbuf[32]; // large enough for any encoding? char seq[3]; - nread = read(l->ifd,&c,1); + nread = readCode(l->ifd,cbuf,sizeof(cbuf),&c); if (nread <= 0) return NULL; /* Only autocomplete when the callback is set. It returns < 0 when @@ -1082,7 +1188,7 @@ char *linenoiseEditFeed(struct linenoiseState *l) { } break; default: - if (linenoiseEditInsert(l,c)) return NULL; + if (linenoiseEditInsert(l,cbuf,nread)) return NULL; break; case CTRL_U: /* Ctrl+u, delete the whole line. */ l->buf[0] = '\0'; @@ -1163,7 +1269,7 @@ void linenoisePrintKeyCodes(void) { if (memcmp(quit,"quit",sizeof(quit)) == 0) break; printf("'%c' %02x (%d) (type quit to exit)\n", - isprint(c) ? c : '?', (int)c, (int)c); + isprint((int)c) ? c : '?', (int)c, (int)c); printf("\r"); /* Go left edge manually, we are in raw mode. */ fflush(stdout); } diff --git a/ext/linenoise.h b/ext/linenoise.h index 3f0270e..77e5e19 100644 --- a/ext/linenoise.h +++ b/ext/linenoise.h @@ -61,7 +61,7 @@ struct linenoiseState { const char *prompt; /* Prompt to display. */ size_t plen; /* Prompt length. */ size_t pos; /* Current cursor position. */ - size_t oldpos; /* Previous refresh cursor position. */ + size_t oldcolpos; /* Previous refresh cursor column position. */ size_t len; /* Current edited line length. */ size_t cols; /* Number of columns in terminal. */ size_t oldrows; /* Rows used by last refrehsed line (multiline mode) */ @@ -106,6 +106,15 @@ void linenoisePrintKeyCodes(void); void linenoiseMaskModeEnable(void); void linenoiseMaskModeDisable(void); +typedef size_t (linenoisePrevCharLen)(const char *buf, size_t buf_len, size_t pos, size_t *col_len); +typedef size_t (linenoiseNextCharLen)(const char *buf, size_t buf_len, size_t pos, size_t *col_len); +typedef size_t (linenoiseReadCode)(int fd, char *buf, size_t buf_len, int* c); + + void linenoiseSetEncodingFunctions( + linenoisePrevCharLen *prevCharLenFunc, + linenoiseNextCharLen *nextCharLenFunc, + linenoiseReadCode *readCodeFunc); + #ifdef __cplusplus } #endif From fa5698c55dfcbd867a29166de01a8b6f0c6ddc1b Mon Sep 17 00:00:00 2001 From: apainintheneck Date: Tue, 27 Feb 2024 00:27:10 -0800 Subject: [PATCH 2/6] Add UTF8 support This is the second half of the PR that I'm merging in from upstream. It adds UTF8 13.0 support by including an extra library to get the char width of those characters (the visual width that is). During testing things seem to work quite well. I'm impressed. Author: https://github.com/yhirose PR: https://github.com/antirez/linenoise/pull/187 --- Makefile | 6 +- ext/linenoise.c | 46 ++-- ext/linenoise.h | 13 +- ext/utf8.c | 464 +++++++++++++++++++++++++++++++++++++++ ext/utf8.h | 56 +++++ src/lib/lib_linenoise.cr | 15 +- src/linenoise.cr | 19 ++ 7 files changed, 593 insertions(+), 26 deletions(-) create mode 100644 ext/utf8.c create mode 100644 ext/utf8.h diff --git a/Makefile b/Makefile index dd4f3d1..3b78e22 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,14 @@ default: extension lint test example: extension crystal run example/example.cr -extension: src/lib/linenoise.o +extension: src/lib/linenoise.o src/lib/utf8.o src/lib/linenoise.o: ext/linenoise.c ext/linenoise.h $(CC) -o $@ $< $(CFLAGS) +src/lib/utf8.o: ext/utf8.c ext/utf8.h + $(CC) -o $@ $< $(CFLAGS) + check: extension crystal build --no-codegen -o example_program example/example.cr @@ -35,3 +38,4 @@ test: specs expect clean: rm -f src/lib/linenoise.o + rm -f src/lib/utf8.o diff --git a/ext/linenoise.c b/ext/linenoise.c index 684210f..cc69237 100644 --- a/ext/linenoise.c +++ b/ext/linenoise.c @@ -119,6 +119,7 @@ #include #include #include "linenoise.h" +#include "utf8.h" #define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 #define LINENOISE_MAX_LINE 4096 @@ -140,7 +141,7 @@ static int history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN; static int history_len = 0; static char **history = NULL; -enum KEY_ACTION{ +enum KEY_ACTION { KEY_NULL = 0, /* NULL */ CTRL_A = 1, /* Ctrl+a */ CTRL_B = 2, /* Ctrl-b */ @@ -189,41 +190,52 @@ FILE *lndebug_fp = NULL; #endif /* ========================== Encoding functions ============================= */ + +typedef size_t (linenoisePrevCharLen)(const char *buf, size_t buf_len, size_t pos, size_t *col_len); +typedef size_t (linenoiseNextCharLen)(const char *buf, size_t buf_len, size_t pos, size_t *col_len); +typedef size_t (linenoiseReadCode)(int fd, char *buf, size_t buf_len, int* c); + +/* Set default encoding functions */ +static linenoisePrevCharLen *prevCharLen = linenoiseUtf8PrevCharLen; +static linenoiseNextCharLen *nextCharLen = linenoiseUtf8NextCharLen; +static linenoiseReadCode *readCode = linenoiseUtf8ReadCode; + /* Get byte length and column length of the previous character */ -static size_t defaultPrevCharLen(const char *buf, size_t buf_len, size_t pos, size_t *col_len) { +static size_t asciiPrevCharLen(const char *buf, size_t buf_len, size_t pos, size_t *col_len) { UNUSED(buf); UNUSED(buf_len); UNUSED(pos); if (col_len != NULL) *col_len = 1; return 1; } /* Get byte length and column length of the next character */ -static size_t defaultNextCharLen(const char *buf, size_t buf_len, size_t pos, size_t *col_len) { +static size_t asciiNextCharLen(const char *buf, size_t buf_len, size_t pos, size_t *col_len) { UNUSED(buf); UNUSED(buf_len); UNUSED(pos); if (col_len != NULL) *col_len = 1; return 1; } /* Read bytes of the next character */ -static size_t defaultReadCode(int fd, char *buf, size_t buf_len, int* c) { +static size_t asciiReadCode(int fd, char *buf, size_t buf_len, int* c) { if (buf_len < 1) return -1; int nread = read(fd,&buf[0],1); if (nread == 1) *c = buf[0]; return nread; } -/* Set default encoding functions */ -static linenoisePrevCharLen *prevCharLen = defaultPrevCharLen; -static linenoiseNextCharLen *nextCharLen = defaultNextCharLen; -static linenoiseReadCode *readCode = defaultReadCode; - -/* Set used defined encoding functions */ -void linenoiseSetEncodingFunctions( - linenoisePrevCharLen *prevCharLenFunc, - linenoiseNextCharLen *nextCharLenFunc, - linenoiseReadCode *readCodeFunc) { - prevCharLen = prevCharLenFunc; - nextCharLen = nextCharLenFunc; - readCode = readCodeFunc; +/* Set string encoding functions */ +void linenoiseSetEncoding(enum STRING_ENCODING encoding) { + switch(encoding) { + case ASCII: + prevCharLen = asciiPrevCharLen; + nextCharLen = asciiNextCharLen; + readCode = asciiReadCode; + break; + case UTF8: + prevCharLen = linenoiseUtf8PrevCharLen; + nextCharLen = linenoiseUtf8NextCharLen; + readCode = linenoiseUtf8ReadCode; + break; + } } /* Get column length from begining of buffer to current byte position */ diff --git a/ext/linenoise.h b/ext/linenoise.h index 77e5e19..bde558d 100644 --- a/ext/linenoise.h +++ b/ext/linenoise.h @@ -106,14 +106,13 @@ void linenoisePrintKeyCodes(void); void linenoiseMaskModeEnable(void); void linenoiseMaskModeDisable(void); -typedef size_t (linenoisePrevCharLen)(const char *buf, size_t buf_len, size_t pos, size_t *col_len); -typedef size_t (linenoiseNextCharLen)(const char *buf, size_t buf_len, size_t pos, size_t *col_len); -typedef size_t (linenoiseReadCode)(int fd, char *buf, size_t buf_len, int* c); +/* String encoding. */ +enum STRING_ENCODING { + ASCII, + UTF8 // default +}; - void linenoiseSetEncodingFunctions( - linenoisePrevCharLen *prevCharLenFunc, - linenoiseNextCharLen *nextCharLenFunc, - linenoiseReadCode *readCodeFunc); +void linenoiseSetEncoding(enum STRING_ENCODING); #ifdef __cplusplus } diff --git a/ext/utf8.c b/ext/utf8.c new file mode 100644 index 0000000..13bed94 --- /dev/null +++ b/ext/utf8.c @@ -0,0 +1,464 @@ +/* encoding/utf8.c -- VERSION 1.0 + * + * Guerrilla line editing library against the idea that a line editing lib + * needs to be 20,000 lines of C code. + * + * You can find the latest source code at: + * + * http://github.com/antirez/linenoise + * + * Does a number of crazy assumptions that happen to be true in 99.9999% of + * the 2010 UNIX computers around. + * + * ------------------------------------------------------------------------ + * + * Copyright (c) 2010-2014, Salvatore Sanfilippo + * Copyright (c) 2010-2013, Pieter Noordhuis + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#define UNUSED(x) (void)(x) + +/* ============================ UTF8 utilities ============================== */ + +static unsigned long wideCharTable[][2] = { + { 0x1100, 0x115F }, { 0x231A, 0x231B }, { 0x2329, 0x232A }, { 0x23E9, 0x23EC }, + { 0x23F0, 0x23F0 }, { 0x23F3, 0x23F3 }, { 0x25FD, 0x25FE }, { 0x2614, 0x2615 }, + { 0x2648, 0x2653 }, { 0x267F, 0x267F }, { 0x2693, 0x2693 }, { 0x26A1, 0x26A1 }, + { 0x26AA, 0x26AB }, { 0x26BD, 0x26BE }, { 0x26C4, 0x26C5 }, { 0x26CE, 0x26CE }, + { 0x26D4, 0x26D4 }, { 0x26EA, 0x26EA }, { 0x26F2, 0x26F3 }, { 0x26F5, 0x26F5 }, + { 0x26FA, 0x26FA }, { 0x26FD, 0x26FD }, { 0x2705, 0x2705 }, { 0x270A, 0x270B }, + { 0x2728, 0x2728 }, { 0x274C, 0x274C }, { 0x274E, 0x274E }, { 0x2753, 0x2755 }, + { 0x2757, 0x2757 }, { 0x2795, 0x2797 }, { 0x27B0, 0x27B0 }, { 0x27BF, 0x27BF }, + { 0x2B1B, 0x2B1C }, { 0x2B50, 0x2B50 }, { 0x2B55, 0x2B55 }, { 0x2E80, 0x2E99 }, + { 0x2E9B, 0x2EF3 }, { 0x2F00, 0x2FD5 }, { 0x2FF0, 0x2FFB }, { 0x3000, 0x303E }, + { 0x3041, 0x3096 }, { 0x3099, 0x30FF }, { 0x3105, 0x312F }, { 0x3131, 0x318E }, + { 0x3190, 0x31E3 }, { 0x31F0, 0x321E }, { 0x3220, 0x3247 }, { 0x3250, 0x4DBF }, + { 0x4E00, 0xA48C }, { 0xA490, 0xA4C6 }, { 0xA960, 0xA97C }, { 0xAC00, 0xD7A3 }, + { 0xF900, 0xFAFF }, { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE52 }, { 0xFE54, 0xFE66 }, + { 0xFE68, 0xFE6B }, { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 }, { 0x16FE0, 0x16FE4 }, + { 0x16FF0, 0x16FF1 }, { 0x17000, 0x187F7 }, { 0x18800, 0x18CD5 }, { 0x18D00, 0x18D08 }, + { 0x1AFF0, 0x1AFF3 }, { 0x1AFF5, 0x1AFFB }, { 0x1AFFD, 0x1AFFE }, { 0x1B000, 0x1B122 }, + { 0x1B150, 0x1B152 }, { 0x1B164, 0x1B167 }, { 0x1B170, 0x1B2FB }, { 0x1F004, 0x1F004 }, + { 0x1F0CF, 0x1F0CF }, { 0x1F18E, 0x1F18E }, { 0x1F191, 0x1F19A }, { 0x1F200, 0x1F202 }, + { 0x1F210, 0x1F23B }, { 0x1F240, 0x1F248 }, { 0x1F250, 0x1F251 }, { 0x1F260, 0x1F265 }, + { 0x1F300, 0x1F320 }, { 0x1F32D, 0x1F335 }, { 0x1F337, 0x1F37C }, { 0x1F37E, 0x1F393 }, + { 0x1F3A0, 0x1F3CA }, { 0x1F3CF, 0x1F3D3 }, { 0x1F3E0, 0x1F3F0 }, { 0x1F3F4, 0x1F3F4 }, + { 0x1F3F8, 0x1F43E }, { 0x1F440, 0x1F440 }, { 0x1F442, 0x1F4FC }, { 0x1F4FF, 0x1F53D }, + { 0x1F54B, 0x1F54E }, { 0x1F550, 0x1F567 }, { 0x1F57A, 0x1F57A }, { 0x1F595, 0x1F596 }, + { 0x1F5A4, 0x1F5A4 }, { 0x1F5FB, 0x1F64F }, { 0x1F680, 0x1F6C5 }, { 0x1F6CC, 0x1F6CC }, + { 0x1F6D0, 0x1F6D2 }, { 0x1F6D5, 0x1F6D7 }, { 0x1F6DD, 0x1F6DF }, { 0x1F6EB, 0x1F6EC }, + { 0x1F6F4, 0x1F6FC }, { 0x1F7E0, 0x1F7EB }, { 0x1F7F0, 0x1F7F0 }, { 0x1F90C, 0x1F93A }, + { 0x1F93C, 0x1F945 }, { 0x1F947, 0x1F9FF }, { 0x1FA70, 0x1FA74 }, { 0x1FA78, 0x1FA7C }, + { 0x1FA80, 0x1FA86 }, { 0x1FA90, 0x1FAAC }, { 0x1FAB0, 0x1FABA }, { 0x1FAC0, 0x1FAC5 }, + { 0x1FAD0, 0x1FAD9 }, { 0x1FAE0, 0x1FAE7 }, { 0x1FAF0, 0x1FAF6 }, { 0x20000, 0x2FFFD }, + { 0x30000, 0x3FFFD }, +}; + +static size_t wideCharTableSize = sizeof(wideCharTable) / sizeof(wideCharTable[0]); + +static unsigned long combiningCharTable[] = { + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, + 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F, + 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, + 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F, + 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, + 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F, + 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, + 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F, + 0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347, + 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F, + 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, + 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F, + 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, + 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F, + 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0591, 0x0592, 0x0593, + 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, + 0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1, 0x05A2, 0x05A3, + 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, + 0x05AC, 0x05AD, 0x05AE, 0x05AF, 0x05B0, 0x05B1, 0x05B2, 0x05B3, + 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, + 0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, + 0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615, 0x0616, 0x0617, + 0x0618, 0x0619, 0x061A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F, + 0x0650, 0x0651, 0x0652, 0x0653, 0x0654, 0x0655, 0x0656, 0x0657, + 0x0658, 0x0659, 0x065A, 0x065B, 0x065C, 0x065D, 0x065E, 0x065F, + 0x0670, 0x06D6, 0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC, + 0x06DF, 0x06E0, 0x06E1, 0x06E2, 0x06E3, 0x06E4, 0x06E7, 0x06E8, + 0x06EA, 0x06EB, 0x06EC, 0x06ED, 0x0711, 0x0730, 0x0731, 0x0732, + 0x0733, 0x0734, 0x0735, 0x0736, 0x0737, 0x0738, 0x0739, 0x073A, + 0x073B, 0x073C, 0x073D, 0x073E, 0x073F, 0x0740, 0x0741, 0x0742, + 0x0743, 0x0744, 0x0745, 0x0746, 0x0747, 0x0748, 0x0749, 0x074A, + 0x07A6, 0x07A7, 0x07A8, 0x07A9, 0x07AA, 0x07AB, 0x07AC, 0x07AD, + 0x07AE, 0x07AF, 0x07B0, 0x07EB, 0x07EC, 0x07ED, 0x07EE, 0x07EF, + 0x07F0, 0x07F1, 0x07F2, 0x07F3, 0x07FD, 0x0816, 0x0817, 0x0818, + 0x0819, 0x081B, 0x081C, 0x081D, 0x081E, 0x081F, 0x0820, 0x0821, + 0x0822, 0x0823, 0x0825, 0x0826, 0x0827, 0x0829, 0x082A, 0x082B, + 0x082C, 0x082D, 0x0859, 0x085A, 0x085B, 0x0898, 0x0899, 0x089A, + 0x089B, 0x089C, 0x089D, 0x089E, 0x089F, 0x08CA, 0x08CB, 0x08CC, + 0x08CD, 0x08CE, 0x08CF, 0x08D0, 0x08D1, 0x08D2, 0x08D3, 0x08D4, + 0x08D5, 0x08D6, 0x08D7, 0x08D8, 0x08D9, 0x08DA, 0x08DB, 0x08DC, + 0x08DD, 0x08DE, 0x08DF, 0x08E0, 0x08E1, 0x08E3, 0x08E4, 0x08E5, + 0x08E6, 0x08E7, 0x08E8, 0x08E9, 0x08EA, 0x08EB, 0x08EC, 0x08ED, + 0x08EE, 0x08EF, 0x08F0, 0x08F1, 0x08F2, 0x08F3, 0x08F4, 0x08F5, + 0x08F6, 0x08F7, 0x08F8, 0x08F9, 0x08FA, 0x08FB, 0x08FC, 0x08FD, + 0x08FE, 0x08FF, 0x0900, 0x0901, 0x0902, 0x093A, 0x093C, 0x0941, + 0x0942, 0x0943, 0x0944, 0x0945, 0x0946, 0x0947, 0x0948, 0x094D, + 0x0951, 0x0952, 0x0953, 0x0954, 0x0955, 0x0956, 0x0957, 0x0962, + 0x0963, 0x0981, 0x09BC, 0x09C1, 0x09C2, 0x09C3, 0x09C4, 0x09CD, + 0x09E2, 0x09E3, 0x09FE, 0x0A01, 0x0A02, 0x0A3C, 0x0A41, 0x0A42, + 0x0A47, 0x0A48, 0x0A4B, 0x0A4C, 0x0A4D, 0x0A51, 0x0A70, 0x0A71, + 0x0A75, 0x0A81, 0x0A82, 0x0ABC, 0x0AC1, 0x0AC2, 0x0AC3, 0x0AC4, + 0x0AC5, 0x0AC7, 0x0AC8, 0x0ACD, 0x0AE2, 0x0AE3, 0x0AFA, 0x0AFB, + 0x0AFC, 0x0AFD, 0x0AFE, 0x0AFF, 0x0B01, 0x0B3C, 0x0B3F, 0x0B41, + 0x0B42, 0x0B43, 0x0B44, 0x0B4D, 0x0B55, 0x0B56, 0x0B62, 0x0B63, + 0x0B82, 0x0BC0, 0x0BCD, 0x0C00, 0x0C04, 0x0C3C, 0x0C3E, 0x0C3F, + 0x0C40, 0x0C46, 0x0C47, 0x0C48, 0x0C4A, 0x0C4B, 0x0C4C, 0x0C4D, + 0x0C55, 0x0C56, 0x0C62, 0x0C63, 0x0C81, 0x0CBC, 0x0CBF, 0x0CC6, + 0x0CCC, 0x0CCD, 0x0CE2, 0x0CE3, 0x0D00, 0x0D01, 0x0D3B, 0x0D3C, + 0x0D41, 0x0D42, 0x0D43, 0x0D44, 0x0D4D, 0x0D62, 0x0D63, 0x0D81, + 0x0DCA, 0x0DD2, 0x0DD3, 0x0DD4, 0x0DD6, 0x0E31, 0x0E34, 0x0E35, + 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0x0E47, 0x0E48, 0x0E49, + 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0EB1, 0x0EB4, 0x0EB5, + 0x0EB6, 0x0EB7, 0x0EB8, 0x0EB9, 0x0EBA, 0x0EBB, 0x0EBC, 0x0EC8, + 0x0EC9, 0x0ECA, 0x0ECB, 0x0ECC, 0x0ECD, 0x0F18, 0x0F19, 0x0F35, + 0x0F37, 0x0F39, 0x0F71, 0x0F72, 0x0F73, 0x0F74, 0x0F75, 0x0F76, + 0x0F77, 0x0F78, 0x0F79, 0x0F7A, 0x0F7B, 0x0F7C, 0x0F7D, 0x0F7E, + 0x0F80, 0x0F81, 0x0F82, 0x0F83, 0x0F84, 0x0F86, 0x0F87, 0x0F8D, + 0x0F8E, 0x0F8F, 0x0F90, 0x0F91, 0x0F92, 0x0F93, 0x0F94, 0x0F95, + 0x0F96, 0x0F97, 0x0F99, 0x0F9A, 0x0F9B, 0x0F9C, 0x0F9D, 0x0F9E, + 0x0F9F, 0x0FA0, 0x0FA1, 0x0FA2, 0x0FA3, 0x0FA4, 0x0FA5, 0x0FA6, + 0x0FA7, 0x0FA8, 0x0FA9, 0x0FAA, 0x0FAB, 0x0FAC, 0x0FAD, 0x0FAE, + 0x0FAF, 0x0FB0, 0x0FB1, 0x0FB2, 0x0FB3, 0x0FB4, 0x0FB5, 0x0FB6, + 0x0FB7, 0x0FB8, 0x0FB9, 0x0FBA, 0x0FBB, 0x0FBC, 0x0FC6, 0x102D, + 0x102E, 0x102F, 0x1030, 0x1032, 0x1033, 0x1034, 0x1035, 0x1036, + 0x1037, 0x1039, 0x103A, 0x103D, 0x103E, 0x1058, 0x1059, 0x105E, + 0x105F, 0x1060, 0x1071, 0x1072, 0x1073, 0x1074, 0x1082, 0x1085, + 0x1086, 0x108D, 0x109D, 0x135D, 0x135E, 0x135F, 0x1712, 0x1713, + 0x1714, 0x1732, 0x1733, 0x1752, 0x1753, 0x1772, 0x1773, 0x17B4, + 0x17B5, 0x17B7, 0x17B8, 0x17B9, 0x17BA, 0x17BB, 0x17BC, 0x17BD, + 0x17C6, 0x17C9, 0x17CA, 0x17CB, 0x17CC, 0x17CD, 0x17CE, 0x17CF, + 0x17D0, 0x17D1, 0x17D2, 0x17D3, 0x17DD, 0x180B, 0x180C, 0x180D, + 0x180F, 0x1885, 0x1886, 0x18A9, 0x1920, 0x1921, 0x1922, 0x1927, + 0x1928, 0x1932, 0x1939, 0x193A, 0x193B, 0x1A17, 0x1A18, 0x1A1B, + 0x1A56, 0x1A58, 0x1A59, 0x1A5A, 0x1A5B, 0x1A5C, 0x1A5D, 0x1A5E, + 0x1A60, 0x1A62, 0x1A65, 0x1A66, 0x1A67, 0x1A68, 0x1A69, 0x1A6A, + 0x1A6B, 0x1A6C, 0x1A73, 0x1A74, 0x1A75, 0x1A76, 0x1A77, 0x1A78, + 0x1A79, 0x1A7A, 0x1A7B, 0x1A7C, 0x1A7F, 0x1AB0, 0x1AB1, 0x1AB2, + 0x1AB3, 0x1AB4, 0x1AB5, 0x1AB6, 0x1AB7, 0x1AB8, 0x1AB9, 0x1ABA, + 0x1ABB, 0x1ABC, 0x1ABD, 0x1ABF, 0x1AC0, 0x1AC1, 0x1AC2, 0x1AC3, + 0x1AC4, 0x1AC5, 0x1AC6, 0x1AC7, 0x1AC8, 0x1AC9, 0x1ACA, 0x1ACB, + 0x1ACC, 0x1ACD, 0x1ACE, 0x1B00, 0x1B01, 0x1B02, 0x1B03, 0x1B34, + 0x1B36, 0x1B37, 0x1B38, 0x1B39, 0x1B3A, 0x1B3C, 0x1B42, 0x1B6B, + 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73, + 0x1B80, 0x1B81, 0x1BA2, 0x1BA3, 0x1BA4, 0x1BA5, 0x1BA8, 0x1BA9, + 0x1BAB, 0x1BAC, 0x1BAD, 0x1BE6, 0x1BE8, 0x1BE9, 0x1BED, 0x1BEF, + 0x1BF0, 0x1BF1, 0x1C2C, 0x1C2D, 0x1C2E, 0x1C2F, 0x1C30, 0x1C31, + 0x1C32, 0x1C33, 0x1C36, 0x1C37, 0x1CD0, 0x1CD1, 0x1CD2, 0x1CD4, + 0x1CD5, 0x1CD6, 0x1CD7, 0x1CD8, 0x1CD9, 0x1CDA, 0x1CDB, 0x1CDC, + 0x1CDD, 0x1CDE, 0x1CDF, 0x1CE0, 0x1CE2, 0x1CE3, 0x1CE4, 0x1CE5, + 0x1CE6, 0x1CE7, 0x1CE8, 0x1CED, 0x1CF4, 0x1CF8, 0x1CF9, 0x1DC0, + 0x1DC1, 0x1DC2, 0x1DC3, 0x1DC4, 0x1DC5, 0x1DC6, 0x1DC7, 0x1DC8, + 0x1DC9, 0x1DCA, 0x1DCB, 0x1DCC, 0x1DCD, 0x1DCE, 0x1DCF, 0x1DD0, + 0x1DD1, 0x1DD2, 0x1DD3, 0x1DD4, 0x1DD5, 0x1DD6, 0x1DD7, 0x1DD8, + 0x1DD9, 0x1DDA, 0x1DDB, 0x1DDC, 0x1DDD, 0x1DDE, 0x1DDF, 0x1DE0, + 0x1DE1, 0x1DE2, 0x1DE3, 0x1DE4, 0x1DE5, 0x1DE6, 0x1DE7, 0x1DE8, + 0x1DE9, 0x1DEA, 0x1DEB, 0x1DEC, 0x1DED, 0x1DEE, 0x1DEF, 0x1DF0, + 0x1DF1, 0x1DF2, 0x1DF3, 0x1DF4, 0x1DF5, 0x1DF6, 0x1DF7, 0x1DF8, + 0x1DF9, 0x1DFA, 0x1DFB, 0x1DFC, 0x1DFD, 0x1DFE, 0x1DFF, 0x20D0, + 0x20D1, 0x20D2, 0x20D3, 0x20D4, 0x20D5, 0x20D6, 0x20D7, 0x20D8, + 0x20D9, 0x20DA, 0x20DB, 0x20DC, 0x20E1, 0x20E5, 0x20E6, 0x20E7, + 0x20E8, 0x20E9, 0x20EA, 0x20EB, 0x20EC, 0x20ED, 0x20EE, 0x20EF, + 0x20F0, 0x2CEF, 0x2CF0, 0x2CF1, 0x2D7F, 0x2DE0, 0x2DE1, 0x2DE2, + 0x2DE3, 0x2DE4, 0x2DE5, 0x2DE6, 0x2DE7, 0x2DE8, 0x2DE9, 0x2DEA, + 0x2DEB, 0x2DEC, 0x2DED, 0x2DEE, 0x2DEF, 0x2DF0, 0x2DF1, 0x2DF2, + 0x2DF3, 0x2DF4, 0x2DF5, 0x2DF6, 0x2DF7, 0x2DF8, 0x2DF9, 0x2DFA, + 0x2DFB, 0x2DFC, 0x2DFD, 0x2DFE, 0x2DFF, 0x302A, 0x302B, 0x302C, + 0x302D, 0x3099, 0x309A, 0xA66F, 0xA674, 0xA675, 0xA676, 0xA677, + 0xA678, 0xA679, 0xA67A, 0xA67B, 0xA67C, 0xA67D, 0xA69E, 0xA69F, + 0xA6F0, 0xA6F1, 0xA802, 0xA806, 0xA80B, 0xA825, 0xA826, 0xA82C, + 0xA8C4, 0xA8C5, 0xA8E0, 0xA8E1, 0xA8E2, 0xA8E3, 0xA8E4, 0xA8E5, + 0xA8E6, 0xA8E7, 0xA8E8, 0xA8E9, 0xA8EA, 0xA8EB, 0xA8EC, 0xA8ED, + 0xA8EE, 0xA8EF, 0xA8F0, 0xA8F1, 0xA8FF, 0xA926, 0xA927, 0xA928, + 0xA929, 0xA92A, 0xA92B, 0xA92C, 0xA92D, 0xA947, 0xA948, 0xA949, + 0xA94A, 0xA94B, 0xA94C, 0xA94D, 0xA94E, 0xA94F, 0xA950, 0xA951, + 0xA980, 0xA981, 0xA982, 0xA9B3, 0xA9B6, 0xA9B7, 0xA9B8, 0xA9B9, + 0xA9BC, 0xA9BD, 0xA9E5, 0xAA29, 0xAA2A, 0xAA2B, 0xAA2C, 0xAA2D, + 0xAA2E, 0xAA31, 0xAA32, 0xAA35, 0xAA36, 0xAA43, 0xAA4C, 0xAA7C, + 0xAAB0, 0xAAB2, 0xAAB3, 0xAAB4, 0xAAB7, 0xAAB8, 0xAABE, 0xAABF, + 0xAAC1, 0xAAEC, 0xAAED, 0xAAF6, 0xABE5, 0xABE8, 0xABED, 0xFB1E, + 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07, + 0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, + 0xFE20, 0xFE21, 0xFE22, 0xFE23, 0xFE24, 0xFE25, 0xFE26, 0xFE27, + 0xFE28, 0xFE29, 0xFE2A, 0xFE2B, 0xFE2C, 0xFE2D, 0xFE2E, 0xFE2F, + 0x101FD, 0x102E0, 0x10376, 0x10377, 0x10378, 0x10379, 0x1037A, 0x10A01, + 0x10A02, 0x10A03, 0x10A05, 0x10A06, 0x10A0C, 0x10A0D, 0x10A0E, 0x10A0F, + 0x10A38, 0x10A39, 0x10A3A, 0x10A3F, 0x10AE5, 0x10AE6, 0x10D24, 0x10D25, + 0x10D26, 0x10D27, 0x10EAB, 0x10EAC, 0x10F46, 0x10F47, 0x10F48, 0x10F49, + 0x10F4A, 0x10F4B, 0x10F4C, 0x10F4D, 0x10F4E, 0x10F4F, 0x10F50, 0x10F82, + 0x10F83, 0x10F84, 0x10F85, 0x11001, 0x11038, 0x11039, 0x1103A, 0x1103B, + 0x1103C, 0x1103D, 0x1103E, 0x1103F, 0x11040, 0x11041, 0x11042, 0x11043, + 0x11044, 0x11045, 0x11046, 0x11070, 0x11073, 0x11074, 0x1107F, 0x11080, + 0x11081, 0x110B3, 0x110B4, 0x110B5, 0x110B6, 0x110B9, 0x110BA, 0x110C2, + 0x11100, 0x11101, 0x11102, 0x11127, 0x11128, 0x11129, 0x1112A, 0x1112B, + 0x1112D, 0x1112E, 0x1112F, 0x11130, 0x11131, 0x11132, 0x11133, 0x11134, + 0x11173, 0x11180, 0x11181, 0x111B6, 0x111B7, 0x111B8, 0x111B9, 0x111BA, + 0x111BB, 0x111BC, 0x111BD, 0x111BE, 0x111C9, 0x111CA, 0x111CB, 0x111CC, + 0x111CF, 0x1122F, 0x11230, 0x11231, 0x11234, 0x11236, 0x11237, 0x1123E, + 0x112DF, 0x112E3, 0x112E4, 0x112E5, 0x112E6, 0x112E7, 0x112E8, 0x112E9, + 0x112EA, 0x11300, 0x11301, 0x1133B, 0x1133C, 0x11340, 0x11366, 0x11367, + 0x11368, 0x11369, 0x1136A, 0x1136B, 0x1136C, 0x11370, 0x11371, 0x11372, + 0x11373, 0x11374, 0x11438, 0x11439, 0x1143A, 0x1143B, 0x1143C, 0x1143D, + 0x1143E, 0x1143F, 0x11442, 0x11443, 0x11444, 0x11446, 0x1145E, 0x114B3, + 0x114B4, 0x114B5, 0x114B6, 0x114B7, 0x114B8, 0x114BA, 0x114BF, 0x114C0, + 0x114C2, 0x114C3, 0x115B2, 0x115B3, 0x115B4, 0x115B5, 0x115BC, 0x115BD, + 0x115BF, 0x115C0, 0x115DC, 0x115DD, 0x11633, 0x11634, 0x11635, 0x11636, + 0x11637, 0x11638, 0x11639, 0x1163A, 0x1163D, 0x1163F, 0x11640, 0x116AB, + 0x116AD, 0x116B0, 0x116B1, 0x116B2, 0x116B3, 0x116B4, 0x116B5, 0x116B7, + 0x1171D, 0x1171E, 0x1171F, 0x11722, 0x11723, 0x11724, 0x11725, 0x11727, + 0x11728, 0x11729, 0x1172A, 0x1172B, 0x1182F, 0x11830, 0x11831, 0x11832, + 0x11833, 0x11834, 0x11835, 0x11836, 0x11837, 0x11839, 0x1183A, 0x1193B, + 0x1193C, 0x1193E, 0x11943, 0x119D4, 0x119D5, 0x119D6, 0x119D7, 0x119DA, + 0x119DB, 0x119E0, 0x11A01, 0x11A02, 0x11A03, 0x11A04, 0x11A05, 0x11A06, + 0x11A07, 0x11A08, 0x11A09, 0x11A0A, 0x11A33, 0x11A34, 0x11A35, 0x11A36, + 0x11A37, 0x11A38, 0x11A3B, 0x11A3C, 0x11A3D, 0x11A3E, 0x11A47, 0x11A51, + 0x11A52, 0x11A53, 0x11A54, 0x11A55, 0x11A56, 0x11A59, 0x11A5A, 0x11A5B, + 0x11A8A, 0x11A8B, 0x11A8C, 0x11A8D, 0x11A8E, 0x11A8F, 0x11A90, 0x11A91, + 0x11A92, 0x11A93, 0x11A94, 0x11A95, 0x11A96, 0x11A98, 0x11A99, 0x11C30, + 0x11C31, 0x11C32, 0x11C33, 0x11C34, 0x11C35, 0x11C36, 0x11C38, 0x11C39, + 0x11C3A, 0x11C3B, 0x11C3C, 0x11C3D, 0x11C3F, 0x11C92, 0x11C93, 0x11C94, + 0x11C95, 0x11C96, 0x11C97, 0x11C98, 0x11C99, 0x11C9A, 0x11C9B, 0x11C9C, + 0x11C9D, 0x11C9E, 0x11C9F, 0x11CA0, 0x11CA1, 0x11CA2, 0x11CA3, 0x11CA4, + 0x11CA5, 0x11CA6, 0x11CA7, 0x11CAA, 0x11CAB, 0x11CAC, 0x11CAD, 0x11CAE, + 0x11CAF, 0x11CB0, 0x11CB2, 0x11CB3, 0x11CB5, 0x11CB6, 0x11D31, 0x11D32, + 0x11D33, 0x11D34, 0x11D35, 0x11D36, 0x11D3A, 0x11D3C, 0x11D3D, 0x11D3F, + 0x11D40, 0x11D41, 0x11D42, 0x11D43, 0x11D44, 0x11D45, 0x11D47, 0x11D90, + 0x11D91, 0x11D95, 0x11D97, 0x11EF3, 0x11EF4, 0x16AF0, 0x16AF1, 0x16AF2, + 0x16AF3, 0x16AF4, 0x16B30, 0x16B31, 0x16B32, 0x16B33, 0x16B34, 0x16B35, + 0x16B36, 0x16F4F, 0x16F8F, 0x16F90, 0x16F91, 0x16F92, 0x16FE4, 0x1BC9D, + 0x1BC9E, 0x1CF00, 0x1CF01, 0x1CF02, 0x1CF03, 0x1CF04, 0x1CF05, 0x1CF06, + 0x1CF07, 0x1CF08, 0x1CF09, 0x1CF0A, 0x1CF0B, 0x1CF0C, 0x1CF0D, 0x1CF0E, + 0x1CF0F, 0x1CF10, 0x1CF11, 0x1CF12, 0x1CF13, 0x1CF14, 0x1CF15, 0x1CF16, + 0x1CF17, 0x1CF18, 0x1CF19, 0x1CF1A, 0x1CF1B, 0x1CF1C, 0x1CF1D, 0x1CF1E, + 0x1CF1F, 0x1CF20, 0x1CF21, 0x1CF22, 0x1CF23, 0x1CF24, 0x1CF25, 0x1CF26, + 0x1CF27, 0x1CF28, 0x1CF29, 0x1CF2A, 0x1CF2B, 0x1CF2C, 0x1CF2D, 0x1CF30, + 0x1CF31, 0x1CF32, 0x1CF33, 0x1CF34, 0x1CF35, 0x1CF36, 0x1CF37, 0x1CF38, + 0x1CF39, 0x1CF3A, 0x1CF3B, 0x1CF3C, 0x1CF3D, 0x1CF3E, 0x1CF3F, 0x1CF40, + 0x1CF41, 0x1CF42, 0x1CF43, 0x1CF44, 0x1CF45, 0x1CF46, 0x1D167, 0x1D168, + 0x1D169, 0x1D17B, 0x1D17C, 0x1D17D, 0x1D17E, 0x1D17F, 0x1D180, 0x1D181, + 0x1D182, 0x1D185, 0x1D186, 0x1D187, 0x1D188, 0x1D189, 0x1D18A, 0x1D18B, + 0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD, 0x1D242, 0x1D243, 0x1D244, 0x1DA00, + 0x1DA01, 0x1DA02, 0x1DA03, 0x1DA04, 0x1DA05, 0x1DA06, 0x1DA07, 0x1DA08, + 0x1DA09, 0x1DA0A, 0x1DA0B, 0x1DA0C, 0x1DA0D, 0x1DA0E, 0x1DA0F, 0x1DA10, + 0x1DA11, 0x1DA12, 0x1DA13, 0x1DA14, 0x1DA15, 0x1DA16, 0x1DA17, 0x1DA18, + 0x1DA19, 0x1DA1A, 0x1DA1B, 0x1DA1C, 0x1DA1D, 0x1DA1E, 0x1DA1F, 0x1DA20, + 0x1DA21, 0x1DA22, 0x1DA23, 0x1DA24, 0x1DA25, 0x1DA26, 0x1DA27, 0x1DA28, + 0x1DA29, 0x1DA2A, 0x1DA2B, 0x1DA2C, 0x1DA2D, 0x1DA2E, 0x1DA2F, 0x1DA30, + 0x1DA31, 0x1DA32, 0x1DA33, 0x1DA34, 0x1DA35, 0x1DA36, 0x1DA3B, 0x1DA3C, + 0x1DA3D, 0x1DA3E, 0x1DA3F, 0x1DA40, 0x1DA41, 0x1DA42, 0x1DA43, 0x1DA44, + 0x1DA45, 0x1DA46, 0x1DA47, 0x1DA48, 0x1DA49, 0x1DA4A, 0x1DA4B, 0x1DA4C, + 0x1DA4D, 0x1DA4E, 0x1DA4F, 0x1DA50, 0x1DA51, 0x1DA52, 0x1DA53, 0x1DA54, + 0x1DA55, 0x1DA56, 0x1DA57, 0x1DA58, 0x1DA59, 0x1DA5A, 0x1DA5B, 0x1DA5C, + 0x1DA5D, 0x1DA5E, 0x1DA5F, 0x1DA60, 0x1DA61, 0x1DA62, 0x1DA63, 0x1DA64, + 0x1DA65, 0x1DA66, 0x1DA67, 0x1DA68, 0x1DA69, 0x1DA6A, 0x1DA6B, 0x1DA6C, + 0x1DA75, 0x1DA84, 0x1DA9B, 0x1DA9C, 0x1DA9D, 0x1DA9E, 0x1DA9F, 0x1DAA1, + 0x1DAA2, 0x1DAA3, 0x1DAA4, 0x1DAA5, 0x1DAA6, 0x1DAA7, 0x1DAA8, 0x1DAA9, + 0x1DAAA, 0x1DAAB, 0x1DAAC, 0x1DAAD, 0x1DAAE, 0x1DAAF, 0x1E000, 0x1E001, + 0x1E002, 0x1E003, 0x1E004, 0x1E005, 0x1E006, 0x1E008, 0x1E009, 0x1E00A, + 0x1E00B, 0x1E00C, 0x1E00D, 0x1E00E, 0x1E00F, 0x1E010, 0x1E011, 0x1E012, + 0x1E013, 0x1E014, 0x1E015, 0x1E016, 0x1E017, 0x1E018, 0x1E01B, 0x1E01C, + 0x1E01D, 0x1E01E, 0x1E01F, 0x1E020, 0x1E021, 0x1E023, 0x1E024, 0x1E026, + 0x1E027, 0x1E028, 0x1E029, 0x1E02A, 0x1E130, 0x1E131, 0x1E132, 0x1E133, + 0x1E134, 0x1E135, 0x1E136, 0x1E2AE, 0x1E2EC, 0x1E2ED, 0x1E2EE, 0x1E2EF, + 0x1E8D0, 0x1E8D1, 0x1E8D2, 0x1E8D3, 0x1E8D4, 0x1E8D5, 0x1E8D6, 0x1E944, + 0x1E945, 0x1E946, 0x1E947, 0x1E948, 0x1E949, 0x1E94A, 0xE0100, 0xE0101, + 0xE0102, 0xE0103, 0xE0104, 0xE0105, 0xE0106, 0xE0107, 0xE0108, 0xE0109, + 0xE010A, 0xE010B, 0xE010C, 0xE010D, 0xE010E, 0xE010F, 0xE0110, 0xE0111, + 0xE0112, 0xE0113, 0xE0114, 0xE0115, 0xE0116, 0xE0117, 0xE0118, 0xE0119, + 0xE011A, 0xE011B, 0xE011C, 0xE011D, 0xE011E, 0xE011F, 0xE0120, 0xE0121, + 0xE0122, 0xE0123, 0xE0124, 0xE0125, 0xE0126, 0xE0127, 0xE0128, 0xE0129, + 0xE012A, 0xE012B, 0xE012C, 0xE012D, 0xE012E, 0xE012F, 0xE0130, 0xE0131, + 0xE0132, 0xE0133, 0xE0134, 0xE0135, 0xE0136, 0xE0137, 0xE0138, 0xE0139, + 0xE013A, 0xE013B, 0xE013C, 0xE013D, 0xE013E, 0xE013F, 0xE0140, 0xE0141, + 0xE0142, 0xE0143, 0xE0144, 0xE0145, 0xE0146, 0xE0147, 0xE0148, 0xE0149, + 0xE014A, 0xE014B, 0xE014C, 0xE014D, 0xE014E, 0xE014F, 0xE0150, 0xE0151, + 0xE0152, 0xE0153, 0xE0154, 0xE0155, 0xE0156, 0xE0157, 0xE0158, 0xE0159, + 0xE015A, 0xE015B, 0xE015C, 0xE015D, 0xE015E, 0xE015F, 0xE0160, 0xE0161, + 0xE0162, 0xE0163, 0xE0164, 0xE0165, 0xE0166, 0xE0167, 0xE0168, 0xE0169, + 0xE016A, 0xE016B, 0xE016C, 0xE016D, 0xE016E, 0xE016F, 0xE0170, 0xE0171, + 0xE0172, 0xE0173, 0xE0174, 0xE0175, 0xE0176, 0xE0177, 0xE0178, 0xE0179, + 0xE017A, 0xE017B, 0xE017C, 0xE017D, 0xE017E, 0xE017F, 0xE0180, 0xE0181, + 0xE0182, 0xE0183, 0xE0184, 0xE0185, 0xE0186, 0xE0187, 0xE0188, 0xE0189, + 0xE018A, 0xE018B, 0xE018C, 0xE018D, 0xE018E, 0xE018F, 0xE0190, 0xE0191, + 0xE0192, 0xE0193, 0xE0194, 0xE0195, 0xE0196, 0xE0197, 0xE0198, 0xE0199, + 0xE019A, 0xE019B, 0xE019C, 0xE019D, 0xE019E, 0xE019F, 0xE01A0, 0xE01A1, + 0xE01A2, 0xE01A3, 0xE01A4, 0xE01A5, 0xE01A6, 0xE01A7, 0xE01A8, 0xE01A9, + 0xE01AA, 0xE01AB, 0xE01AC, 0xE01AD, 0xE01AE, 0xE01AF, 0xE01B0, 0xE01B1, + 0xE01B2, 0xE01B3, 0xE01B4, 0xE01B5, 0xE01B6, 0xE01B7, 0xE01B8, 0xE01B9, + 0xE01BA, 0xE01BB, 0xE01BC, 0xE01BD, 0xE01BE, 0xE01BF, 0xE01C0, 0xE01C1, + 0xE01C2, 0xE01C3, 0xE01C4, 0xE01C5, 0xE01C6, 0xE01C7, 0xE01C8, 0xE01C9, + 0xE01CA, 0xE01CB, 0xE01CC, 0xE01CD, 0xE01CE, 0xE01CF, 0xE01D0, 0xE01D1, + 0xE01D2, 0xE01D3, 0xE01D4, 0xE01D5, 0xE01D6, 0xE01D7, 0xE01D8, 0xE01D9, + 0xE01DA, 0xE01DB, 0xE01DC, 0xE01DD, 0xE01DE, 0xE01DF, 0xE01E0, 0xE01E1, + 0xE01E2, 0xE01E3, 0xE01E4, 0xE01E5, 0xE01E6, 0xE01E7, 0xE01E8, 0xE01E9, + 0xE01EA, 0xE01EB, 0xE01EC, 0xE01ED, 0xE01EE, 0xE01EF, +}; + +static unsigned long combiningCharTableSize = sizeof(combiningCharTable) / sizeof(combiningCharTable[0]); + +/* Check if the code is a wide character + */ +static int isWideChar(unsigned long cp) { + size_t i; + for (i = 0; i < wideCharTableSize; i++) + if (wideCharTable[i][0] <= cp && cp <= wideCharTable[i][1]) return 1; + return 0; +} + +/* Check if the code is a combining character + */ +static int isCombiningChar(unsigned long cp) { + size_t i; + for (i = 0; i < combiningCharTableSize; i++) + if (combiningCharTable[i] == cp) return 1; + return 0; +} + +/* Get length of previous UTF8 character + */ +static size_t prevUtf8CharLen(const char* buf, int pos) { + int end = pos--; + while (pos >= 0 && ((unsigned char)buf[pos] & 0xC0) == 0x80) + pos--; + return end - pos; +} + +/* Convert UTF8 to Unicode code point + */ +static size_t utf8BytesToCodePoint(const char* buf, size_t len, int* cp) { + if (len) { + unsigned char byte = buf[0]; + if ((byte & 0x80) == 0) { + *cp = byte; + return 1; + } else if ((byte & 0xE0) == 0xC0) { + if (len >= 2) { + *cp = (((unsigned long)(buf[0] & 0x1F)) << 6) | + ((unsigned long)(buf[1] & 0x3F)); + return 2; + } + } else if ((byte & 0xF0) == 0xE0) { + if (len >= 3) { + *cp = (((unsigned long)(buf[0] & 0x0F)) << 12) | + (((unsigned long)(buf[1] & 0x3F)) << 6) | + ((unsigned long)(buf[2] & 0x3F)); + return 3; + } + } else if ((byte & 0xF8) == 0xF0) { + if (len >= 4) { + *cp = (((unsigned long)(buf[0] & 0x07)) << 18) | + (((unsigned long)(buf[1] & 0x3F)) << 12) | + (((unsigned long)(buf[2] & 0x3F)) << 6) | + ((unsigned long)(buf[3] & 0x3F)); + return 4; + } + } + } + return 0; +} + +/* Get length of next grapheme + */ +size_t linenoiseUtf8NextCharLen(const char* buf, size_t buf_len, size_t pos, size_t *col_len) { + size_t beg = pos; + int cp; + size_t len = utf8BytesToCodePoint(buf + pos, buf_len - pos, &cp); + if (isCombiningChar(cp)) { + /* NOTREACHED */ + return 0; + } + if (col_len != NULL) *col_len = isWideChar(cp) ? 2 : 1; + pos += len; + while (pos < buf_len) { + int cp; + len = utf8BytesToCodePoint(buf + pos, buf_len - pos, &cp); + if (!isCombiningChar(cp)) return pos - beg; + pos += len; + } + return pos - beg; +} + +/* Get length of previous grapheme + */ +size_t linenoiseUtf8PrevCharLen(const char* buf, size_t buf_len, size_t pos, size_t *col_len) { + UNUSED(buf_len); + size_t end = pos; + while (pos > 0) { + size_t len = prevUtf8CharLen(buf, pos); + pos -= len; + int cp; + utf8BytesToCodePoint(buf + pos, len, &cp); + if (!isCombiningChar(cp)) { + if (col_len != NULL) *col_len = isWideChar(cp) ? 2 : 1; + return end - pos; + } + } + /* NOTREACHED */ + return 0; +} + +/* Read a Unicode from file. + */ +size_t linenoiseUtf8ReadCode(int fd, char* buf, size_t buf_len, int* cp) { + if (buf_len < 1) return -1; + size_t nread = read(fd,&buf[0],1); + if (nread <= 0) return nread; + + unsigned char byte = buf[0]; + if ((byte & 0x80) == 0) { + ; + } else if ((byte & 0xE0) == 0xC0) { + if (buf_len < 2) return -1; + nread = read(fd,&buf[1],1); + if (nread <= 0) return nread; + } else if ((byte & 0xF0) == 0xE0) { + if (buf_len < 3) return -1; + nread = read(fd,&buf[1],2); + if (nread <= 0) return nread; + } else if ((byte & 0xF8) == 0xF0) { + if (buf_len < 3) return -1; + nread = read(fd,&buf[1],3); + if (nread <= 0) return nread; + } else { + return -1; + } + + return utf8BytesToCodePoint(buf, buf_len, cp); +} diff --git a/ext/utf8.h b/ext/utf8.h new file mode 100644 index 0000000..db80009 --- /dev/null +++ b/ext/utf8.h @@ -0,0 +1,56 @@ +/* encodings/utf8.h -- VERSION 1.0 + * + * Guerrilla line editing library against the idea that a line editing lib + * needs to be 20,000 lines of C code. + * + * See linenoise.c for more information. + * + * ------------------------------------------------------------------------ + * + * Copyright (c) 2010-2014, Salvatore Sanfilippo + * Copyright (c) 2010-2013, Pieter Noordhuis + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LINENOISE_ENCODINGS_UTF8_H +#define __LINENOISE_ENCODINGS_UTF8_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include /* For size_t. */ + +size_t linenoiseUtf8PrevCharLen(const char* buf, size_t buf_len, size_t pos, size_t *col_len); +size_t linenoiseUtf8NextCharLen(const char* buf, size_t buf_len, size_t pos, size_t *col_len); +size_t linenoiseUtf8ReadCode(int fd, char* buf, size_t buf_len, int* cp); + +#ifdef __cplusplus +} +#endif + +#endif /* __LINENOISE_ENCODINGS_UTF8_H */ diff --git a/src/lib/lib_linenoise.cr b/src/lib/lib_linenoise.cr index 0d42775..3b0c303 100644 --- a/src/lib/lib_linenoise.cr +++ b/src/lib/lib_linenoise.cr @@ -1,6 +1,7 @@ # Crystal bindings for the Linenoise library. # Note: This is a one-for-one translation of `ext/linenoise.h`. @[Link(ldflags: "#{__DIR__}/linenoise.o")] +@[Link(ldflags: "#{__DIR__}/utf8.o")] lib LibLinenoise alias Char = LibC::Char alias Int = LibC::Int @@ -26,7 +27,7 @@ lib LibLinenoise prompt : Char* plen : SizeT pos : SizeT - oldpos : SizeT + oldcolpos : SizeT len : SizeT cols : SizeT oldrows : SizeT @@ -112,4 +113,16 @@ lib LibLinenoise # Ex. $ ********** fun mask_mode_enable = linenoiseMaskModeEnable fun mask_mode_disable = linenoiseMaskModeDisable + + # + # String encoding. + # + + enum STRING_ENCODING + ASCII + UTF8 + end + + # Choose the string encoding that will be used by linenoise. + fun set_encoding = linenoiseSetEncoding(encoding : STRING_ENCODING) end diff --git a/src/linenoise.cr b/src/linenoise.cr index 1e340a4..7b66b12 100644 --- a/src/linenoise.cr +++ b/src/linenoise.cr @@ -127,4 +127,23 @@ module Linenoise LibLinenoise.mask_mode_disable end end + + # + # String encoding. + # + + enum Encoding : UInt8 + ASCII + UTF8 # default + end + + # Set the string encoding for the linenoise session. It defaults to UTF8. + def self.set_encoding(encoding : Encoding) + case encoding + in Encoding::ASCII + LibLinenoise.set_encoding(LibLinenoise::ASCII) + in Encoding::UTF8 + LibLinenoise.set_encoding(LibLinenoise::UTF8) + end + end end From f4dc8330bbf6c11bbdd745c74d02a9cdbacb3c3d Mon Sep 17 00:00:00 2001 From: apainintheneck Date: Sun, 3 Mar 2024 18:28:10 -0800 Subject: [PATCH 3/6] Update changelogs and add attribution --- CHANGELOG.md | 10 ++++++++++ ext/CHANGES.md | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1dc01d..e2510ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## Unreleased + +### Added + +- Add UTF8 support (Unicode v13.0) + +### Fixed + +- Fix bug in multiline cursor offset position with escape codes + ## [0.3.0] - 2024-02-10 ### Added diff --git a/ext/CHANGES.md b/ext/CHANGES.md index 799e39b..8fe70f3 100644 --- a/ext/CHANGES.md +++ b/ext/CHANGES.md @@ -73,3 +73,23 @@ All credit goes to antirez as mentioned in the license includes in this director - `https://github.com/gdawg` - Note - This just removes an extra include. + +--- + +## Branch: Another implementation for utf8 support. + +- Last Copied + - 2024-02-26 +- Last Commit + - Date + - 2024-04-19 + - SHA + - 34fbffeef4489418527e1267e8e5c093b58aa5d4 +- Branch URL + - `https://github.com/antirez/linenoise/pull/187` +- User URL + - `https://github.com/yhirose` +- Note + - This is a massive PR which provides UTF8 support and at + the same time fixes the cursor position errors in multiline + mode. It currently supports Unicode 13.0. \ No newline at end of file From 5754332e01ce8ac51742f257d187271e0eef7e84 Mon Sep 17 00:00:00 2001 From: apainintheneck Date: Sun, 3 Mar 2024 18:42:23 -0800 Subject: [PATCH 4/6] Update version and readme --- CHANGELOG.md | 2 +- README.md | 7 +++---- shard.yml | 2 +- src/linenoise.cr | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2510ec..9a70516 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## Unreleased +## [0.4.0] - 2024-03-03 ### Added diff --git a/README.md b/README.md index 8e6e31a..1dfe542 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ Crystal bindings for the lightweight [Linenoise](https://github.com/antirez/linenoise) line editor library written in C. It is a minimal alternative to readline and libedit. -Linenoise is written in C code that supports most distributions of the Linux, macOS and BSD operating systems. We compile the library on install so linking should not be a problem and the library is lightwieght (less than 900 lines of code) so the resulting binary should be small. +Linenoise is written in C code that supports most distributions of the Linux, macOS and BSD operating systems. We compile the library on install so linking should not be a problem and the library is lightwieght (less than 1500 lines of code) so the resulting binary should be small. + +As of `v0.4.0`, UTF-8 support has been added. This means that the cursor won't have problems when writing in languages with non-Roman alphabets or using emoji. ## Installation @@ -50,9 +52,6 @@ end For more information look at the files in the `example/` directory and the [documentation website](https://apainintheneck.github.io/crystal-linenoise/). ## Missing Features & Known Bugs -UTF-8 support is not included but there have been multiple attempts at adding that in the past. It might be worth investigating more if any of those is mature enough to be worth including here as a patch. See https://github.com/apainintheneck/crystal-linenoise/issues/21 for more info. - -The multiline mode doesn't currently support escape codes when calculating cursor position so the presence of them will mean that the cursor will end up further to the right than expected. Single line mode support for escape codes was added in https://github.com/apainintheneck/crystal-linenoise/pull/22 in v0.3.0. There is no high-level wrapper around the multiplexing API but the Crystal bindings have been added for it. See `src/lib/lib_linenoise.cr` for more details. diff --git a/shard.yml b/shard.yml index fccae58..5f111e5 100644 --- a/shard.yml +++ b/shard.yml @@ -1,5 +1,5 @@ name: linenoise -version: 0.3.0 +version: 0.4.0 authors: - Kevin Robell diff --git a/src/linenoise.cr b/src/linenoise.cr index 7b66b12..0cb1666 100644 --- a/src/linenoise.cr +++ b/src/linenoise.cr @@ -2,7 +2,7 @@ require "./lib/lib_linenoise" require "./linenoise/completion" module Linenoise - VERSION = "0.3.0" + VERSION = "0.4.0" # # Blocking API. From 1be887991149cc1baf78582f1325b3c7cfe9ddae Mon Sep 17 00:00:00 2001 From: apainintheneck Date: Sun, 3 Mar 2024 18:47:08 -0800 Subject: [PATCH 5/6] Add acknowledgments to readme --- CHANGELOG.md | 2 +- README.md | 6 ++++++ ext/CHANGES.md | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a70516..f6d19b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Added -- Add UTF8 support (Unicode v13.0) +- Add UTF8 support (Unicode v14.0) ### Fixed diff --git a/README.md b/README.md index 1dfe542..8c90629 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,12 @@ The `crystal docs` command can be used to build the docs locally or you can visi - [apainintheneck](https://github.com/apainintheneck) - creator and maintainer +## Acknowledgments + +I have pulled in a few PRs from upstream that haven't been merged into the main Linenoise project yet. + +Check out the [extension changes](ext/CHANGES.md) file for more information. + ## License This library is released under the [MIT license](LICENSE). diff --git a/ext/CHANGES.md b/ext/CHANGES.md index 8fe70f3..8b90142 100644 --- a/ext/CHANGES.md +++ b/ext/CHANGES.md @@ -92,4 +92,4 @@ All credit goes to antirez as mentioned in the license includes in this director - Note - This is a massive PR which provides UTF8 support and at the same time fixes the cursor position errors in multiline - mode. It currently supports Unicode 13.0. \ No newline at end of file + mode. It currently supports Unicode 14.0. \ No newline at end of file From 72d6d75e48635f96a8d677be4954a8da00f55d47 Mon Sep 17 00:00:00 2001 From: apainintheneck Date: Sun, 3 Mar 2024 18:50:03 -0800 Subject: [PATCH 6/6] Add context about how mappings were made --- ext/CHANGES.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/CHANGES.md b/ext/CHANGES.md index 8b90142..73b54d0 100644 --- a/ext/CHANGES.md +++ b/ext/CHANGES.md @@ -92,4 +92,7 @@ All credit goes to antirez as mentioned in the license includes in this director - Note - This is a massive PR which provides UTF8 support and at the same time fixes the cursor position errors in multiline - mode. It currently supports Unicode 14.0. \ No newline at end of file + mode. It currently supports Unicode 14.0. + - The list of UTF-8 character mappings was created by parsing + the UTF-8 character files downloaded from the Unicode + Consortium website.