LCOV - code coverage report
Current view: top level - src/flisp - julia_extensions.c (source / functions) Hit Total Coverage
Test: [build process] commit ef510b1f346f4c9f9d86eaceace5ca54961a1dbc Lines: 215 256 84.0 %
Date: 2022-07-17 01:01:28 Functions: 26 31 83.9 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 256 478 53.6 %

           Branch data     Line data    Source code
       1                 :            : #include <stdlib.h>
       2                 :            : #include <stdio.h>
       3                 :            : #include <string.h>
       4                 :            : #include <assert.h>
       5                 :            : 
       6                 :            : #include "utf8proc.h"
       7                 :            : #undef JL_DLLEXPORT /* avoid conflicting definition */
       8                 :            : 
       9                 :            : #include "flisp.h"
      10                 :            : 
      11                 :            : #ifdef __cplusplus
      12                 :            : extern "C" {
      13                 :            : #endif
      14                 :            : 
      15                 :            : #define _equal_wchar_(x, y, ctx) ((x) == (y))
      16                 :            : #define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x)))
      17                 :            : #include "htable.inc"
      18   [ +  +  +  +  :    4811554 : HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_)
          +  +  -  +  +  
          -  -  +  -  +  
          +  -  +  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
          -  -  -  -  -  
                   -  - ]
      19                 :            : 
      20                 :   10677260 : static int is_uws(uint32_t wc)
      21                 :            : {
      22   [ +  +  +  -  :   10677160 :     return (wc==9 || wc==10 || wc==11 || wc==12 || wc==13 || wc==32 ||
          +  -  +  -  +  
                +  +  - ]
      23   [ +  +  +  -  :    5160320 :             wc==133 || wc==160 || wc==5760 || wc==6158 || wc==8192 ||
          +  -  +  -  +  
                      - ]
      24   [ +  -  +  -  :    5160320 :             wc==8193 || wc==8194 || wc==8195 || wc==8196 || wc==8197 ||
          +  -  +  -  +  
                      - ]
      25   [ +  -  +  -  :    5160320 :             wc==8198 || wc==8199 || wc==8200 || wc==8201 || wc==8202 ||
          +  -  +  -  +  
                      - ]
      26   [ +  +  +  -  :   21354400 :             wc==8232 || wc==8233 || wc==8239 || wc==8287 || wc==12288);
          +  -  +  -  -  
                      + ]
      27                 :            : }
      28                 :            : 
      29                 :    5160320 : static int is_bom(uint32_t wc)
      30                 :            : {
      31                 :    5160320 :     return wc == 0xFEFF;
      32                 :            : }
      33                 :            : 
      34                 :   22053200 : static int safe_peekutf8(fl_context_t *fl_ctx, ios_t *s, uint32_t *pwc)
      35                 :            : {
      36                 :   22053200 :     int result = ios_peekutf8(s, pwc);
      37         [ -  + ]:   22053200 :     if (result == 0)
      38                 :          0 :         lerror(fl_ctx, fl_ctx->IOError, "invalid UTF-8 sequence");
      39                 :   22053200 :     return result;
      40                 :            : }
      41                 :            : 
      42                 :    5746780 : value_t fl_skipws(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
      43                 :            : {
      44                 :    5746780 :     argcount(fl_ctx, "skip-ws", nargs, 2);
      45                 :    5746780 :     ios_t *s = fl_toiostream(fl_ctx, args[0], "skip-ws");
      46                 :    5746780 :     int newlines = (args[1]!=fl_ctx->F);
      47                 :    5746780 :     uint32_t wc=0;
      48                 :    5746780 :     value_t skipped = fl_ctx->F;
      49                 :            :     while (1) {
      50         [ +  + ]:   10691000 :         if (safe_peekutf8(fl_ctx, s, &wc) == IOS_EOF) {
      51                 :      13746 :             ios_getutf8(s, &wc);  // to set EOF flag if this is a true EOF
      52         [ -  + ]:      13746 :             if (!ios_eof(s))
      53                 :          0 :                 lerror(fl_ctx, symbol(fl_ctx, "error"), "incomplete character");
      54                 :      13746 :             return fl_ctx->FL_EOF;
      55                 :            :         }
      56   [ +  -  +  +  :   10677260 :         if (!ios_eof(s) && (is_uws(wc) || is_bom(wc)) && (newlines || wc!=10)) {
          -  +  +  +  +  
                      + ]
      57                 :    4944220 :             skipped = fl_ctx->T;
      58                 :    4944220 :             ios_getutf8(s, &wc);
      59                 :            :         }
      60                 :            :         else {
      61                 :            :             break;
      62                 :            :         }
      63                 :            :     }
      64                 :    5733040 :     return skipped;
      65                 :            : }
      66                 :            : 
      67                 :      10174 : static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
      68                 :            : {
      69   [ +  +  +  - ]:      10038 :     return (cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LL ||
      70   [ +  +  +  - ]:       6174 :             cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LM ||
      71   [ +  -  +  - ]:       5874 :             cat == UTF8PROC_CATEGORY_LO || cat == UTF8PROC_CATEGORY_NL ||
      72         [ -  + ]:       5874 :             cat == UTF8PROC_CATEGORY_SC ||  // allow currency symbols
      73                 :            :             // other symbols, but not arrows or replacement characters
      74   [ #  #  #  #  :          0 :             (cat == UTF8PROC_CATEGORY_SO && !(wc >= 0x2190 && wc <= 0x21FF) &&
                   #  # ]
      75   [ #  #  #  # ]:          0 :              wc != 0xfffc && wc != 0xfffd &&
      76         [ #  # ]:          0 :              wc != 0x233f &&  // notslash
      77         [ +  + ]:       5874 :              wc != 0x00a6) || // broken bar
      78                 :            : 
      79                 :            :             // math symbol (category Sm) whitelist
      80   [ +  +  +  - ]:       3100 :             (wc >= 0x2140 && wc <= 0x2a1c &&
      81   [ +  -  +  - ]:       3096 :              ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄
      82   [ +  -  +  -  :       3096 :               wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
                   +  + ]
      83   [ +  +  +  - ]:       3056 :               wc == 0x22a4 || wc == 0x22a5 ||   // ⊤ ⊥
      84                 :            : 
      85   [ +  +  +  - ]:       3036 :               (wc >= 0x2200 && wc <= 0x2233 &&
      86   [ +  -  +  -  :        884 :                (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆
                   +  - ]
      87   [ +  -  +  -  :        884 :                 wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏
                   +  - ]
      88   [ +  -  +  -  :        884 :                 wc == 0x2200 || wc == 0x2203 || wc == 0x2204 || // ∀, ∃, ∄
                   +  - ]
      89   [ +  -  +  + ]:        884 :                 wc == 0x2210 || wc == 0x2211 || // ∐, ∑
      90   [ +  -  +  - ]:        868 :                 wc == 0x221e || wc == 0x221f || // ∞, ∟
      91         [ +  + ]:       3020 :                 wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳
      92                 :            : 
      93   [ +  -  -  + ]:       3020 :               (wc >= 0x22c0 && wc <= 0x22c3) ||  // N-ary big ops: ⋀, ⋁, ⋂, ⋃
      94   [ -  -  -  + ]:       3020 :               (wc >= 0x25F8 && wc <= 0x25ff) ||  // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿
      95                 :            : 
      96         [ #  # ]:          0 :               (wc >= 0x266f &&
      97   [ #  #  #  #  :          0 :                (wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙
                   #  # ]
      98   [ #  #  #  # ]:          0 :                 (wc >= 0x27c0 && wc <= 0x27c1) ||  // ⟀, ⟁
      99   [ #  #  #  # ]:          0 :                 (wc >= 0x29b0 && wc <= 0x29b4) ||  // ⦰, ⦱, ⦲, ⦳, ⦴
     100   [ #  #  #  # ]:          0 :                 (wc >= 0x2a00 && wc <= 0x2a06) ||  // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆
     101   [ #  #  #  # ]:          0 :                 (wc >= 0x2a09 && wc <= 0x2a16) ||  // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖
     102   [ -  -  -  + ]:       5798 :                 wc == 0x2a1b || wc == 0x2a1c)))) || // ⨛, ⨜
     103                 :            : 
     104         [ #  # ]:          0 :             (wc >= 0x1d6c1 && // variants of \nabla and \partial
     105   [ #  #  #  # ]:          0 :              (wc == 0x1d6c1 || wc == 0x1d6db ||
     106   [ #  #  #  # ]:          0 :               wc == 0x1d6fb || wc == 0x1d715 ||
     107   [ #  #  #  # ]:          0 :               wc == 0x1d735 || wc == 0x1d74f ||
     108   [ #  #  #  # ]:          0 :               wc == 0x1d76f || wc == 0x1d789 ||
     109   [ -  -  +  + ]:       5798 :               wc == 0x1d7a9 || wc == 0x1d7c3)) ||
     110                 :            : 
     111                 :            :             // super- and subscript +-=()
     112   [ +  +  +  + ]:       5798 :             (wc >= 0x207a && wc <= 0x207e) ||
     113   [ +  +  +  + ]:       5710 :             (wc >= 0x208a && wc <= 0x208e) ||
     114                 :            : 
     115                 :            :             // angle symbols
     116   [ +  -  +  + ]:       5338 :             (wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢
     117   [ +  -  +  - ]:       5338 :             (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯
     118                 :            : 
     119                 :            :             // Other_ID_Start
     120   [ +  -  -  + ]:       5338 :             wc == 0x2118 || wc == 0x212E || // ℘, ℮
     121   [ +  +  -  -  :      20212 :             (wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks
                   -  + ]
     122                 :            : 
     123                 :            :             // bold-digits and double-struck digits
     124         [ #  # ]:          0 :             (wc >= 0x1D7CE && wc <= 0x1D7E1)); // 𝟎 through 𝟗 (inclusive), 𝟘 through 𝟡 (inclusive)
     125                 :            : }
     126                 :            : 
     127                 :    5063140 : JL_DLLEXPORT int jl_id_start_char(uint32_t wc)
     128                 :            : {
     129   [ +  +  +  +  :    5063140 :     if ((wc >= 'A' && wc <= 'Z') || (wc >= 'a' && wc <= 'z') || wc == '_')
          +  +  +  +  +  
                      + ]
     130                 :    2229780 :         return 1;
     131   [ +  +  -  + ]:    2833360 :     if (wc < 0xA1 || wc > 0x10ffff)
     132                 :    2826020 :         return 0;
     133                 :       7326 :     return is_wc_cat_id_start(wc, utf8proc_category((utf8proc_int32_t) wc));
     134                 :            : }
     135                 :            : 
     136                 :   11660700 : JL_DLLEXPORT int jl_id_char(uint32_t wc)
     137                 :            : {
     138   [ +  +  +  +  :   11660700 :     if ((wc >= 'A' && wc <= 'Z') || (wc >= 'a' && wc <= 'z') || wc == '_' ||
          +  +  +  +  +  
                +  +  + ]
     139   [ +  +  +  + ]:    2335900 :         (wc >= '0' && wc <= '9') || wc == '!')
     140                 :    9493400 :         return 1;
     141   [ +  +  -  + ]:    2167300 :     if (wc < 0xA1 || wc > 0x10ffff)
     142                 :    2164440 :         return 0;
     143                 :       2848 :     utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc);
     144         [ +  + ]:       2848 :     if (is_wc_cat_id_start(wc, cat)) return 1;
     145   [ +  -  +  -  :       2008 :     if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC ||
                   +  - ]
     146   [ +  -  +  + ]:       2008 :         cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_PC ||
     147   [ +  -  +  + ]:       1922 :         cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_ME ||
     148         [ +  + ]:       1110 :         cat == UTF8PROC_CATEGORY_NO ||
     149                 :            :         // primes (single, double, triple, their reverses, and quadruple)
     150   [ +  +  -  + ]:       1110 :         (wc >= 0x2032 && wc <= 0x2037) || (wc == 0x2057))
     151                 :       1914 :         return 1;
     152                 :         94 :     return 0;
     153                 :            : }
     154                 :            : 
     155                 :            : #include "julia_opsuffs.h"
     156                 :            : 
     157                 :            : // chars that can follow an operator (e.g. +) and be parsed as part of the operator
     158                 :   11283040 : JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc)
     159                 :            : {
     160                 :            :     static htable_t jl_opsuffs; // XXX: requires uv_once
     161         [ +  + ]:   11283040 :     if (!jl_opsuffs.size) { // initialize hash table of suffixes
     162                 :         30 :         size_t i, opsuffs_len = sizeof(opsuffs) / (sizeof(uint32_t));
     163                 :         30 :         htable_t *h = htable_new(&jl_opsuffs, opsuffs_len);
     164                 :            :         assert(sizeof(uint32_t) <= sizeof(void*));
     165         [ +  + ]:       3540 :         for (i = 0; i < opsuffs_len; ++i)
     166                 :       3510 :             wcharhash_put_r(h, (void*)((uintptr_t)opsuffs[i]), NULL, NULL);
     167                 :            :     }
     168   [ +  +  -  + ]:   11283040 :     if (wc < 0xA1 || wc > 0x10ffff) return 0;
     169                 :      25418 :     utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc);
     170   [ +  -  +  -  :      25418 :     if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC ||
                   -  + ]
     171                 :            :         cat == UTF8PROC_CATEGORY_ME)
     172                 :          0 :         return 1;
     173                 :            :     // use hash table of other allowed characters: primes and sub/superscripts
     174                 :      25418 :     return HT_NOTFOUND != wcharhash_get_r(&jl_opsuffs, (void*)((uintptr_t)wc), NULL);
     175                 :            : }
     176                 :            : 
     177                 :            : // chars that we will never allow to be part of a valid non-operator identifier
     178                 :       5410 : static int never_id_char(uint32_t wc)
     179                 :            : {
     180                 :       5410 :      utf8proc_category_t cat = utf8proc_category((utf8proc_int32_t) wc);
     181                 :            :      return (
     182                 :            :           // spaces and control characters:
     183   [ -  +  +  - ]:       5410 :           (cat >= UTF8PROC_CATEGORY_ZS && cat <= UTF8PROC_CATEGORY_CS) ||
     184                 :            : 
     185                 :            :           // ASCII and Latin1 non-connector punctuation
     186         [ +  - ]:       3564 :           (wc < 0xff &&
     187   [ +  +  -  + ]:       3564 :            cat >= UTF8PROC_CATEGORY_PD && cat <= UTF8PROC_CATEGORY_PO) ||
     188                 :            : 
     189         [ #  # ]:          0 :           wc == '`' ||
     190                 :            : 
     191                 :            :           // mathematical brackets
     192   [ #  #  #  # ]:          0 :           (wc >= 0x27e6 && wc <= 0x27ef) ||
     193                 :            :           // angle, corner, and lenticular brackets
     194   [ #  #  #  # ]:          0 :           (wc >= 0x3008 && wc <= 0x3011) ||
     195                 :            :           // tortoise shell, square, and more lenticular brackets
     196   [ #  #  #  # ]:          0 :           (wc >= 0x3014 && wc <= 0x301b) ||
     197                 :            :           // fullwidth parens
     198   [ +  +  -  -  :      10820 :           (wc == 0xff08 || wc == 0xff09) ||
                   -  - ]
     199                 :            :           // fullwidth square brackets
     200         [ #  # ]:          0 :           (wc == 0xff3b || wc == 0xff3d));
     201                 :            : }
     202                 :            : 
     203                 :          0 : value_t fl_julia_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     204                 :            : {
     205                 :          0 :     argcount(fl_ctx, "identifier-char?", nargs, 1);
     206   [ #  #  #  # ]:          0 :     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
     207                 :          0 :         type_error(fl_ctx, "identifier-char?", "wchar", args[0]);
     208                 :          0 :     uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
     209         [ #  # ]:          0 :     return jl_id_char(wc) ? fl_ctx->T : fl_ctx->F;
     210                 :            : }
     211                 :            : 
     212                 :    4997500 : value_t fl_julia_identifier_start_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     213                 :            : {
     214                 :    4997500 :     argcount(fl_ctx, "identifier-start-char?", nargs, 1);
     215   [ +  -  -  + ]:    4997500 :     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
     216                 :          0 :         type_error(fl_ctx, "identifier-start-char?", "wchar", args[0]);
     217                 :    4997500 :     uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
     218         [ +  + ]:    4997500 :     return jl_id_start_char(wc) ? fl_ctx->T : fl_ctx->F;
     219                 :            : }
     220                 :            : 
     221                 :       5410 : value_t fl_julia_never_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     222                 :            : {
     223                 :       5410 :     argcount(fl_ctx, "never-identifier-char?", nargs, 1);
     224   [ +  -  -  + ]:       5410 :     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
     225                 :          0 :         type_error(fl_ctx, "never-identifier-char?", "wchar", args[0]);
     226                 :       5410 :     uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
     227         [ +  - ]:       5410 :     return never_id_char(wc) ? fl_ctx->T : fl_ctx->F;
     228                 :            : }
     229                 :            : 
     230                 :            : 
     231                 :    1267948 : value_t fl_julia_op_suffix_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     232                 :            : {
     233                 :    1267948 :     argcount(fl_ctx, "op-suffix-char?", nargs, 1);
     234   [ +  -  -  + ]:    1267948 :     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
     235                 :          0 :         type_error(fl_ctx, "op-suffix-char?", "wchar", args[0]);
     236                 :    1267948 :     uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
     237         [ +  + ]:    1267948 :     return jl_op_suffix_char(wc) ? fl_ctx->T : fl_ctx->F;
     238                 :            : }
     239                 :            : 
     240                 :    4742820 : value_t fl_julia_strip_op_suffix(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     241                 :            : {
     242                 :    4742820 :     argcount(fl_ctx, "strip-op-suffix", nargs, 1);
     243         [ -  + ]:    4742820 :     if (!issymbol(args[0]))
     244                 :          0 :         type_error(fl_ctx, "strip-op-suffix", "symbol", args[0]);
     245                 :    4742820 :     char *op = symbol_name(fl_ctx, args[0]);
     246                 :    4742820 :     size_t i = 0;
     247         [ +  + ]:   14757600 :     while (op[i]) {
     248                 :   10015100 :         size_t j = i;
     249         [ +  + ]:   10015100 :         if (jl_op_suffix_char(u8_nextchar(op, &j)))
     250                 :        334 :             break;
     251                 :   10014760 :         i = j;
     252                 :            :     }
     253         [ +  + ]:    4742820 :     if (!op[i]) return args[0]; // no suffix to strip
     254         [ -  + ]:        334 :     if (!i) return args[0]; // only suffix chars --- might still be a valid identifier
     255                 :        334 :     char *opnew = strncpy((char*)malloc(i+1), op, i);
     256                 :            :     // TODO: if argument to opnew == NULL
     257                 :        334 :     opnew[i] = 0;
     258                 :        334 :     value_t opnew_symbol = symbol(fl_ctx, opnew);
     259                 :        334 :     free(opnew);
     260                 :        334 :     return opnew_symbol;
     261                 :            : }
     262                 :            : 
     263                 :            : /* check whether arg is a symbol that consists solely of underscores. */
     264                 :   31443600 : value_t fl_julia_underscore_symbolp(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     265                 :            : {
     266                 :   31443600 :     argcount(fl_ctx, "underscore-symbol?", nargs, 1);
     267         [ +  + ]:   31443600 :     if (!issymbol(args[0])) return fl_ctx->F;
     268                 :    8013020 :     char *op = symbol_name(fl_ctx, args[0]);
     269         [ -  + ]:    8013020 :     if (*op == '\0') return fl_ctx->F; // return false for empty symbol
     270         [ +  + ]:    8106600 :     while (*op == '_') ++op;
     271         [ +  + ]:    8013020 :     return *op ? fl_ctx->F : fl_ctx->T;
     272                 :            : }
     273                 :            : 
     274                 :            : #include "julia_charmap.h"
     275                 :            : 
     276                 :    1967496 : utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *ctx)
     277                 :            : {
     278                 :            :     static htable_t jl_charmap; // XXX: requires uv_once
     279         [ +  + ]:    1967496 :     if (!jl_charmap.size) { // initialize hash table
     280                 :         30 :         size_t i, charmap_len = sizeof(charmap) / (2*sizeof(uint32_t));
     281                 :         30 :         htable_t *h = htable_new(&jl_charmap, charmap_len);
     282                 :            :         assert(sizeof(uint32_t) <= sizeof(void*));
     283         [ +  + ]:        180 :         for (i = 0; i < charmap_len; ++i) {
     284                 :            :             /* Store charmap in a hash table.  Typecasting codepoints
     285                 :            :                directly to pointer keys works because pointers are at
     286                 :            :                least 32 bits on all Julia-supported systems, and because
     287                 :            :                we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */
     288         [ -  + ]:        150 :             assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND);
     289                 :        150 :             wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]),
     290                 :        150 :                                (void*)((uintptr_t)charmap[i][1]), NULL);
     291                 :            :         }
     292                 :            :     }
     293                 :    1967496 :     void *v = wcharhash_get_r(&jl_charmap, (void*)((uintptr_t)c), NULL);
     294         [ -  + ]:    1967496 :     return v == HT_NOTFOUND ? c : (utf8proc_int32_t) ((uintptr_t) v);
     295                 :            : }
     296                 :            : 
     297                 :            : // return NFC-normalized UTF8-encoded version of s, with
     298                 :            : // additional custom normalizations defined by jl_charmap above.
     299                 :     660282 : static char *normalize(fl_context_t *fl_ctx, char *s)
     300                 :            : {
     301                 :            :     // options equivalent to utf8proc_NFC:
     302                 :     660282 :     const int options = UTF8PROC_NULLTERM|UTF8PROC_STABLE|UTF8PROC_COMPOSE;
     303                 :            :     ssize_t result;
     304                 :            :     size_t newlen;
     305                 :     660282 :     result = utf8proc_decompose_custom((uint8_t*) s, 0, NULL, 0, (utf8proc_option_t)options,
     306                 :            :                                        jl_charmap_map, NULL);
     307         [ -  + ]:     660282 :     if (result < 0) goto error;
     308                 :     660282 :     newlen = result * sizeof(int32_t) + 1;
     309         [ +  + ]:     660282 :     if (newlen > fl_ctx->jlbuflen) {
     310                 :         40 :         fl_ctx->jlbuflen = newlen * 2;
     311                 :         40 :         fl_ctx->jlbuf = realloc(fl_ctx->jlbuf, fl_ctx->jlbuflen);
     312         [ -  + ]:         40 :         if (!fl_ctx->jlbuf) lerror(fl_ctx, fl_ctx->OutOfMemoryError, "error allocating UTF8 buffer");
     313                 :            :     }
     314                 :     660282 :     result = utf8proc_decompose_custom((uint8_t*)s,0, (int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options,
     315                 :            :                                        jl_charmap_map, NULL);
     316         [ -  + ]:     660282 :     if (result < 0) goto error;
     317                 :     660282 :     result = utf8proc_reencode((int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options);
     318         [ -  + ]:     660282 :     if (result < 0) goto error;
     319                 :     660282 :     return (char*) fl_ctx->jlbuf;
     320                 :          0 : error:
     321                 :          0 :     lerrorf(fl_ctx, symbol(fl_ctx, "error"), "error normalizing identifier %s: %s", s,
     322                 :            :             utf8proc_errmsg(result));
     323                 :            : }
     324                 :            : 
     325                 :    2165000 : value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     326                 :            : {
     327                 :    2165000 :     argcount(fl_ctx, "accum-julia-symbol", nargs, 2);
     328                 :    2165000 :     ios_t *s = fl_toiostream(fl_ctx, args[1], "accum-julia-symbol");
     329   [ +  -  -  + ]:    2165000 :     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
     330                 :          0 :         type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]);
     331                 :    2165000 :     uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0])); // peek the first character we'll read
     332                 :            :     ios_t str;
     333                 :    2165000 :     int allascii = 1;
     334                 :    2165000 :     ios_mem(&str, 0);
     335                 :            :     do {
     336                 :   11362140 :         ios_getutf8(s, &wc);
     337         [ +  + ]:   11362140 :         if (wc == '!') {
     338                 :      32454 :             uint32_t nwc = 0;
     339                 :      32454 :             ios_peekutf8(s, &nwc);
     340                 :            :             // make sure != is always an operator
     341         [ +  + ]:      32454 :             if (nwc == '=') {
     342                 :         46 :                 ios_skip(s, -1);
     343                 :         46 :                 break;
     344                 :            :             }
     345                 :            :         }
     346                 :   11362100 :         allascii &= (wc <= 0x7f);
     347                 :   11362100 :         ios_pututf8(&str, wc);
     348         [ +  + ]:   11362100 :         if (safe_peekutf8(fl_ctx, s, &wc) == IOS_EOF)
     349                 :        432 :             break;
     350         [ +  + ]:   11361680 :     } while (jl_id_char(wc));
     351                 :    2165000 :     ios_pututf8(&str, 0);
     352         [ +  + ]:    2165000 :     return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf));
     353                 :            : }
     354                 :            : 
     355                 :            : /* convert a string to a symbol, first applying normalization */
     356                 :     654072 : value_t fl_string2normsymbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     357                 :            : {
     358                 :     654072 :     argcount(fl_ctx, "string->normsymbol", nargs, 1);
     359         [ -  + ]:     654072 :     if (!fl_isstring(fl_ctx, args[0]))
     360                 :          0 :         type_error(fl_ctx, "string->normsymbol", "string", args[0]);
     361                 :     654072 :     return symbol(fl_ctx, normalize(fl_ctx, (char*)cvalue_data(args[0])));
     362                 :            : }
     363                 :            : 
     364                 :         40 : static uint32_t _iterate_continued(uint8_t *s, size_t n, size_t *i, uint32_t u) {
     365         [ -  + ]:         40 :     if (u < 0xc0000000) { ++*i; return u; }
     366                 :            :     uint8_t b;
     367                 :            : 
     368         [ -  + ]:         40 :     if (++*i >= n) return u;
     369                 :         40 :     b = s[*i]; // cont byte 1
     370         [ -  + ]:         40 :     if ((b & 0xc0) != 0x80) return u;
     371                 :         40 :     u |= (uint32_t)b << 16;
     372                 :            : 
     373   [ +  +  -  + ]:         40 :     if (++*i >= n || u < 0xe0000000) return u;
     374                 :         28 :     b = s[*i]; // cont byte 2
     375         [ -  + ]:         28 :     if ((b & 0xc0) != 0x80) return u;
     376                 :         28 :     u |= (uint32_t)b << 8;
     377                 :            : 
     378   [ +  +  -  + ]:         28 :     if (++*i >= n || u < 0xf0000000) return u;
     379                 :         12 :     b = s[*i]; // cont byte 3
     380         [ -  + ]:         12 :     if ((b & 0xc0) != 0x80) return u;
     381                 :         12 :     u |= (uint32_t)b; ++*i;
     382                 :            : 
     383                 :         12 :     return u;
     384                 :            : }
     385                 :            : 
     386                 :       1336 : static uint32_t _string_only_julia_char(uint8_t *s, size_t n) {
     387   [ +  -  -  + ]:       1336 :     if (!(0 < n && n <= 4))
     388                 :          0 :         return -1;
     389                 :       1336 :     size_t i = 0;
     390                 :       1336 :     uint8_t b = s[i];
     391                 :       1336 :     uint32_t u = (uint32_t)b << 24;
     392   [ +  +  +  - ]:       1336 :     if (0x80 <= b && b <= 0xf7)
     393                 :         40 :         u = _iterate_continued(s, n, &i, u);
     394                 :            :     else
     395                 :       1296 :         i = 1;
     396         [ -  + ]:       1336 :     if (i < n)
     397                 :          0 :         return -1;
     398                 :       1336 :     return u;
     399                 :            : }
     400                 :            : 
     401                 :       1336 : value_t fl_string_only_julia_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) {
     402                 :       1336 :     argcount(fl_ctx, "string.only-julia-char", nargs, 1);
     403         [ -  + ]:       1336 :     if (!fl_isstring(fl_ctx, args[0]))
     404                 :          0 :         type_error(fl_ctx, "string.only-julia-char", "string", args[0]);
     405                 :       1336 :     uint8_t *s = (uint8_t*)cvalue_data(args[0]);
     406                 :       1336 :     size_t len = cv_len((cvalue_t*)ptr(args[0]));
     407                 :       1336 :     uint32_t u = _string_only_julia_char(s, len);
     408         [ -  + ]:       1336 :     if (u == (uint32_t)-1)
     409                 :          0 :         return fl_ctx->F;
     410                 :       1336 :     return fl_list2(fl_ctx, fl_ctx->jl_char_sym, mk_uint32(fl_ctx, u));
     411                 :            : }
     412                 :            : 
     413                 :            : static const builtinspec_t julia_flisp_func_info[] = {
     414                 :            :     { "skip-ws", fl_skipws },
     415                 :            :     { "accum-julia-symbol", fl_accum_julia_symbol },
     416                 :            :     { "identifier-char?", fl_julia_identifier_char },
     417                 :            :     { "identifier-start-char?", fl_julia_identifier_start_char },
     418                 :            :     { "never-identifier-char?", fl_julia_never_identifier_char },
     419                 :            :     { "op-suffix-char?", fl_julia_op_suffix_char },
     420                 :            :     { "strip-op-suffix", fl_julia_strip_op_suffix },
     421                 :            :     { "underscore-symbol?", fl_julia_underscore_symbolp },
     422                 :            :     { "string->normsymbol", fl_string2normsymbol },
     423                 :            :     { "string.only-julia-char", fl_string_only_julia_char },
     424                 :            :     { NULL, NULL }
     425                 :            : };
     426                 :            : 
     427                 :         30 : void fl_init_julia_extensions(fl_context_t *fl_ctx)
     428                 :            : {
     429                 :         30 :     assign_global_builtins(fl_ctx, julia_flisp_func_info);
     430                 :         30 : }
     431                 :            : 
     432                 :            : #ifdef __cplusplus
     433                 :            : }
     434                 :            : #endif

Generated by: LCOV version 1.14