GCC Code Coverage Report
Directory: ../src/ Exec Total Coverage
File: /home/joels/Current/lispbm/src/extensions/string_extensions.c Lines: 386 422 91.5 %
Date: 2024-12-05 14:36:58 Branches: 217 254 85.4 %

Line Branch Exec Source
1
/*
2
    Copyright 2022, 2023, 2024 Joel Svensson        svenssonjoel@yahoo.se
3
    Copyright 2022, 2023       Benjamin Vedder
4
    Copyright             2024 Rasmus Söderhielm    rasmus.soderhielm@gmail.com
5
6
    This program is free software: you can redistribute it and/or modify
7
    it under the terms of the GNU General Public License as published by
8
    the Free Software Foundation, either version 3 of the License, or
9
    (at your option) any later version.
10
11
    This program is distributed in the hope that it will be useful,
12
    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
    GNU General Public License for more details.
15
16
    You should have received a copy of the GNU General Public License
17
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
*/
19
20
#include "extensions.h"
21
#include "lbm_memory.h"
22
#include "heap.h"
23
#include "fundamental.h"
24
#include "lbm_c_interop.h"
25
#include "eval_cps.h"
26
#include "print.h"
27
28
#include <ctype.h>
29
30
#ifndef MIN
31
#define MIN(a,b) (((a)<(b))?(a):(b))
32
#endif
33
#ifndef MAX
34
#define MAX(a,b) (((a)>(b))?(a):(b))
35
#endif
36
37
static char print_val_buffer[256];
38
39
static lbm_uint sym_left;
40
static lbm_uint sym_case_insensitive;
41
42
43
384352
static size_t strlen_max(const char *s, size_t maxlen) {
44
  size_t i;
45
1219864
  for (i = 0; i < maxlen; i ++) {
46
1219836
    if (s[i] == 0) break;
47
  }
48
384352
  return i;
49
}
50
51
192580
static bool dec_str_size(lbm_value v, char **data, size_t *size) {
52
192580
  bool result = false;
53
192580
  if (lbm_is_array_r(v)) {
54
192524
      lbm_array_header_t *array = (lbm_array_header_t*) lbm_car(v);
55
192524
      *data = (char*)array->data;
56
192524
      *size = array->size;
57
192524
      result = true;
58
  }
59
192580
  return result;
60
}
61
62
40154
static lbm_value ext_str_from_n(lbm_value *args, lbm_uint argn) {
63

40154
  if (argn != 1 && argn != 2) {
64
84
    lbm_set_error_reason((char*)lbm_error_str_num_args);
65
84
    return ENC_SYM_EERROR;
66
  }
67
40070
  if (!lbm_is_number(args[0])) {
68
56
    return ENC_SYM_TERROR;
69
  }
70
71

40014
  if (argn == 2 && !lbm_is_array_r(args[1])) {
72
28
    return ENC_SYM_TERROR;
73
  }
74
75
39986
  char *format = 0;
76
39986
  if (argn == 2) {
77
84
    format = lbm_dec_str(args[1]);
78
  }
79
80
  char buffer[100];
81
39986
  size_t len = 0;
82
83
39986
  switch (lbm_type_of(args[0])) {
84
224
  case LBM_TYPE_DOUBLE: /* fall through */
85
  case LBM_TYPE_FLOAT:
86
224
    if (!format) {
87
168
      format = "%g";
88
    }
89
224
    len = (size_t)snprintf(buffer, sizeof(buffer), format, lbm_dec_as_double(args[0]));
90
224
    break;
91
92
39762
  default:
93
39762
    if (!format) {
94
39734
      format = "%d";
95
    }
96
39762
    len = (size_t)snprintf(buffer, sizeof(buffer), format, lbm_dec_as_i32(args[0]));
97
39762
    break;
98
  }
99
100
39986
  len = MIN(len, sizeof(buffer));
101
102
  lbm_value res;
103
39986
  if (lbm_create_array(&res, len + 1)) {
104
39956
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(res);
105
39956
    memcpy(arr->data, buffer, len);
106
39956
    ((char*)(arr->data))[len] = '\0';
107
39956
    return res;
108
  } else {
109
30
    return ENC_SYM_MERROR;
110
  }
111
}
112
113
// signature: (str-join strings [delim]) -> str
114
96080
static lbm_value ext_str_join(lbm_value *args, lbm_uint argn) {
115
  // This function does not check that the string arguments contain any
116
  // terminating null bytes.
117
118

96080
  if (argn != 1 && argn != 2) {
119
28
    lbm_set_error_reason((char *)lbm_error_str_num_args);
120
28
    return ENC_SYM_EERROR;
121
  }
122
123
96052
  size_t str_len   = 0;
124
96052
  size_t str_count = 0;
125
96052
  if (!lbm_is_list(args[0])) {
126
28
    lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
127
28
    lbm_set_error_suspect(args[0]);
128
28
    return ENC_SYM_TERROR;
129
  }
130
288128
  for (lbm_value current = args[0]; lbm_is_cons(current); current = lbm_cdr(current)) {
131
192132
    lbm_value car_val = lbm_car(current);
132
192132
    char *str = NULL;
133
192132
    size_t arr_size = 0;
134
192132
    if (dec_str_size(car_val, &str, &arr_size)) {
135
192104
      str_len += strlen_max(str, arr_size);
136
192104
      str_count += 1;
137
    } else {
138
28
      lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
139
28
      lbm_set_error_suspect(args[0]);
140
28
      return ENC_SYM_TERROR;
141
    }
142
  }
143
144
95996
  const char *delim = "";
145
95996
  if (argn >= 2) {
146
84264
    delim = lbm_dec_str(args[1]);
147
84264
    if (!delim) {
148
28
      lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
149
28
      lbm_set_error_suspect(args[1]);
150
28
      return ENC_SYM_TERROR;
151
    }
152
  }
153
154
95968
  size_t delim_len = strlen(delim);
155
95968
  if (str_count > 0) {
156
67880
    str_len += (str_count - 1) * delim_len;
157
  }
158
159
  lbm_value result;
160
95968
  if (!lbm_create_array(&result, str_len + 1)) {
161
96
    return ENC_SYM_MERROR;
162
  }
163
95872
  char *result_str = lbm_dec_str(result);
164
165
95872
  size_t i      = 0;
166
95872
  size_t offset = 0;
167
287728
  for (lbm_value current = args[0]; lbm_is_cons(current); current = lbm_cdr(current)) {
168
191856
    lbm_value car_val = lbm_car(current);
169
    // All arrays have been prechecked.
170
191856
    lbm_array_header_t *array = (lbm_array_header_t*) lbm_car(car_val);
171
191856
    char *str = (char*)array->data;
172
191856
    size_t len = strlen_max(str, array->size);
173
174
191856
    memcpy(result_str + offset, str, len);
175
191856
    offset += len;
176
177
191856
    if (i != str_count - 1) {
178
124040
      memcpy(result_str + offset, delim, delim_len);
179
124040
      offset += delim_len;
180
    }
181
191856
    i++;
182
  }
183
184
95872
  result_str[str_len] = '\0';
185
186
95872
  return result;
187
}
188
189
448
static lbm_value ext_str_to_i(lbm_value *args, lbm_uint argn) {
190

448
  if (argn != 1 && argn != 2) {
191
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
192
28
    return ENC_SYM_EERROR;
193
  }
194
195
420
  char *str = lbm_dec_str(args[0]);
196
420
  if (!str) {
197
28
    return ENC_SYM_TERROR;
198
  }
199
200
392
  int base = 0;
201
392
  if (argn == 2) {
202
28
    if (!lbm_is_number(args[1])) {
203
      return ENC_SYM_TERROR;
204
    }
205
206
28
    base = (int)lbm_dec_as_u32(args[1]);
207
  }
208
209
392
  return lbm_enc_i32((int32_t)strtol(str, NULL, base));
210
}
211
212
112
static lbm_value ext_str_to_f(lbm_value *args, lbm_uint argn) {
213
112
  if (argn != 1) {
214
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
215
28
    return ENC_SYM_EERROR;
216
  }
217
218
84
  char *str = lbm_dec_str(args[0]);
219
84
  if (!str) {
220
28
    return ENC_SYM_TERROR;
221
  }
222
223
56
  return lbm_enc_float(strtof(str, NULL));
224
}
225
226
112
static lbm_value ext_str_part(lbm_value *args, lbm_uint argn) {
227

112
  if ((argn != 2 && argn != 3) || !lbm_is_number(args[1])) {
228
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
229
28
    return ENC_SYM_TERROR;
230
  }
231
232
84
  size_t str_arr_len = 0;
233
84
  char *str = NULL;//lbm_dec_str(args[0]);
234
84
  if (!dec_str_size(args[0], &str, &str_arr_len)) {
235
    return ENC_SYM_TERROR;
236
  }
237
238
84
  uint32_t len = (uint32_t)strlen_max(str, str_arr_len);
239
240
84
  uint32_t start = lbm_dec_as_u32(args[1]);
241
242
84
  if (start >= len) {
243
    return ENC_SYM_EERROR;
244
  }
245
246
84
  uint32_t n = len - start;
247
84
  if (argn == 3) {
248
56
    if (!lbm_is_number(args[2])) {
249
      return ENC_SYM_TERROR;
250
    }
251
252
56
    n = MIN(lbm_dec_as_u32(args[2]), n);
253
  }
254
255
  lbm_value res;
256
84
  if (lbm_create_array(&res, n + 1)) {
257
84
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(res);
258
84
    memcpy(arr->data, str + start, n);
259
84
    ((char*)(arr->data))[n] = '\0';
260
84
    return res;
261
  } else {
262
    return ENC_SYM_MERROR;
263
  }
264
}
265
266
140
static lbm_value ext_str_split(lbm_value *args, lbm_uint argn) {
267
140
  if (argn != 2) {
268
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
269
28
    return ENC_SYM_EERROR;
270
  }
271
272
112
  size_t str_arr_size = 0;
273
112
  char *str = NULL; //lbm_dec_str(args[0]);
274
112
  if (!dec_str_size(args[0], &str, &str_arr_size)) {
275
    return ENC_SYM_TERROR;
276
  }
277
278
112
  char *split = lbm_dec_str(args[1]);
279
112
  int step = 0;
280
112
  if (!split) {
281
56
    if (lbm_is_number(args[1])) {
282
56
      step = MAX(lbm_dec_as_i32(args[1]), 1);
283
56
      lbm_value res = ENC_SYM_NIL;
284
56
      int len = (int)strlen_max(str, str_arr_size);
285
616
      for (int i = len / step;i >= 0;i--) {
286
560
        int ind_now = i * step;
287
560
        if (ind_now >= len) {
288
28
          continue;
289
        }
290
291
532
        int step_now = step;
292
560
        while ((ind_now + step_now) > len) {
293
28
          step_now--;
294
        }
295
296
        lbm_value tok;
297
532
        if (lbm_create_array(&tok, (lbm_uint)step_now + 1)) {
298
532
          lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(tok);
299
532
          memcpy(arr->data, str + ind_now, (unsigned int)step_now);
300
532
          ((char*)(arr->data))[step_now] = '\0';
301
532
          res = lbm_cons(tok, res);
302
        } else {
303
          return ENC_SYM_MERROR;
304
        }
305
      }
306
56
      return res;
307
    } else {
308
      return ENC_SYM_TERROR;
309
    }
310
  } else {
311
56
     lbm_value res = ENC_SYM_NIL;
312
56
    const char *s = str;
313
280
    while (*(s += strspn(s, split)) != '\0') {
314
224
      size_t len = strcspn(s, split);
315
316
      lbm_value tok;
317
224
      if (lbm_create_array(&tok, len + 1)) {
318
224
        lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(tok);
319
224
        memcpy(arr->data, s, len);
320
224
        ((char*)(arr->data))[len] = '\0';
321
224
        res = lbm_cons(tok, res);
322
      } else {
323
        return ENC_SYM_MERROR;
324
      }
325
224
      s += len;
326
    }
327
56
    return lbm_list_destructive_reverse(res);
328
  }
329
}
330
331
// Todo: Clean this up for 64bit
332
84
static lbm_value ext_str_replace(lbm_value *args, lbm_uint argn) {
333

84
  if (argn != 2 && argn != 3) {
334
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
335
28
    return ENC_SYM_EERROR;
336
  }
337
338
56
  size_t orig_arr_size = 0;
339
56
  char *orig = NULL; // lbm_dec_str(args[0]);
340
56
  if (!dec_str_size(args[0], &orig, &orig_arr_size)) {
341
    return ENC_SYM_TERROR;
342
  }
343
344
56
  size_t rep_arr_size = 0;
345
56
  char *rep = NULL; //lbm_dec_str(args[1]);
346
56
  if (!dec_str_size(args[1], &rep, &rep_arr_size)) {
347
    return ENC_SYM_TERROR;
348
  }
349
350
56
  size_t with_arr_size = 0;
351
56
  char *with = "";
352
56
  if (argn == 3) {
353
28
    if (!dec_str_size(args[2], &with, &with_arr_size)) {
354
      return ENC_SYM_TERROR;
355
    }
356
  }
357
358
  // See https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c
359
  //char *result; // the return string
360
  char *ins;    // the next insert point
361
  char *tmp;    // varies
362
  size_t len_rep;  // length of rep (the string to remove)
363
  size_t len_with; // length of with (the string to replace rep with)
364
  size_t len_front; // distance between rep and end of last rep
365
  int count;    // number of replacements
366
367
56
  len_rep = strlen_max(rep, rep_arr_size);
368
56
  if (len_rep == 0) {
369
    return args[0]; // empty rep causes infinite loop during count
370
  }
371
372
56
  len_with = strlen_max(with,with_arr_size);
373
374
  // count the number of replacements needed
375
56
  ins = orig;
376
112
  for (count = 0; (tmp = strstr(ins, rep)); ++count) {
377
56
    ins = tmp + len_rep;
378
  }
379
380
56
  size_t len_res = strlen_max(orig, orig_arr_size) + (len_with - len_rep) * (unsigned int)count + 1;
381
  lbm_value lbm_res;
382
56
  if (lbm_create_array(&lbm_res, len_res)) {
383
56
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(lbm_res);
384
56
    tmp = (char*)arr->data;
385
  } else {
386
    return ENC_SYM_MERROR;
387
  }
388
389
  // first time through the loop, all the variable are set correctly
390
  // from here on,
391
  //    tmp points to the end of the result string
392
  //    ins points to the next occurrence of rep in orig
393
  //    orig points to the remainder of orig after "end of rep"
394
112
  while (count--) {
395
56
    ins = strstr(orig, rep);
396
56
    len_front = (size_t)ins - (size_t)orig;
397
56
    tmp = strncpy(tmp, orig, len_front) + len_front;
398
56
    tmp = strncpy(tmp, with, len_with) + len_with;
399
56
    orig += len_front + len_rep; // move to next "end of rep"
400
  }
401
56
  strcpy(tmp, orig);
402
403
56
  return lbm_res;
404
}
405
406
112
static lbm_value change_case(lbm_value *args, lbm_uint argn, bool to_upper) {
407
112
  if (argn != 1) {
408
56
    lbm_set_error_reason((char*)lbm_error_str_num_args);
409
56
    return ENC_SYM_EERROR;
410
  }
411
412
56
  size_t orig_arr_size = 0;
413
56
  char *orig = NULL; //lbm_dec_str(args[0]);
414
56
  if (!dec_str_size(args[0], &orig, &orig_arr_size)) {
415
    return ENC_SYM_TERROR;
416
  }
417
418
56
  size_t len = strlen_max(orig,orig_arr_size);
419
  lbm_value lbm_res;
420
56
  if (lbm_create_array(&lbm_res, len + 1)) {
421
56
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(lbm_res);
422
336
    for (unsigned int i = 0;i < len;i++) {
423
280
      if (to_upper) {
424
140
	((char*)(arr->data))[i] = (char)toupper(orig[i]);
425
      } else {
426
140
	((char*)(arr->data))[i] = (char)tolower(orig[i]);
427
      }
428
    }
429
56
    ((char*)(arr->data))[len] = '\0';
430
56
    return lbm_res;
431
  } else {
432
    return ENC_SYM_MERROR;
433
  }
434
}
435
436
56
static lbm_value ext_str_to_lower(lbm_value *args, lbm_uint argn) {
437
56
  return change_case(args, argn, false);
438
}
439
440
56
static lbm_value ext_str_to_upper(lbm_value *args, lbm_uint argn) {
441
56
  return change_case(args,argn, true);
442
}
443
444
336
static lbm_value ext_str_cmp(lbm_value *args, lbm_uint argn) {
445

336
  if (argn != 2 && argn != 3) {
446
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
447
28
    return ENC_SYM_EERROR;
448
  }
449
450
308
  char *str1 = lbm_dec_str(args[0]);
451
308
  if (!str1) {
452
28
    return ENC_SYM_TERROR;
453
  }
454
455
280
  char *str2 = lbm_dec_str(args[1]);
456
280
  if (!str2) {
457
28
    return ENC_SYM_TERROR;
458
  }
459
460
252
  int n = -1;
461
252
  if (argn == 3) {
462
28
    if (!lbm_is_number(args[2])) {
463
28
      return ENC_SYM_TERROR;
464
    }
465
466
    n = lbm_dec_as_i32(args[2]);
467
  }
468
469
224
  if (n > 0) {
470
    return lbm_enc_i(strncmp(str1, str2, (unsigned int)n));
471
  } else {
472
224
    return lbm_enc_i(strcmp(str1, str2));
473
  }
474
}
475
476
// TODO: This is very similar to ext-print. Maybe they can share code.
477
980
static lbm_value to_str(char *delimiter, lbm_value *args, lbm_uint argn) {
478
980
  const int str_len = 300;
479
980
  char *str = lbm_malloc((lbm_uint)str_len);
480
980
  if (!str) {
481
    return ENC_SYM_MERROR;
482
  }
483
484
980
  int str_ofs = 0;
485
486
2268
  for (lbm_uint i = 0; i < argn; i ++) {
487
1288
    lbm_value t = args[i];
488
1288
    int max = str_len - str_ofs - 1;
489
490
    char *arr_str;
491
1288
    int chars = 0;
492
493
1288
    if (lbm_value_is_printable_string(t, &arr_str)) {
494
252
      if (str_ofs == 0) {
495
168
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s", arr_str);
496
      } else {
497
84
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s%s", delimiter, arr_str);
498
      }
499
    } else {
500
1036
      lbm_print_value(print_val_buffer, 256, t);
501
1036
      if (str_ofs == 0) {
502
812
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s", print_val_buffer);
503
      } else {
504
224
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s%s", delimiter, print_val_buffer);
505
      }
506
    }
507
1288
    if (chars >= max) {
508
      str_ofs += max;
509
    } else {
510
1288
      str_ofs += chars;
511
      }
512
  }
513
514
  lbm_value res;
515
980
  if (lbm_create_array(&res, (lbm_uint)str_ofs + 1)) {
516
980
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(res);
517
980
    strncpy((char*)arr->data, str, (unsigned int)str_ofs + 1);
518
980
    lbm_free(str);
519
980
    return res;
520
  } else {
521
    lbm_free(str);
522
    return ENC_SYM_MERROR;
523
  }
524
}
525
526
952
static lbm_value ext_to_str(lbm_value *args, lbm_uint argn) {
527
952
  return to_str(" ", args, argn);
528
}
529
530
28
static lbm_value ext_to_str_delim(lbm_value *args, lbm_uint argn) {
531
28
  if (argn < 1) {
532
    lbm_set_error_reason((char*)lbm_error_str_num_args);
533
    return ENC_SYM_EERROR;
534
  }
535
536
28
  char *delim = lbm_dec_str(args[0]);
537
28
  if (!delim) {
538
    return ENC_SYM_TERROR;
539
  }
540
541
28
  return to_str(delim, args + 1, argn - 1);
542
}
543
544
84
static lbm_value ext_str_len(lbm_value *args, lbm_uint argn) {
545
84
  LBM_CHECK_ARGN(1);
546
547
56
  size_t str_arr_size = 0;
548
56
  char *str = NULL; //lbm_dec_str(args[0]);
549
56
  if (!dec_str_size(args[0], &str, &str_arr_size)) {
550
28
    return ENC_SYM_TERROR;
551
  }
552
553
28
  return lbm_enc_i((int)strlen_max(str, str_arr_size));
554
}
555
556
84
static lbm_value ext_str_replicate(lbm_value *args, lbm_uint argn) {
557
84
  if (argn != 2) {
558
56
    lbm_set_error_reason((char*)lbm_error_str_num_args);
559
56
    return ENC_SYM_EERROR;
560
  }
561
562
28
  lbm_value res = ENC_SYM_TERROR;
563
564

56
  if (lbm_is_number(args[0]) &&
565
28
      lbm_is_number(args[1])) {
566
28
    uint32_t len = lbm_dec_as_u32(args[0]);
567
28
    uint8_t c = lbm_dec_as_char(args[1]);
568
569
    lbm_value lbm_res;
570
28
    if (lbm_create_array(&lbm_res, len + 1)) {
571
28
      lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(lbm_res);
572
140
      for (unsigned int i = 0;i < len;i++) {
573
112
        ((char*)(arr->data))[i] = (char)c;
574
      }
575
28
      ((char*)(arr->data))[len] = '\0';
576
28
      res = lbm_res;
577
    } else {
578
      res = ENC_SYM_MERROR;
579
    }
580
  }
581
28
  return res;
582
}
583
584
756
bool ci_strncmp(const char *str1, const char *str2,int n) {
585
756
  bool res = true;
586
1400
  for (int i = 0; i < n; i ++) {
587
1148
    if (tolower(str1[i]) != tolower(str2[i])) {
588
504
      res = false;
589
504
      break;
590
    }
591
  }
592
756
  return res;
593
}
594
595
// signature: (str-find str:byte-array substr [start:int] [occurrence:int] [dir] [case_sensitivity]) -> int
596
// where
597
//   seq = string|(..string)
598
//   dir = 'left|'right
599
//   case_sensitivity = 'case-sensitive | 'case-insensitive
600
952
static lbm_value ext_str_find(lbm_value *args, lbm_uint argn) {
601

952
  if (argn < 2 || 6 < argn) {
602
28
    lbm_set_error_reason((char *)lbm_error_str_num_args);
603
28
    return ENC_SYM_EERROR;
604
  }
605
924
  if (!lbm_is_array_r(args[0])) {
606
28
    lbm_set_error_suspect(args[0]);
607
28
    lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
608
28
    return ENC_SYM_TERROR;
609
  }
610
611
896
  lbm_array_header_t *str_header = (lbm_array_header_t *)lbm_car(args[0]);
612
896
  const char *str   = (const char *)str_header->data;
613
896
  lbm_int str_size = (lbm_int)str_header->size;
614
615
  // Guaranteed to be list containing strings.
616
  lbm_value substrings;
617
896
  lbm_int min_substr_len = LBM_INT_MAX;
618
896
  if (lbm_is_array_r(args[1])) {
619
728
    substrings = lbm_cons(args[1], ENC_SYM_NIL);
620
728
    if (substrings == ENC_SYM_MERROR) {
621
      return ENC_SYM_MERROR;
622
    }
623
728
    lbm_array_header_t *header = (lbm_array_header_t *)lbm_car(args[1]);
624
625
728
    lbm_int len = (lbm_int)header->size - 1;
626
728
    if (len < 0) {
627
      // substr is zero length array
628
      return lbm_enc_i(-1);
629
    }
630
728
    min_substr_len = len;
631
168
  } else if (lbm_is_list(args[1])) {
632
392
    for (lbm_value current = args[1]; lbm_is_cons(current); current = lbm_cdr(current)) {
633
224
      lbm_value car_val = lbm_car(current);
634
224
      if (!lbm_is_array_r(car_val)) {
635
        lbm_set_error_suspect(args[1]);
636
        lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
637
        return ENC_SYM_TERROR;
638
      }
639
640
224
      lbm_array_header_t *header = (lbm_array_header_t *)lbm_car(car_val);
641
642
224
      lbm_int len = (lbm_int)header->size - 1;
643
224
      if (len < 0) {
644
        // substr is zero length array
645
        continue;
646
      }
647
224
      if (len < min_substr_len) {
648
140
        min_substr_len = len;
649
      }
650
    }
651
168
    substrings = args[1];
652
  } else {
653
    lbm_set_error_suspect(args[1]);
654
    lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
655
    return ENC_SYM_TERROR;
656
  }
657
658
896
  bool to_right    = true;
659
896
  bool case_sensitive = true;
660
661
896
  int nums[2] = {0, 0};
662
896
  bool nums_set[2] = {false, false};
663
896
  int num_ix = 0;
664
665
666
3836
  for (int i = 0; i < (int)argn; i ++ ) {
667

2940
    if (lbm_is_number(args[i]) && num_ix < 2) {
668
644
      nums_set[num_ix] = true;
669
644
      nums[num_ix++] = lbm_dec_as_int(args[i]);
670
    }
671
2940
    if (lbm_is_symbol(args[i])) {
672
532
      lbm_uint symbol = lbm_dec_sym(args[i]);
673
532
      if (symbol == sym_left) {
674
280
	to_right = false;
675
252
      } else if (symbol == sym_case_insensitive) {
676
196
	case_sensitive = false;
677
      }
678
    }
679
  }
680
681
896
  uint32_t occurrence = 0;
682
896
  lbm_int start = to_right ? 0 : str_size - min_substr_len;
683
896
  if (nums_set[0]) {
684
504
    start = nums[0];
685
  }
686
896
  if (nums_set[1]) {
687
140
    occurrence = (uint32_t)nums[1];
688
  }
689
690
896
  if (start < 0) {
691
    // start: -1 starts the search at the character index before the final null
692
    // byte index.
693
224
    start = str_size - 1 + start;
694
  }
695
696

896
  if (!to_right && (start > str_size - min_substr_len)) {
697
28
    start = str_size - min_substr_len;
698
  }
699

868
  else if (to_right && (start < 0)) {
700
28
    start = 0;
701
  }
702
703
896
  lbm_int dir = to_right ? 1 : -1;
704

1960
  for (lbm_int i = start; to_right ? (i <= str_size - min_substr_len) : (i >= 0); i += dir) {
705
3136
    for (lbm_value current = substrings; lbm_is_cons(current); current = lbm_cdr(current)) {
706
2072
      lbm_array_header_t *header = (lbm_array_header_t *)lbm_car(lbm_car(current));
707
2072
      lbm_int substr_len         = (lbm_int)header->size - 1;
708
2072
      const char *substr         = (const char *)header->data;
709
710
2072
      if (
711
2072
        i > str_size - substr_len // substr length runs over str end.
712
2072
        || substr_len < 0 // empty substr substr was zero bytes in size
713
      ) {
714
        continue;
715
      }
716
717

2072
      if ((case_sensitive && memcmp(&str[i], substr, (size_t)substr_len) == 0) ||
718

1428
	  (!case_sensitive && ci_strncmp(&str[i], substr, (int)substr_len))) {
719
896
        if (occurrence == 0) {
720
756
          return lbm_enc_i(i);
721
        }
722
140
        occurrence -= 1;
723
      }
724
    }
725
  }
726
727
140
  return lbm_enc_i(-1);
728
}
729
730
21672
void lbm_string_extensions_init(void) {
731
732
21672
  lbm_add_symbol_const("left", &sym_left);
733
21672
  lbm_add_symbol_const("nocase", &sym_case_insensitive);
734
735
21672
  lbm_add_extension("str-from-n", ext_str_from_n);
736
21672
  lbm_add_extension("str-join", ext_str_join);
737
21672
  lbm_add_extension("str-to-i", ext_str_to_i);
738
21672
  lbm_add_extension("str-to-f", ext_str_to_f);
739
21672
  lbm_add_extension("str-part", ext_str_part);
740
21672
  lbm_add_extension("str-split", ext_str_split);
741
21672
  lbm_add_extension("str-replace", ext_str_replace);
742
21672
  lbm_add_extension("str-to-lower", ext_str_to_lower);
743
21672
  lbm_add_extension("str-to-upper", ext_str_to_upper);
744
21672
  lbm_add_extension("str-cmp", ext_str_cmp);
745
21672
  lbm_add_extension("to-str", ext_to_str);
746
21672
  lbm_add_extension("to-str-delim", ext_to_str_delim);
747
21672
  lbm_add_extension("str-len", ext_str_len);
748
21672
  lbm_add_extension("str-replicate", ext_str_replicate);
749
21672
  lbm_add_extension("str-find", ext_str_find);
750
21672
}