GCC Code Coverage Report
Directory: ../src/ Exec Total Coverage
File: /home/joels/Current/lispbm/src/extensions/string_extensions.c Lines: 385 421 91.4 %
Date: 2024-12-26 17:59:19 Branches: 217 254 85.4 %

Line Branch Exec Source
1
/*
2
    Copyright 2022, 2023, 2024 Joel Svensson        svenssonjoel@yahoo.se
3
    Copyright 2022, 2023       Benjamin Vedder
4
    Copyright             2024 Rasmus Söderhielm    rasmus.soderhielm@gmail.com
5
6
    This program is free software: you can redistribute it and/or modify
7
    it under the terms of the GNU General Public License as published by
8
    the Free Software Foundation, either version 3 of the License, or
9
    (at your option) any later version.
10
11
    This program is distributed in the hope that it will be useful,
12
    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
    GNU General Public License for more details.
15
16
    You should have received a copy of the GNU General Public License
17
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
*/
19
20
#include "extensions.h"
21
#include "lbm_memory.h"
22
#include "heap.h"
23
#include "fundamental.h"
24
#include "lbm_c_interop.h"
25
#include "eval_cps.h"
26
#include "print.h"
27
28
#include <ctype.h>
29
30
#ifndef MIN
31
#define MIN(a,b) (((a)<(b))?(a):(b))
32
#endif
33
#ifndef MAX
34
#define MAX(a,b) (((a)>(b))?(a):(b))
35
#endif
36
37
static char print_val_buffer[256];
38
39
static lbm_uint sym_left;
40
static lbm_uint sym_case_insensitive;
41
42
43
384352
static size_t strlen_max(const char *s, size_t maxlen) {
44
  size_t i;
45
1219864
  for (i = 0; i < maxlen; i ++) {
46
1219836
    if (s[i] == 0) break;
47
  }
48
384352
  return i;
49
}
50
51
192580
static bool dec_str_size(lbm_value v, char **data, size_t *size) {
52
192580
  bool result = false;
53
192580
  if (lbm_is_array_r(v)) {
54
192524
      lbm_array_header_t *array = (lbm_array_header_t*) lbm_car(v);
55
192524
      *data = (char*)array->data;
56
192524
      *size = array->size;
57
192524
      result = true;
58
  }
59
192580
  return result;
60
}
61
62
40154
static lbm_value ext_str_from_n(lbm_value *args, lbm_uint argn) {
63

40154
  if (argn != 1 && argn != 2) {
64
84
    lbm_set_error_reason((char*)lbm_error_str_num_args);
65
84
    return ENC_SYM_EERROR;
66
  }
67
40070
  if (!lbm_is_number(args[0])) {
68
56
    return ENC_SYM_TERROR;
69
  }
70
71

40014
  if (argn == 2 && !lbm_is_array_r(args[1])) {
72
28
    return ENC_SYM_TERROR;
73
  }
74
75
39986
  char *format = 0;
76
39986
  if (argn == 2) {
77
84
    format = lbm_dec_str(args[1]);
78
  }
79
80
  char buffer[100];
81
39986
  size_t len = 0;
82
83
39986
  switch (lbm_type_of(args[0])) {
84
224
  case LBM_TYPE_DOUBLE: /* fall through */
85
  case LBM_TYPE_FLOAT:
86
224
    if (!format) {
87
168
      format = "%g";
88
    }
89
224
    len = (size_t)snprintf(buffer, sizeof(buffer), format, lbm_dec_as_double(args[0]));
90
224
    break;
91
92
39762
  default:
93
39762
    if (!format) {
94
39734
      format = "%d";
95
    }
96
39762
    len = (size_t)snprintf(buffer, sizeof(buffer), format, lbm_dec_as_i32(args[0]));
97
39762
    break;
98
  }
99
100
39986
  len = MIN(len, sizeof(buffer));
101
102
  lbm_value res;
103
39986
  if (lbm_create_array(&res, len + 1)) {
104
39956
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(res);
105
39956
    memcpy(arr->data, buffer, len);
106
39956
    ((char*)(arr->data))[len] = '\0';
107
39956
    return res;
108
  } else {
109
30
    return ENC_SYM_MERROR;
110
  }
111
}
112
113
// signature: (str-join strings [delim]) -> str
114
96080
static lbm_value ext_str_join(lbm_value *args, lbm_uint argn) {
115
  // This function does not check that the string arguments contain any
116
  // terminating null bytes.
117
118

96080
  if (argn != 1 && argn != 2) {
119
28
    lbm_set_error_reason((char *)lbm_error_str_num_args);
120
28
    return ENC_SYM_EERROR;
121
  }
122
123
96052
  size_t str_len   = 0;
124
96052
  size_t str_count = 0;
125
96052
  if (!lbm_is_list(args[0])) {
126
28
    lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
127
28
    lbm_set_error_suspect(args[0]);
128
28
    return ENC_SYM_TERROR;
129
  }
130
288128
  for (lbm_value current = args[0]; lbm_is_cons(current); current = lbm_cdr(current)) {
131
192132
    lbm_value car_val = lbm_car(current);
132
192132
    char *str = NULL;
133
192132
    size_t arr_size = 0;
134
192132
    if (dec_str_size(car_val, &str, &arr_size)) {
135
192104
      str_len += strlen_max(str, arr_size);
136
192104
      str_count += 1;
137
    } else {
138
28
      lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
139
28
      lbm_set_error_suspect(args[0]);
140
28
      return ENC_SYM_TERROR;
141
    }
142
  }
143
144
95996
  const char *delim = "";
145
95996
  if (argn >= 2) {
146
84264
    delim = lbm_dec_str(args[1]);
147
84264
    if (!delim) {
148
28
      lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
149
28
      lbm_set_error_suspect(args[1]);
150
28
      return ENC_SYM_TERROR;
151
    }
152
  }
153
154
95968
  size_t delim_len = strlen(delim);
155
95968
  if (str_count > 0) {
156
67880
    str_len += (str_count - 1) * delim_len;
157
  }
158
159
  lbm_value result;
160
95968
  if (!lbm_create_array(&result, str_len + 1)) {
161
96
    return ENC_SYM_MERROR;
162
  }
163
95872
  char *result_str = lbm_dec_str(result);
164
165
95872
  size_t i      = 0;
166
95872
  size_t offset = 0;
167
287728
  for (lbm_value current = args[0]; lbm_is_cons(current); current = lbm_cdr(current)) {
168
191856
    lbm_value car_val = lbm_car(current);
169
    // All arrays have been prechecked.
170
191856
    lbm_array_header_t *array = (lbm_array_header_t*) lbm_car(car_val);
171
191856
    char *str = (char*)array->data;
172
191856
    size_t len = strlen_max(str, array->size);
173
174
191856
    memcpy(result_str + offset, str, len);
175
191856
    offset += len;
176
177
191856
    if (i != str_count - 1) {
178
124040
      memcpy(result_str + offset, delim, delim_len);
179
124040
      offset += delim_len;
180
    }
181
191856
    i++;
182
  }
183
184
95872
  result_str[str_len] = '\0';
185
186
95872
  return result;
187
}
188
189
448
static lbm_value ext_str_to_i(lbm_value *args, lbm_uint argn) {
190

448
  if (argn != 1 && argn != 2) {
191
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
192
28
    return ENC_SYM_EERROR;
193
  }
194
195
420
  char *str = lbm_dec_str(args[0]);
196
420
  if (!str) {
197
28
    return ENC_SYM_TERROR;
198
  }
199
200
392
  int base = 0;
201
392
  if (argn == 2) {
202
28
    if (!lbm_is_number(args[1])) {
203
      return ENC_SYM_TERROR;
204
    }
205
206
28
    base = (int)lbm_dec_as_u32(args[1]);
207
  }
208
209
392
  return lbm_enc_i32((int32_t)strtol(str, NULL, base));
210
}
211
212
112
static lbm_value ext_str_to_f(lbm_value *args, lbm_uint argn) {
213
112
  if (argn != 1) {
214
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
215
28
    return ENC_SYM_EERROR;
216
  }
217
218
84
  char *str = lbm_dec_str(args[0]);
219
84
  if (!str) {
220
28
    return ENC_SYM_TERROR;
221
  }
222
223
56
  return lbm_enc_float(strtof(str, NULL));
224
}
225
226
112
static lbm_value ext_str_part(lbm_value *args, lbm_uint argn) {
227

112
  if ((argn != 2 && argn != 3) || !lbm_is_number(args[1])) {
228
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
229
28
    return ENC_SYM_TERROR;
230
  }
231
232
84
  size_t str_arr_len = 0;
233
84
  char *str = NULL;//lbm_dec_str(args[0]);
234
84
  if (!dec_str_size(args[0], &str, &str_arr_len)) {
235
    return ENC_SYM_TERROR;
236
  }
237
238
84
  uint32_t len = (uint32_t)strlen_max(str, str_arr_len);
239
240
84
  uint32_t start = lbm_dec_as_u32(args[1]);
241
242
84
  if (start >= len) {
243
    return ENC_SYM_EERROR;
244
  }
245
246
84
  uint32_t n = len - start;
247
84
  if (argn == 3) {
248
56
    if (!lbm_is_number(args[2])) {
249
      return ENC_SYM_TERROR;
250
    }
251
252
56
    n = MIN(lbm_dec_as_u32(args[2]), n);
253
  }
254
255
  lbm_value res;
256
84
  if (lbm_create_array(&res, n + 1)) {
257
84
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(res);
258
84
    memcpy(arr->data, str + start, n);
259
84
    ((char*)(arr->data))[n] = '\0';
260
84
    return res;
261
  } else {
262
    return ENC_SYM_MERROR;
263
  }
264
}
265
266
140
static lbm_value ext_str_split(lbm_value *args, lbm_uint argn) {
267
140
  if (argn != 2) {
268
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
269
28
    return ENC_SYM_EERROR;
270
  }
271
272
112
  size_t str_arr_size = 0;
273
112
  char *str = NULL; //lbm_dec_str(args[0]);
274
112
  if (!dec_str_size(args[0], &str, &str_arr_size)) {
275
    return ENC_SYM_TERROR;
276
  }
277
278
112
  char *split = lbm_dec_str(args[1]);
279
112
  if (!split) {
280
56
    if (lbm_is_number(args[1])) {
281
56
      int step = MAX(lbm_dec_as_i32(args[1]), 1);
282
56
      lbm_value res = ENC_SYM_NIL;
283
56
      int len = (int)strlen_max(str, str_arr_size);
284
616
      for (int i = len / step;i >= 0;i--) {
285
560
        int ind_now = i * step;
286
560
        if (ind_now >= len) {
287
28
          continue;
288
        }
289
290
532
        int step_now = step;
291
560
        while ((ind_now + step_now) > len) {
292
28
          step_now--;
293
        }
294
295
        lbm_value tok;
296
532
        if (lbm_create_array(&tok, (lbm_uint)step_now + 1)) {
297
532
          lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(tok);
298
532
          memcpy(arr->data, str + ind_now, (unsigned int)step_now);
299
532
          ((char*)(arr->data))[step_now] = '\0';
300
532
          res = lbm_cons(tok, res);
301
        } else {
302
          return ENC_SYM_MERROR;
303
        }
304
      }
305
56
      return res;
306
    } else {
307
      return ENC_SYM_TERROR;
308
    }
309
  } else {
310
56
     lbm_value res = ENC_SYM_NIL;
311
56
    const char *s = str;
312
280
    while (*(s += strspn(s, split)) != '\0') {
313
224
      size_t len = strcspn(s, split);
314
315
      lbm_value tok;
316
224
      if (lbm_create_array(&tok, len + 1)) {
317
224
        lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(tok);
318
224
        memcpy(arr->data, s, len);
319
224
        ((char*)(arr->data))[len] = '\0';
320
224
        res = lbm_cons(tok, res);
321
      } else {
322
        return ENC_SYM_MERROR;
323
      }
324
224
      s += len;
325
    }
326
56
    return lbm_list_destructive_reverse(res);
327
  }
328
}
329
330
// Todo: Clean this up for 64bit
331
84
static lbm_value ext_str_replace(lbm_value *args, lbm_uint argn) {
332

84
  if (argn != 2 && argn != 3) {
333
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
334
28
    return ENC_SYM_EERROR;
335
  }
336
337
56
  size_t orig_arr_size = 0;
338
56
  char *orig = NULL; // lbm_dec_str(args[0]);
339
56
  if (!dec_str_size(args[0], &orig, &orig_arr_size)) {
340
    return ENC_SYM_TERROR;
341
  }
342
343
56
  size_t rep_arr_size = 0;
344
56
  char *rep = NULL; //lbm_dec_str(args[1]);
345
56
  if (!dec_str_size(args[1], &rep, &rep_arr_size)) {
346
    return ENC_SYM_TERROR;
347
  }
348
349
56
  size_t with_arr_size = 0;
350
56
  char *with = "";
351
56
  if (argn == 3) {
352
28
    if (!dec_str_size(args[2], &with, &with_arr_size)) {
353
      return ENC_SYM_TERROR;
354
    }
355
  }
356
357
  // See https://stackoverflow.com/questions/779875/what-function-is-to-replace-a-substring-from-a-string-in-c
358
  //char *result; // the return string
359
  char *ins;    // the next insert point
360
  char *tmp;    // varies
361
  size_t len_rep;  // length of rep (the string to remove)
362
  size_t len_with; // length of with (the string to replace rep with)
363
  //size_t len_front; // distance between rep and end of last rep
364
  int count;    // number of replacements
365
366
56
  len_rep = strlen_max(rep, rep_arr_size);
367
56
  if (len_rep == 0) {
368
    return args[0]; // empty rep causes infinite loop during count
369
  }
370
371
56
  len_with = strlen_max(with,with_arr_size);
372
373
  // count the number of replacements needed
374
56
  ins = orig;
375
112
  for (count = 0; (tmp = strstr(ins, rep)); ++count) {
376
56
    ins = tmp + len_rep;
377
  }
378
379
56
  size_t len_res = strlen_max(orig, orig_arr_size) + (len_with - len_rep) * (unsigned int)count + 1;
380
  lbm_value lbm_res;
381
56
  if (lbm_create_array(&lbm_res, len_res)) {
382
56
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(lbm_res);
383
56
    tmp = (char*)arr->data;
384
  } else {
385
    return ENC_SYM_MERROR;
386
  }
387
388
  // first time through the loop, all the variable are set correctly
389
  // from here on,
390
  //    tmp points to the end of the result string
391
  //    ins points to the next occurrence of rep in orig
392
  //    orig points to the remainder of orig after "end of rep"
393
112
  while (count--) {
394
56
    ins = strstr(orig, rep);
395
56
    size_t len_front = (size_t)ins - (size_t)orig;
396
56
    tmp = strncpy(tmp, orig, len_front) + len_front;
397
56
    tmp = strncpy(tmp, with, len_with) + len_with;
398
56
    orig += len_front + len_rep; // move to next "end of rep"
399
  }
400
56
  strcpy(tmp, orig);
401
402
56
  return lbm_res;
403
}
404
405
112
static lbm_value change_case(lbm_value *args, lbm_uint argn, bool to_upper) {
406
112
  if (argn != 1) {
407
56
    lbm_set_error_reason((char*)lbm_error_str_num_args);
408
56
    return ENC_SYM_EERROR;
409
  }
410
411
56
  size_t orig_arr_size = 0;
412
56
  char *orig = NULL; //lbm_dec_str(args[0]);
413
56
  if (!dec_str_size(args[0], &orig, &orig_arr_size)) {
414
    return ENC_SYM_TERROR;
415
  }
416
417
56
  size_t len = strlen_max(orig,orig_arr_size);
418
  lbm_value lbm_res;
419
56
  if (lbm_create_array(&lbm_res, len + 1)) {
420
56
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(lbm_res);
421
336
    for (unsigned int i = 0;i < len;i++) {
422
280
      if (to_upper) {
423
140
	((char*)(arr->data))[i] = (char)toupper(orig[i]);
424
      } else {
425
140
	((char*)(arr->data))[i] = (char)tolower(orig[i]);
426
      }
427
    }
428
56
    ((char*)(arr->data))[len] = '\0';
429
56
    return lbm_res;
430
  } else {
431
    return ENC_SYM_MERROR;
432
  }
433
}
434
435
56
static lbm_value ext_str_to_lower(lbm_value *args, lbm_uint argn) {
436
56
  return change_case(args, argn, false);
437
}
438
439
56
static lbm_value ext_str_to_upper(lbm_value *args, lbm_uint argn) {
440
56
  return change_case(args,argn, true);
441
}
442
443
336
static lbm_value ext_str_cmp(lbm_value *args, lbm_uint argn) {
444

336
  if (argn != 2 && argn != 3) {
445
28
    lbm_set_error_reason((char*)lbm_error_str_num_args);
446
28
    return ENC_SYM_EERROR;
447
  }
448
449
308
  char *str1 = lbm_dec_str(args[0]);
450
308
  if (!str1) {
451
28
    return ENC_SYM_TERROR;
452
  }
453
454
280
  char *str2 = lbm_dec_str(args[1]);
455
280
  if (!str2) {
456
28
    return ENC_SYM_TERROR;
457
  }
458
459
252
  int n = -1;
460
252
  if (argn == 3) {
461
28
    if (!lbm_is_number(args[2])) {
462
28
      return ENC_SYM_TERROR;
463
    }
464
465
    n = lbm_dec_as_i32(args[2]);
466
  }
467
468
224
  if (n > 0) {
469
    return lbm_enc_i(strncmp(str1, str2, (unsigned int)n));
470
  } else {
471
224
    return lbm_enc_i(strcmp(str1, str2));
472
  }
473
}
474
475
// TODO: This is very similar to ext-print. Maybe they can share code.
476
980
static lbm_value to_str(char *delimiter, lbm_value *args, lbm_uint argn) {
477
980
  const int str_len = 300;
478
980
  char *str = lbm_malloc((lbm_uint)str_len);
479
980
  if (!str) {
480
    return ENC_SYM_MERROR;
481
  }
482
483
980
  int str_ofs = 0;
484
485
2268
  for (lbm_uint i = 0; i < argn; i ++) {
486
1288
    lbm_value t = args[i];
487
1288
    int max = str_len - str_ofs - 1;
488
489
    char *arr_str;
490
1288
    int chars = 0;
491
492
1288
    if (lbm_value_is_printable_string(t, &arr_str)) {
493
252
      if (str_ofs == 0) {
494
168
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s", arr_str);
495
      } else {
496
84
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s%s", delimiter, arr_str);
497
      }
498
    } else {
499
1036
      lbm_print_value(print_val_buffer, 256, t);
500
1036
      if (str_ofs == 0) {
501
812
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s", print_val_buffer);
502
      } else {
503
224
        chars = snprintf(str + str_ofs, (unsigned int)max, "%s%s", delimiter, print_val_buffer);
504
      }
505
    }
506
1288
    if (chars >= max) {
507
      str_ofs += max;
508
    } else {
509
1288
      str_ofs += chars;
510
      }
511
  }
512
513
  lbm_value res;
514
980
  if (lbm_create_array(&res, (lbm_uint)str_ofs + 1)) {
515
980
    lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(res);
516
980
    strncpy((char*)arr->data, str, (unsigned int)str_ofs + 1);
517
980
    lbm_free(str);
518
980
    return res;
519
  } else {
520
    lbm_free(str);
521
    return ENC_SYM_MERROR;
522
  }
523
}
524
525
952
static lbm_value ext_to_str(lbm_value *args, lbm_uint argn) {
526
952
  return to_str(" ", args, argn);
527
}
528
529
28
static lbm_value ext_to_str_delim(lbm_value *args, lbm_uint argn) {
530
28
  if (argn < 1) {
531
    lbm_set_error_reason((char*)lbm_error_str_num_args);
532
    return ENC_SYM_EERROR;
533
  }
534
535
28
  char *delim = lbm_dec_str(args[0]);
536
28
  if (!delim) {
537
    return ENC_SYM_TERROR;
538
  }
539
540
28
  return to_str(delim, args + 1, argn - 1);
541
}
542
543
84
static lbm_value ext_str_len(lbm_value *args, lbm_uint argn) {
544
84
  LBM_CHECK_ARGN(1);
545
546
56
  size_t str_arr_size = 0;
547
56
  char *str = NULL; //lbm_dec_str(args[0]);
548
56
  if (!dec_str_size(args[0], &str, &str_arr_size)) {
549
28
    return ENC_SYM_TERROR;
550
  }
551
552
28
  return lbm_enc_i((int)strlen_max(str, str_arr_size));
553
}
554
555
84
static lbm_value ext_str_replicate(lbm_value *args, lbm_uint argn) {
556
84
  if (argn != 2) {
557
56
    lbm_set_error_reason((char*)lbm_error_str_num_args);
558
56
    return ENC_SYM_EERROR;
559
  }
560
561
28
  lbm_value res = ENC_SYM_TERROR;
562
563

56
  if (lbm_is_number(args[0]) &&
564
28
      lbm_is_number(args[1])) {
565
28
    uint32_t len = lbm_dec_as_u32(args[0]);
566
28
    uint8_t c = lbm_dec_as_char(args[1]);
567
568
    lbm_value lbm_res;
569
28
    if (lbm_create_array(&lbm_res, len + 1)) {
570
28
      lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(lbm_res);
571
140
      for (unsigned int i = 0;i < len;i++) {
572
112
        ((char*)(arr->data))[i] = (char)c;
573
      }
574
28
      ((char*)(arr->data))[len] = '\0';
575
28
      res = lbm_res;
576
    } else {
577
      res = ENC_SYM_MERROR;
578
    }
579
  }
580
28
  return res;
581
}
582
583
756
bool ci_strncmp(const char *str1, const char *str2,int n) {
584
756
  bool res = true;
585
1400
  for (int i = 0; i < n; i ++) {
586
1148
    if (tolower(str1[i]) != tolower(str2[i])) {
587
504
      res = false;
588
504
      break;
589
    }
590
  }
591
756
  return res;
592
}
593
594
// signature: (str-find str:byte-array substr [start:int] [occurrence:int] [dir] [case_sensitivity]) -> int
595
// where
596
//   seq = string|(..string)
597
//   dir = 'left|'right
598
//   case_sensitivity = 'case-sensitive | 'case-insensitive
599
952
static lbm_value ext_str_find(lbm_value *args, lbm_uint argn) {
600

952
  if (argn < 2 || 6 < argn) {
601
28
    lbm_set_error_reason((char *)lbm_error_str_num_args);
602
28
    return ENC_SYM_EERROR;
603
  }
604
924
  if (!lbm_is_array_r(args[0])) {
605
28
    lbm_set_error_suspect(args[0]);
606
28
    lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
607
28
    return ENC_SYM_TERROR;
608
  }
609
610
896
  lbm_array_header_t *str_header = (lbm_array_header_t *)lbm_car(args[0]);
611
896
  const char *str   = (const char *)str_header->data;
612
896
  lbm_int str_size = (lbm_int)str_header->size;
613
614
  // Guaranteed to be list containing strings.
615
  lbm_value substrings;
616
896
  lbm_int min_substr_len = LBM_INT_MAX;
617
896
  if (lbm_is_array_r(args[1])) {
618
728
    substrings = lbm_cons(args[1], ENC_SYM_NIL);
619
728
    if (substrings == ENC_SYM_MERROR) {
620
      return ENC_SYM_MERROR;
621
    }
622
728
    lbm_array_header_t *header = (lbm_array_header_t *)lbm_car(args[1]);
623
624
728
    lbm_int len = (lbm_int)header->size - 1;
625
728
    if (len < 0) {
626
      // substr is zero length array
627
      return lbm_enc_i(-1);
628
    }
629
728
    min_substr_len = len;
630
168
  } else if (lbm_is_list(args[1])) {
631
392
    for (lbm_value current = args[1]; lbm_is_cons(current); current = lbm_cdr(current)) {
632
224
      lbm_value car_val = lbm_car(current);
633
224
      if (!lbm_is_array_r(car_val)) {
634
        lbm_set_error_suspect(args[1]);
635
        lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
636
        return ENC_SYM_TERROR;
637
      }
638
639
224
      lbm_array_header_t *header = (lbm_array_header_t *)lbm_car(car_val);
640
641
224
      lbm_int len = (lbm_int)header->size - 1;
642
224
      if (len < 0) {
643
        // substr is zero length array
644
        continue;
645
      }
646
224
      if (len < min_substr_len) {
647
140
        min_substr_len = len;
648
      }
649
    }
650
168
    substrings = args[1];
651
  } else {
652
    lbm_set_error_suspect(args[1]);
653
    lbm_set_error_reason((char *)lbm_error_str_incorrect_arg);
654
    return ENC_SYM_TERROR;
655
  }
656
657
896
  bool to_right    = true;
658
896
  bool case_sensitive = true;
659
660
896
  int nums[2] = {0, 0};
661
896
  bool nums_set[2] = {false, false};
662
896
  int num_ix = 0;
663
664
665
3836
  for (int i = 0; i < (int)argn; i ++ ) {
666

2940
    if (lbm_is_number(args[i]) && num_ix < 2) {
667
644
      nums_set[num_ix] = true;
668
644
      nums[num_ix++] = lbm_dec_as_int(args[i]);
669
    }
670
2940
    if (lbm_is_symbol(args[i])) {
671
532
      lbm_uint symbol = lbm_dec_sym(args[i]);
672
532
      if (symbol == sym_left) {
673
280
	to_right = false;
674
252
      } else if (symbol == sym_case_insensitive) {
675
196
	case_sensitive = false;
676
      }
677
    }
678
  }
679
680
896
  uint32_t occurrence = 0;
681
896
  lbm_int start = to_right ? 0 : str_size - min_substr_len;
682
896
  if (nums_set[0]) {
683
504
    start = nums[0];
684
  }
685
896
  if (nums_set[1]) {
686
140
    occurrence = (uint32_t)nums[1];
687
  }
688
689
896
  if (start < 0) {
690
    // start: -1 starts the search at the character index before the final null
691
    // byte index.
692
224
    start = str_size - 1 + start;
693
  }
694
695

896
  if (!to_right && (start > str_size - min_substr_len)) {
696
28
    start = str_size - min_substr_len;
697
  }
698

868
  else if (to_right && (start < 0)) {
699
28
    start = 0;
700
  }
701
702
896
  lbm_int dir = to_right ? 1 : -1;
703

1960
  for (lbm_int i = start; to_right ? (i <= str_size - min_substr_len) : (i >= 0); i += dir) {
704
3136
    for (lbm_value current = substrings; lbm_is_cons(current); current = lbm_cdr(current)) {
705
2072
      lbm_array_header_t *header = (lbm_array_header_t *)lbm_car(lbm_car(current));
706
2072
      lbm_int substr_len         = (lbm_int)header->size - 1;
707
2072
      const char *substr         = (const char *)header->data;
708
709
2072
      if (
710
2072
        i > str_size - substr_len // substr length runs over str end.
711
2072
        || substr_len < 0 // empty substr substr was zero bytes in size
712
      ) {
713
        continue;
714
      }
715
716

2072
      if ((case_sensitive && memcmp(&str[i], substr, (size_t)substr_len) == 0) ||
717

1428
	  (!case_sensitive && ci_strncmp(&str[i], substr, (int)substr_len))) {
718
896
        if (occurrence == 0) {
719
756
          return lbm_enc_i(i);
720
        }
721
140
        occurrence -= 1;
722
      }
723
    }
724
  }
725
726
140
  return lbm_enc_i(-1);
727
}
728
729
21756
void lbm_string_extensions_init(void) {
730
731
21756
  lbm_add_symbol_const("left", &sym_left);
732
21756
  lbm_add_symbol_const("nocase", &sym_case_insensitive);
733
734
21756
  lbm_add_extension("str-from-n", ext_str_from_n);
735
21756
  lbm_add_extension("str-join", ext_str_join);
736
21756
  lbm_add_extension("str-to-i", ext_str_to_i);
737
21756
  lbm_add_extension("str-to-f", ext_str_to_f);
738
21756
  lbm_add_extension("str-part", ext_str_part);
739
21756
  lbm_add_extension("str-split", ext_str_split);
740
21756
  lbm_add_extension("str-replace", ext_str_replace);
741
21756
  lbm_add_extension("str-to-lower", ext_str_to_lower);
742
21756
  lbm_add_extension("str-to-upper", ext_str_to_upper);
743
21756
  lbm_add_extension("str-cmp", ext_str_cmp);
744
21756
  lbm_add_extension("to-str", ext_to_str);
745
21756
  lbm_add_extension("to-str-delim", ext_to_str_delim);
746
21756
  lbm_add_extension("str-len", ext_str_len);
747
21756
  lbm_add_extension("str-replicate", ext_str_replicate);
748
21756
  lbm_add_extension("str-find", ext_str_find);
749
21756
}