mpc.c (111471B)
1 #include "mpc.h" 2 3 /* 4 ** State Type 5 */ 6 7 static mpc_state_t mpc_state_invalid(void) { 8 mpc_state_t s; 9 s.pos = -1; 10 s.row = -1; 11 s.col = -1; 12 s.term = 0; 13 return s; 14 } 15 16 static mpc_state_t mpc_state_new(void) { 17 mpc_state_t s; 18 s.pos = 0; 19 s.row = 0; 20 s.col = 0; 21 s.term = 0; 22 return s; 23 } 24 25 /* 26 ** Input Type 27 */ 28 29 /* 30 ** In mpc the input type has three modes of 31 ** operation: String, File and Pipe. 32 ** 33 ** String is easy. The whole contents are 34 ** loaded into a buffer and scanned through. 35 ** The cursor can jump around at will making 36 ** backtracking easy. 37 ** 38 ** The second is a File which is also somewhat 39 ** easy. The contents are never loaded into 40 ** memory but backtracking can still be achieved 41 ** by seeking in the file at different positions. 42 ** 43 ** The final mode is Pipe. This is the difficult 44 ** one. As we assume pipes cannot be seeked - and 45 ** only support a single character lookahead at 46 ** any point, when the input is marked for a 47 ** potential backtracking we start buffering any 48 ** input. 49 ** 50 ** This means that if we are requested to seek 51 ** back we can simply start reading from the 52 ** buffer instead of the input. 53 ** 54 ** Of course using `mpc_predictive` will disable 55 ** backtracking and make LL(1) grammars easy 56 ** to parse for all input methods. 57 ** 58 */ 59 60 enum { 61 MPC_INPUT_STRING = 0, 62 MPC_INPUT_FILE = 1, 63 MPC_INPUT_PIPE = 2 64 }; 65 66 enum { 67 MPC_INPUT_MARKS_MIN = 32 68 }; 69 70 enum { 71 MPC_INPUT_MEM_NUM = 512 72 }; 73 74 typedef struct { 75 char mem[64]; 76 } mpc_mem_t; 77 78 typedef struct { 79 80 int type; 81 char *filename; 82 mpc_state_t state; 83 84 char *string; 85 char *buffer; 86 FILE *file; 87 88 int suppress; 89 int backtrack; 90 int marks_slots; 91 int marks_num; 92 mpc_state_t *marks; 93 94 char *lasts; 95 char last; 96 97 size_t mem_index; 98 char mem_full[MPC_INPUT_MEM_NUM]; 99 mpc_mem_t mem[MPC_INPUT_MEM_NUM]; 100 101 } mpc_input_t; 102 103 static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { 104 105 mpc_input_t *i = malloc(sizeof(mpc_input_t)); 106 107 i->filename = malloc(strlen(filename) + 1); 108 strcpy(i->filename, filename); 109 i->type = MPC_INPUT_STRING; 110 111 i->state = mpc_state_new(); 112 113 i->string = malloc(strlen(string) + 1); 114 strcpy(i->string, string); 115 i->buffer = NULL; 116 i->file = NULL; 117 118 i->suppress = 0; 119 i->backtrack = 1; 120 i->marks_num = 0; 121 i->marks_slots = MPC_INPUT_MARKS_MIN; 122 i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 123 i->lasts = malloc(sizeof(char) * i->marks_slots); 124 i->last = '\0'; 125 126 i->mem_index = 0; 127 memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 128 129 return i; 130 } 131 132 static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) { 133 134 mpc_input_t *i = malloc(sizeof(mpc_input_t)); 135 136 i->filename = malloc(strlen(filename) + 1); 137 strcpy(i->filename, filename); 138 i->type = MPC_INPUT_STRING; 139 140 i->state = mpc_state_new(); 141 142 i->string = malloc(length + 1); 143 strncpy(i->string, string, length); 144 i->string[length] = '\0'; 145 i->buffer = NULL; 146 i->file = NULL; 147 148 i->suppress = 0; 149 i->backtrack = 1; 150 i->marks_num = 0; 151 i->marks_slots = MPC_INPUT_MARKS_MIN; 152 i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 153 i->lasts = malloc(sizeof(char) * i->marks_slots); 154 i->last = '\0'; 155 156 i->mem_index = 0; 157 memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 158 159 return i; 160 161 } 162 163 static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { 164 165 mpc_input_t *i = malloc(sizeof(mpc_input_t)); 166 167 i->filename = malloc(strlen(filename) + 1); 168 strcpy(i->filename, filename); 169 170 i->type = MPC_INPUT_PIPE; 171 i->state = mpc_state_new(); 172 173 i->string = NULL; 174 i->buffer = NULL; 175 i->file = pipe; 176 177 i->suppress = 0; 178 i->backtrack = 1; 179 i->marks_num = 0; 180 i->marks_slots = MPC_INPUT_MARKS_MIN; 181 i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 182 i->lasts = malloc(sizeof(char) * i->marks_slots); 183 i->last = '\0'; 184 185 i->mem_index = 0; 186 memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 187 188 return i; 189 190 } 191 192 static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { 193 194 mpc_input_t *i = malloc(sizeof(mpc_input_t)); 195 196 i->filename = malloc(strlen(filename) + 1); 197 strcpy(i->filename, filename); 198 i->type = MPC_INPUT_FILE; 199 i->state = mpc_state_new(); 200 201 i->string = NULL; 202 i->buffer = NULL; 203 i->file = file; 204 205 i->suppress = 0; 206 i->backtrack = 1; 207 i->marks_num = 0; 208 i->marks_slots = MPC_INPUT_MARKS_MIN; 209 i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); 210 i->lasts = malloc(sizeof(char) * i->marks_slots); 211 i->last = '\0'; 212 213 i->mem_index = 0; 214 memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); 215 216 return i; 217 } 218 219 static void mpc_input_delete(mpc_input_t *i) { 220 221 free(i->filename); 222 223 if (i->type == MPC_INPUT_STRING) { free(i->string); } 224 if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } 225 226 free(i->marks); 227 free(i->lasts); 228 free(i); 229 } 230 231 static int mpc_mem_ptr(mpc_input_t *i, void *p) { 232 return 233 (char*)p >= (char*)(i->mem) && 234 (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); 235 } 236 237 static void *mpc_malloc(mpc_input_t *i, size_t n) { 238 size_t j; 239 char *p; 240 241 if (n > sizeof(mpc_mem_t)) { return malloc(n); } 242 243 j = i->mem_index; 244 do { 245 if (!i->mem_full[i->mem_index]) { 246 p = (void*)(i->mem + i->mem_index); 247 i->mem_full[i->mem_index] = 1; 248 i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; 249 return p; 250 } 251 i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; 252 } while (j != i->mem_index); 253 254 return malloc(n); 255 } 256 257 static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { 258 char *x = mpc_malloc(i, n * m); 259 memset(x, 0, n * m); 260 return x; 261 } 262 263 static void mpc_free(mpc_input_t *i, void *p) { 264 size_t j; 265 if (!mpc_mem_ptr(i, p)) { free(p); return; } 266 j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); 267 i->mem_full[j] = 0; 268 } 269 270 static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { 271 272 char *q = NULL; 273 274 if (!mpc_mem_ptr(i, p)) { return realloc(p, n); } 275 276 if (n > sizeof(mpc_mem_t)) { 277 q = malloc(n); 278 memcpy(q, p, sizeof(mpc_mem_t)); 279 mpc_free(i, p); 280 return q; 281 } 282 283 return p; 284 } 285 286 static void *mpc_export(mpc_input_t *i, void *p) { 287 char *q = NULL; 288 if (!mpc_mem_ptr(i, p)) { return p; } 289 q = malloc(sizeof(mpc_mem_t)); 290 memcpy(q, p, sizeof(mpc_mem_t)); 291 mpc_free(i, p); 292 return q; 293 } 294 295 static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } 296 static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } 297 298 static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } 299 static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } 300 301 static void mpc_input_mark(mpc_input_t *i) { 302 303 if (i->backtrack < 1) { return; } 304 305 i->marks_num++; 306 307 if (i->marks_num > i->marks_slots) { 308 i->marks_slots = i->marks_num + i->marks_num / 2; 309 i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); 310 i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); 311 } 312 313 i->marks[i->marks_num-1] = i->state; 314 i->lasts[i->marks_num-1] = i->last; 315 316 if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { 317 i->buffer = calloc(1, 1); 318 } 319 320 } 321 322 static void mpc_input_unmark(mpc_input_t *i) { 323 int j; 324 325 if (i->backtrack < 1) { return; } 326 327 i->marks_num--; 328 329 if (i->marks_slots > i->marks_num + i->marks_num / 2 330 && i->marks_slots > MPC_INPUT_MARKS_MIN) { 331 i->marks_slots = 332 i->marks_num > MPC_INPUT_MARKS_MIN ? 333 i->marks_num : MPC_INPUT_MARKS_MIN; 334 i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); 335 i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); 336 } 337 338 if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { 339 for (j = strlen(i->buffer) - 1; j >= 0; j--) 340 ungetc(i->buffer[j], i->file); 341 342 free(i->buffer); 343 i->buffer = NULL; 344 } 345 346 } 347 348 static void mpc_input_rewind(mpc_input_t *i) { 349 350 if (i->backtrack < 1) { return; } 351 352 i->state = i->marks[i->marks_num-1]; 353 i->last = i->lasts[i->marks_num-1]; 354 355 if (i->type == MPC_INPUT_FILE) { 356 fseek(i->file, i->state.pos, SEEK_SET); 357 } 358 359 mpc_input_unmark(i); 360 } 361 362 static int mpc_input_buffer_in_range(mpc_input_t *i) { 363 return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); 364 } 365 366 static char mpc_input_buffer_get(mpc_input_t *i) { 367 return i->buffer[i->state.pos - i->marks[0].pos]; 368 } 369 370 static char mpc_input_getc(mpc_input_t *i) { 371 372 char c = '\0'; 373 374 switch (i->type) { 375 376 case MPC_INPUT_STRING: return i->string[i->state.pos]; 377 case MPC_INPUT_FILE: c = fgetc(i->file); return c; 378 case MPC_INPUT_PIPE: 379 380 if (!i->buffer) { c = getc(i->file); return c; } 381 382 if (i->buffer && mpc_input_buffer_in_range(i)) { 383 c = mpc_input_buffer_get(i); 384 return c; 385 } else { 386 c = getc(i->file); 387 return c; 388 } 389 390 default: return c; 391 } 392 } 393 394 static char mpc_input_peekc(mpc_input_t *i) { 395 396 char c = '\0'; 397 398 switch (i->type) { 399 case MPC_INPUT_STRING: return i->string[i->state.pos]; 400 case MPC_INPUT_FILE: 401 402 c = fgetc(i->file); 403 if (feof(i->file)) { return '\0'; } 404 405 fseek(i->file, -1, SEEK_CUR); 406 return c; 407 408 case MPC_INPUT_PIPE: 409 410 if (!i->buffer) { 411 c = getc(i->file); 412 if (feof(i->file)) { return '\0'; } 413 ungetc(c, i->file); 414 return c; 415 } 416 417 if (i->buffer && mpc_input_buffer_in_range(i)) { 418 return mpc_input_buffer_get(i); 419 } else { 420 c = getc(i->file); 421 if (feof(i->file)) { return '\0'; } 422 ungetc(c, i->file); 423 return c; 424 } 425 426 default: return c; 427 } 428 429 } 430 431 static int mpc_input_terminated(mpc_input_t *i) { 432 return mpc_input_peekc(i) == '\0'; 433 } 434 435 static int mpc_input_failure(mpc_input_t *i, char c) { 436 437 switch (i->type) { 438 case MPC_INPUT_STRING: { break; } 439 case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } 440 case MPC_INPUT_PIPE: { 441 442 if (!i->buffer) { ungetc(c, i->file); break; } 443 444 if (i->buffer && mpc_input_buffer_in_range(i)) { 445 break; 446 } else { 447 ungetc(c, i->file); 448 } 449 } 450 default: { break; } 451 } 452 return 0; 453 } 454 455 static int mpc_input_success(mpc_input_t *i, char c, char **o) { 456 457 if (i->type == MPC_INPUT_PIPE 458 && i->buffer && !mpc_input_buffer_in_range(i)) { 459 i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); 460 i->buffer[strlen(i->buffer) + 1] = '\0'; 461 i->buffer[strlen(i->buffer) + 0] = c; 462 } 463 464 i->last = c; 465 i->state.pos++; 466 i->state.col++; 467 468 if (c == '\n') { 469 i->state.col = 0; 470 i->state.row++; 471 } 472 473 if (o) { 474 (*o) = mpc_malloc(i, 2); 475 (*o)[0] = c; 476 (*o)[1] = '\0'; 477 } 478 479 return 1; 480 } 481 482 static int mpc_input_any(mpc_input_t *i, char **o) { 483 char x; 484 if (mpc_input_terminated(i)) { return 0; } 485 x = mpc_input_getc(i); 486 return mpc_input_success(i, x, o); 487 } 488 489 static int mpc_input_char(mpc_input_t *i, char c, char **o) { 490 char x; 491 if (mpc_input_terminated(i)) { return 0; } 492 x = mpc_input_getc(i); 493 return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 494 } 495 496 static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { 497 char x; 498 if (mpc_input_terminated(i)) { return 0; } 499 x = mpc_input_getc(i); 500 return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 501 } 502 503 static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { 504 char x; 505 if (mpc_input_terminated(i)) { return 0; } 506 x = mpc_input_getc(i); 507 return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 508 } 509 510 static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { 511 char x; 512 if (mpc_input_terminated(i)) { return 0; } 513 x = mpc_input_getc(i); 514 return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 515 } 516 517 static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { 518 char x; 519 if (mpc_input_terminated(i)) { return 0; } 520 x = mpc_input_getc(i); 521 return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); 522 } 523 524 static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { 525 526 const char *x = c; 527 528 mpc_input_mark(i); 529 while (*x) { 530 if (!mpc_input_char(i, *x, NULL)) { 531 mpc_input_rewind(i); 532 return 0; 533 } 534 x++; 535 } 536 mpc_input_unmark(i); 537 538 *o = mpc_malloc(i, strlen(c) + 1); 539 strcpy(*o, c); 540 return 1; 541 } 542 543 static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { 544 *o = NULL; 545 return f(i->last, mpc_input_peekc(i)); 546 } 547 548 static int mpc_input_soi(mpc_input_t* i, char **o) { 549 *o = NULL; 550 return i->last == '\0'; 551 } 552 553 static int mpc_input_eoi(mpc_input_t* i, char **o) { 554 *o = NULL; 555 if (i->state.term) { 556 return 0; 557 } else if (mpc_input_terminated(i)) { 558 i->state.term = 1; 559 return 1; 560 } else { 561 return 0; 562 } 563 } 564 565 static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { 566 mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); 567 memcpy(r, &i->state, sizeof(mpc_state_t)); 568 return r; 569 } 570 571 /* 572 ** Error Type 573 */ 574 575 void mpc_err_delete(mpc_err_t *x) { 576 int i; 577 for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } 578 free(x->expected); 579 free(x->filename); 580 free(x->failure); 581 free(x); 582 } 583 584 void mpc_err_print(mpc_err_t *x) { 585 mpc_err_print_to(x, stdout); 586 } 587 588 void mpc_err_print_to(mpc_err_t *x, FILE *f) { 589 char *str = mpc_err_string(x); 590 fprintf(f, "%s", str); 591 free(str); 592 } 593 594 static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) { 595 /* TODO: Error Checking on Length */ 596 int left = ((*max) - (*pos)); 597 va_list va; 598 va_start(va, fmt); 599 if (left < 0) { left = 0;} 600 (*pos) += vsprintf(buffer + (*pos), fmt, va); 601 va_end(va); 602 } 603 604 static const char *mpc_err_char_unescape(char c, char char_unescape_buffer[4]) { 605 606 char_unescape_buffer[0] = '\''; 607 char_unescape_buffer[1] = ' '; 608 char_unescape_buffer[2] = '\''; 609 char_unescape_buffer[3] = '\0'; 610 611 switch (c) { 612 case '\a': return "bell"; 613 case '\b': return "backspace"; 614 case '\f': return "formfeed"; 615 case '\r': return "carriage return"; 616 case '\v': return "vertical tab"; 617 case '\0': return "end of input"; 618 case '\n': return "newline"; 619 case '\t': return "tab"; 620 case ' ' : return "space"; 621 default: 622 char_unescape_buffer[1] = c; 623 return char_unescape_buffer; 624 } 625 626 } 627 628 char *mpc_err_string(mpc_err_t *x) { 629 630 int i; 631 int pos = 0; 632 int max = 1023; 633 char *buffer = calloc(1, 1024); 634 char char_unescape_buffer[4]; 635 636 if (x->failure) { 637 mpc_err_string_cat(buffer, &pos, &max, 638 "%s: error: %s\n", x->filename, x->failure); 639 return buffer; 640 } 641 642 mpc_err_string_cat(buffer, &pos, &max, 643 "%s:%li:%li: error: expected ", x->filename, x->state.row+1, x->state.col+1); 644 645 if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } 646 if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } 647 if (x->expected_num >= 2) { 648 649 for (i = 0; i < x->expected_num-2; i++) { 650 mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); 651 } 652 653 mpc_err_string_cat(buffer, &pos, &max, "%s or %s", 654 x->expected[x->expected_num-2], 655 x->expected[x->expected_num-1]); 656 } 657 658 mpc_err_string_cat(buffer, &pos, &max, " at "); 659 mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->received, char_unescape_buffer)); 660 mpc_err_string_cat(buffer, &pos, &max, "\n"); 661 662 return realloc(buffer, strlen(buffer) + 1); 663 } 664 665 static mpc_err_t *mpc_err_new(mpc_input_t *i, const char *expected) { 666 mpc_err_t *x; 667 if (i->suppress) { return NULL; } 668 x = mpc_malloc(i, sizeof(mpc_err_t)); 669 x->filename = mpc_malloc(i, strlen(i->filename) + 1); 670 strcpy(x->filename, i->filename); 671 x->state = i->state; 672 x->expected_num = 1; 673 x->expected = mpc_malloc(i, sizeof(char*)); 674 x->expected[0] = mpc_malloc(i, strlen(expected) + 1); 675 strcpy(x->expected[0], expected); 676 x->failure = NULL; 677 x->received = mpc_input_peekc(i); 678 return x; 679 } 680 681 static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { 682 mpc_err_t *x; 683 if (i->suppress) { return NULL; } 684 x = mpc_malloc(i, sizeof(mpc_err_t)); 685 x->filename = mpc_malloc(i, strlen(i->filename) + 1); 686 strcpy(x->filename, i->filename); 687 x->state = i->state; 688 x->expected_num = 0; 689 x->expected = NULL; 690 x->failure = mpc_malloc(i, strlen(failure) + 1); 691 strcpy(x->failure, failure); 692 x->received = ' '; 693 return x; 694 } 695 696 static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { 697 mpc_err_t *x; 698 x = malloc(sizeof(mpc_err_t)); 699 x->filename = malloc(strlen(filename) + 1); 700 strcpy(x->filename, filename); 701 x->state = mpc_state_new(); 702 x->expected_num = 0; 703 x->expected = NULL; 704 x->failure = malloc(strlen(failure) + 1); 705 strcpy(x->failure, failure); 706 x->received = ' '; 707 return x; 708 } 709 710 static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { 711 int j; 712 if (x == NULL) { return; } 713 for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } 714 mpc_free(i, x->expected); 715 mpc_free(i, x->filename); 716 mpc_free(i, x->failure); 717 mpc_free(i, x); 718 } 719 720 static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { 721 int j; 722 for (j = 0; j < x->expected_num; j++) { 723 x->expected[j] = mpc_export(i, x->expected[j]); 724 } 725 x->expected = mpc_export(i, x->expected); 726 x->filename = mpc_export(i, x->filename); 727 x->failure = mpc_export(i, x->failure); 728 return mpc_export(i, x); 729 } 730 731 static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { 732 int j; 733 (void)i; 734 for (j = 0; j < x->expected_num; j++) { 735 if (strcmp(x->expected[j], expected) == 0) { return 1; } 736 } 737 return 0; 738 } 739 740 static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { 741 (void)i; 742 x->expected_num++; 743 x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 744 x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); 745 strcpy(x->expected[x->expected_num-1], expected); 746 } 747 748 static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { 749 750 int j, k, fst; 751 mpc_err_t *e; 752 753 fst = -1; 754 for (j = 0; j < n; j++) { 755 if (x[j] != NULL) { fst = j; } 756 } 757 758 if (fst == -1) { return NULL; } 759 760 e = mpc_malloc(i, sizeof(mpc_err_t)); 761 e->state = mpc_state_invalid(); 762 e->expected_num = 0; 763 e->expected = NULL; 764 e->failure = NULL; 765 e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); 766 strcpy(e->filename, x[fst]->filename); 767 768 for (j = 0; j < n; j++) { 769 if (x[j] == NULL) { continue; } 770 if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } 771 } 772 773 for (j = 0; j < n; j++) { 774 if (x[j] == NULL) { continue; } 775 if (x[j]->state.pos < e->state.pos) { continue; } 776 777 if (x[j]->failure) { 778 e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); 779 strcpy(e->failure, x[j]->failure); 780 break; 781 } 782 783 e->received = x[j]->received; 784 785 for (k = 0; k < x[j]->expected_num; k++) { 786 if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { 787 mpc_err_add_expected(i, e, x[j]->expected[k]); 788 } 789 } 790 } 791 792 for (j = 0; j < n; j++) { 793 if (x[j] == NULL) { continue; } 794 mpc_err_delete_internal(i, x[j]); 795 } 796 797 return e; 798 } 799 800 static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { 801 802 int j = 0; 803 size_t l = 0; 804 char *expect = NULL; 805 806 if (x == NULL) { return NULL; } 807 808 if (x->expected_num == 0) { 809 expect = mpc_calloc(i, 1, 1); 810 x->expected_num = 1; 811 x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 812 x->expected[0] = expect; 813 return x; 814 } 815 816 else if (x->expected_num == 1) { 817 expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); 818 strcpy(expect, prefix); 819 strcat(expect, x->expected[0]); 820 mpc_free(i, x->expected[0]); 821 x->expected[0] = expect; 822 return x; 823 } 824 825 else if (x->expected_num > 1) { 826 827 l += strlen(prefix); 828 for (j = 0; j < x->expected_num-2; j++) { 829 l += strlen(x->expected[j]) + strlen(", "); 830 } 831 l += strlen(x->expected[x->expected_num-2]); 832 l += strlen(" or "); 833 l += strlen(x->expected[x->expected_num-1]); 834 835 expect = mpc_malloc(i, l + 1); 836 837 strcpy(expect, prefix); 838 for (j = 0; j < x->expected_num-2; j++) { 839 strcat(expect, x->expected[j]); strcat(expect, ", "); 840 } 841 strcat(expect, x->expected[x->expected_num-2]); 842 strcat(expect, " or "); 843 strcat(expect, x->expected[x->expected_num-1]); 844 845 for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } 846 847 x->expected_num = 1; 848 x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); 849 x->expected[0] = expect; 850 return x; 851 } 852 853 return NULL; 854 } 855 856 static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { 857 return mpc_err_repeat(i, x, "one or more of "); 858 } 859 860 static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { 861 mpc_err_t *y; 862 int digits = n/10 + 1; 863 char *prefix; 864 prefix = mpc_malloc(i, digits + strlen(" of ") + 1); 865 if (!prefix) { 866 return NULL; 867 } 868 sprintf(prefix, "%i of ", n); 869 y = mpc_err_repeat(i, x, prefix); 870 mpc_free(i, prefix); 871 return y; 872 } 873 874 static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { 875 mpc_err_t *errs[2]; 876 errs[0] = x; 877 errs[1] = y; 878 return mpc_err_or(i, errs, 2); 879 } 880 881 /* 882 ** Parser Type 883 */ 884 885 enum { 886 MPC_TYPE_UNDEFINED = 0, 887 MPC_TYPE_PASS = 1, 888 MPC_TYPE_FAIL = 2, 889 MPC_TYPE_LIFT = 3, 890 MPC_TYPE_LIFT_VAL = 4, 891 MPC_TYPE_EXPECT = 5, 892 MPC_TYPE_ANCHOR = 6, 893 MPC_TYPE_STATE = 7, 894 895 MPC_TYPE_ANY = 8, 896 MPC_TYPE_SINGLE = 9, 897 MPC_TYPE_ONEOF = 10, 898 MPC_TYPE_NONEOF = 11, 899 MPC_TYPE_RANGE = 12, 900 MPC_TYPE_SATISFY = 13, 901 MPC_TYPE_STRING = 14, 902 903 MPC_TYPE_APPLY = 15, 904 MPC_TYPE_APPLY_TO = 16, 905 MPC_TYPE_PREDICT = 17, 906 MPC_TYPE_NOT = 18, 907 MPC_TYPE_MAYBE = 19, 908 MPC_TYPE_MANY = 20, 909 MPC_TYPE_MANY1 = 21, 910 MPC_TYPE_COUNT = 22, 911 912 MPC_TYPE_OR = 23, 913 MPC_TYPE_AND = 24, 914 915 MPC_TYPE_CHECK = 25, 916 MPC_TYPE_CHECK_WITH = 26, 917 918 MPC_TYPE_SOI = 27, 919 MPC_TYPE_EOI = 28 920 }; 921 922 typedef struct { char *m; } mpc_pdata_fail_t; 923 typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; 924 typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; 925 typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; 926 typedef struct { char x; } mpc_pdata_single_t; 927 typedef struct { char x; char y; } mpc_pdata_range_t; 928 typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; 929 typedef struct { char *x; } mpc_pdata_string_t; 930 typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; 931 typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; 932 typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; 933 typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; 934 typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; 935 typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; 936 typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; 937 typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; 938 typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; 939 940 typedef union { 941 mpc_pdata_fail_t fail; 942 mpc_pdata_lift_t lift; 943 mpc_pdata_expect_t expect; 944 mpc_pdata_anchor_t anchor; 945 mpc_pdata_single_t single; 946 mpc_pdata_range_t range; 947 mpc_pdata_satisfy_t satisfy; 948 mpc_pdata_string_t string; 949 mpc_pdata_apply_t apply; 950 mpc_pdata_apply_to_t apply_to; 951 mpc_pdata_check_t check; 952 mpc_pdata_check_with_t check_with; 953 mpc_pdata_predict_t predict; 954 mpc_pdata_not_t not; 955 mpc_pdata_repeat_t repeat; 956 mpc_pdata_and_t and; 957 mpc_pdata_or_t or; 958 } mpc_pdata_t; 959 960 struct mpc_parser_t { 961 char *name; 962 mpc_pdata_t data; 963 char type; 964 char retained; 965 }; 966 967 static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { 968 int j; 969 for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } 970 return xs[x]; 971 } 972 973 static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } 974 static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } 975 static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } 976 977 static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { 978 int j; 979 size_t l = 0; 980 if (n == 0) { return mpc_calloc(i, 1, 1); } 981 for (j = 0; j < n; j++) { l += strlen(xs[j]); } 982 xs[0] = mpc_realloc(i, xs[0], l + 1); 983 for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } 984 return xs[0]; 985 } 986 987 static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { 988 mpc_state_t *s = ((mpc_state_t**)xs)[0]; 989 mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; 990 a = mpc_ast_state(a, *s); 991 mpc_free(i, s); 992 (void) n; 993 return a; 994 } 995 996 static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { 997 int j; 998 if (f == mpcf_null) { return mpcf_null(n, xs); } 999 if (f == mpcf_fst) { return mpcf_fst(n, xs); } 1000 if (f == mpcf_snd) { return mpcf_snd(n, xs); } 1001 if (f == mpcf_trd) { return mpcf_trd(n, xs); } 1002 if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } 1003 if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } 1004 if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } 1005 if (f == mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } 1006 if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } 1007 for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } 1008 return f(j, xs); 1009 } 1010 1011 static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { 1012 mpc_free(i, x); 1013 return NULL; 1014 } 1015 1016 static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { 1017 mpc_ast_t *a = mpc_ast_new("", c); 1018 mpc_free(i, c); 1019 return a; 1020 } 1021 1022 static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { 1023 if (f == mpcf_free) { return mpcf_input_free(i, x); } 1024 if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } 1025 return f(mpc_export(i, x)); 1026 } 1027 1028 static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { 1029 return f(mpc_export(i, x), d); 1030 } 1031 1032 static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { 1033 if (d == free) { mpc_free(i, x); return; } 1034 d(mpc_export(i, x)); 1035 } 1036 1037 enum { 1038 MPC_PARSE_STACK_MIN = 4 1039 }; 1040 1041 #define MPC_SUCCESS(x) r->output = x; return 1 1042 #define MPC_FAILURE(x) r->error = x; return 0 1043 #define MPC_PRIMITIVE(x) \ 1044 if (x) { MPC_SUCCESS(r->output); } \ 1045 else { MPC_FAILURE(NULL); } 1046 1047 #define MPC_MAX_RECURSION_DEPTH 1000 1048 1049 static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { 1050 1051 int j = 0, k = 0; 1052 mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; 1053 mpc_result_t *results; 1054 int results_slots = MPC_PARSE_STACK_MIN; 1055 1056 if (depth == MPC_MAX_RECURSION_DEPTH) 1057 { 1058 MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); 1059 } 1060 1061 switch (p->type) { 1062 1063 /* Basic Parsers */ 1064 1065 case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); 1066 case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); 1067 case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); 1068 case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); 1069 case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); 1070 case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); 1071 case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); 1072 case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); 1073 case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); 1074 case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); 1075 1076 /* Other parsers */ 1077 1078 case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); 1079 case MPC_TYPE_PASS: MPC_SUCCESS(NULL); 1080 case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); 1081 case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); 1082 case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); 1083 case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); 1084 1085 /* Application Parsers */ 1086 1087 case MPC_TYPE_APPLY: 1088 if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { 1089 MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); 1090 } else { 1091 MPC_FAILURE(r->output); 1092 } 1093 1094 case MPC_TYPE_APPLY_TO: 1095 if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { 1096 MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); 1097 } else { 1098 MPC_FAILURE(r->error); 1099 } 1100 1101 case MPC_TYPE_CHECK: 1102 if (mpc_parse_run(i, p->data.check.x, r, e, depth+1)) { 1103 if (p->data.check.f(&r->output)) { 1104 MPC_SUCCESS(r->output); 1105 } else { 1106 mpc_parse_dtor(i, p->data.check.dx, r->output); 1107 MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); 1108 } 1109 } else { 1110 MPC_FAILURE(r->error); 1111 } 1112 1113 case MPC_TYPE_CHECK_WITH: 1114 if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { 1115 if (p->data.check_with.f(&r->output, p->data.check_with.d)) { 1116 MPC_SUCCESS(r->output); 1117 } else { 1118 mpc_parse_dtor(i, p->data.check.dx, r->output); 1119 MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); 1120 } 1121 } else { 1122 MPC_FAILURE(r->error); 1123 } 1124 1125 case MPC_TYPE_EXPECT: 1126 mpc_input_suppress_enable(i); 1127 if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { 1128 mpc_input_suppress_disable(i); 1129 MPC_SUCCESS(r->output); 1130 } else { 1131 mpc_input_suppress_disable(i); 1132 MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); 1133 } 1134 1135 case MPC_TYPE_PREDICT: 1136 mpc_input_backtrack_disable(i); 1137 if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { 1138 mpc_input_backtrack_enable(i); 1139 MPC_SUCCESS(r->output); 1140 } else { 1141 mpc_input_backtrack_enable(i); 1142 MPC_FAILURE(r->error); 1143 } 1144 1145 /* Optional Parsers */ 1146 1147 /* TODO: Update Not Error Message */ 1148 1149 case MPC_TYPE_NOT: 1150 mpc_input_mark(i); 1151 mpc_input_suppress_enable(i); 1152 if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { 1153 mpc_input_rewind(i); 1154 mpc_input_suppress_disable(i); 1155 mpc_parse_dtor(i, p->data.not.dx, r->output); 1156 MPC_FAILURE(mpc_err_new(i, "opposite")); 1157 } else { 1158 mpc_input_unmark(i); 1159 mpc_input_suppress_disable(i); 1160 MPC_SUCCESS(p->data.not.lf()); 1161 } 1162 1163 case MPC_TYPE_MAYBE: 1164 if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { 1165 MPC_SUCCESS(r->output); 1166 } else { 1167 *e = mpc_err_merge(i, *e, r->error); 1168 MPC_SUCCESS(p->data.not.lf()); 1169 } 1170 1171 /* Repeat Parsers */ 1172 1173 case MPC_TYPE_MANY: 1174 1175 results = results_stk; 1176 1177 while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { 1178 j++; 1179 if (j == MPC_PARSE_STACK_MIN) { 1180 results_slots = j + j / 2; 1181 results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); 1182 memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); 1183 } else if (j >= results_slots) { 1184 results_slots = j + j / 2; 1185 results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); 1186 } 1187 } 1188 1189 *e = mpc_err_merge(i, *e, results[j].error); 1190 1191 MPC_SUCCESS( 1192 mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1193 if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1194 1195 case MPC_TYPE_MANY1: 1196 1197 results = results_stk; 1198 1199 while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { 1200 j++; 1201 if (j == MPC_PARSE_STACK_MIN) { 1202 results_slots = j + j / 2; 1203 results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); 1204 memcpy(results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); 1205 } else if (j >= results_slots) { 1206 results_slots = j + j / 2; 1207 results = mpc_realloc(i, results, sizeof(mpc_result_t) * results_slots); 1208 } 1209 } 1210 1211 if (j == 0) { 1212 MPC_FAILURE( 1213 mpc_err_many1(i, results[j].error); 1214 if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1215 } else { 1216 1217 *e = mpc_err_merge(i, *e, results[j].error); 1218 1219 MPC_SUCCESS( 1220 mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1221 if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1222 } 1223 1224 case MPC_TYPE_COUNT: 1225 1226 results = p->data.repeat.n > MPC_PARSE_STACK_MIN 1227 ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) 1228 : results_stk; 1229 1230 while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { 1231 j++; 1232 if (j == p->data.repeat.n) { break; } 1233 } 1234 1235 if (j == p->data.repeat.n) { 1236 MPC_SUCCESS( 1237 mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); 1238 if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1239 } else { 1240 for (k = 0; k < j; k++) { 1241 mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); 1242 } 1243 MPC_FAILURE( 1244 mpc_err_count(i, results[j].error, p->data.repeat.n); 1245 if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1246 } 1247 1248 /* Combinatory Parsers */ 1249 1250 case MPC_TYPE_OR: 1251 1252 if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } 1253 1254 results = p->data.or.n > MPC_PARSE_STACK_MIN 1255 ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) 1256 : results_stk; 1257 1258 for (j = 0; j < p->data.or.n; j++) { 1259 if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { 1260 MPC_SUCCESS(results[j].output; 1261 if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1262 } else { 1263 *e = mpc_err_merge(i, *e, results[j].error); 1264 } 1265 } 1266 1267 MPC_FAILURE(NULL; 1268 if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1269 1270 case MPC_TYPE_AND: 1271 1272 if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } 1273 1274 results = p->data.or.n > MPC_PARSE_STACK_MIN 1275 ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) 1276 : results_stk; 1277 1278 mpc_input_mark(i); 1279 for (j = 0; j < p->data.and.n; j++) { 1280 if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { 1281 mpc_input_rewind(i); 1282 for (k = 0; k < j; k++) { 1283 mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); 1284 } 1285 MPC_FAILURE(results[j].error; 1286 if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1287 } 1288 } 1289 mpc_input_unmark(i); 1290 MPC_SUCCESS( 1291 mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); 1292 if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); 1293 1294 /* End */ 1295 1296 default: 1297 1298 MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); 1299 } 1300 1301 return 0; 1302 1303 } 1304 1305 #undef MPC_SUCCESS 1306 #undef MPC_FAILURE 1307 #undef MPC_PRIMITIVE 1308 1309 int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { 1310 int x; 1311 mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); 1312 e->state = mpc_state_invalid(); 1313 x = mpc_parse_run(i, p, r, &e, 0); 1314 if (x) { 1315 mpc_err_delete_internal(i, e); 1316 r->output = mpc_export(i, r->output); 1317 } else { 1318 r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); 1319 } 1320 return x; 1321 } 1322 1323 int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { 1324 int x; 1325 mpc_input_t *i = mpc_input_new_string(filename, string); 1326 x = mpc_parse_input(i, p, r); 1327 mpc_input_delete(i); 1328 return x; 1329 } 1330 1331 int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { 1332 int x; 1333 mpc_input_t *i = mpc_input_new_nstring(filename, string, length); 1334 x = mpc_parse_input(i, p, r); 1335 mpc_input_delete(i); 1336 return x; 1337 } 1338 1339 int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { 1340 int x; 1341 mpc_input_t *i = mpc_input_new_file(filename, file); 1342 x = mpc_parse_input(i, p, r); 1343 mpc_input_delete(i); 1344 return x; 1345 } 1346 1347 int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { 1348 int x; 1349 mpc_input_t *i = mpc_input_new_pipe(filename, pipe); 1350 x = mpc_parse_input(i, p, r); 1351 mpc_input_delete(i); 1352 return x; 1353 } 1354 1355 int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { 1356 1357 FILE *f = fopen(filename, "rb"); 1358 int res; 1359 1360 if (f == NULL) { 1361 r->output = NULL; 1362 r->error = mpc_err_file(filename, "Unable to open file!"); 1363 return 0; 1364 } 1365 1366 res = mpc_parse_file(filename, f, p, r); 1367 fclose(f); 1368 return res; 1369 } 1370 1371 /* 1372 ** Building a Parser 1373 */ 1374 1375 static void mpc_undefine_unretained(mpc_parser_t *p, int force); 1376 1377 static void mpc_undefine_or(mpc_parser_t *p) { 1378 1379 int i; 1380 for (i = 0; i < p->data.or.n; i++) { 1381 mpc_undefine_unretained(p->data.or.xs[i], 0); 1382 } 1383 free(p->data.or.xs); 1384 1385 } 1386 1387 static void mpc_undefine_and(mpc_parser_t *p) { 1388 1389 int i; 1390 for (i = 0; i < p->data.and.n; i++) { 1391 mpc_undefine_unretained(p->data.and.xs[i], 0); 1392 } 1393 free(p->data.and.xs); 1394 free(p->data.and.dxs); 1395 1396 } 1397 1398 static void mpc_undefine_unretained(mpc_parser_t *p, int force) { 1399 1400 if (p->retained && !force) { return; } 1401 1402 switch (p->type) { 1403 1404 case MPC_TYPE_FAIL: free(p->data.fail.m); break; 1405 1406 case MPC_TYPE_ONEOF: 1407 case MPC_TYPE_NONEOF: 1408 case MPC_TYPE_STRING: 1409 free(p->data.string.x); 1410 break; 1411 1412 case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; 1413 case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; 1414 case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; 1415 1416 case MPC_TYPE_MAYBE: 1417 case MPC_TYPE_NOT: 1418 mpc_undefine_unretained(p->data.not.x, 0); 1419 break; 1420 1421 case MPC_TYPE_EXPECT: 1422 mpc_undefine_unretained(p->data.expect.x, 0); 1423 free(p->data.expect.m); 1424 break; 1425 1426 case MPC_TYPE_MANY: 1427 case MPC_TYPE_MANY1: 1428 case MPC_TYPE_COUNT: 1429 mpc_undefine_unretained(p->data.repeat.x, 0); 1430 break; 1431 1432 case MPC_TYPE_OR: mpc_undefine_or(p); break; 1433 case MPC_TYPE_AND: mpc_undefine_and(p); break; 1434 1435 case MPC_TYPE_CHECK: 1436 mpc_undefine_unretained(p->data.check.x, 0); 1437 free(p->data.check.e); 1438 break; 1439 1440 case MPC_TYPE_CHECK_WITH: 1441 mpc_undefine_unretained(p->data.check_with.x, 0); 1442 free(p->data.check_with.e); 1443 break; 1444 1445 default: break; 1446 } 1447 1448 if (!force) { 1449 free(p->name); 1450 free(p); 1451 } 1452 1453 } 1454 1455 void mpc_delete(mpc_parser_t *p) { 1456 if (p->retained) { 1457 1458 if (p->type != MPC_TYPE_UNDEFINED) { 1459 mpc_undefine_unretained(p, 0); 1460 } 1461 1462 free(p->name); 1463 free(p); 1464 1465 } else { 1466 mpc_undefine_unretained(p, 0); 1467 } 1468 } 1469 1470 static void mpc_soft_delete(mpc_val_t *x) { 1471 mpc_undefine_unretained(x, 0); 1472 } 1473 1474 static mpc_parser_t *mpc_undefined(void) { 1475 mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); 1476 p->retained = 0; 1477 p->type = MPC_TYPE_UNDEFINED; 1478 p->name = NULL; 1479 return p; 1480 } 1481 1482 mpc_parser_t *mpc_new(const char *name) { 1483 mpc_parser_t *p = mpc_undefined(); 1484 p->retained = 1; 1485 p->name = realloc(p->name, strlen(name) + 1); 1486 strcpy(p->name, name); 1487 return p; 1488 } 1489 1490 mpc_parser_t *mpc_copy(mpc_parser_t *a) { 1491 int i = 0; 1492 mpc_parser_t *p; 1493 1494 if (a->retained) { return a; } 1495 1496 p = mpc_undefined(); 1497 p->retained = a->retained; 1498 p->type = a->type; 1499 p->data = a->data; 1500 1501 if (a->name) { 1502 p->name = malloc(strlen(a->name)+1); 1503 strcpy(p->name, a->name); 1504 } 1505 1506 switch (a->type) { 1507 1508 case MPC_TYPE_FAIL: 1509 p->data.fail.m = malloc(strlen(a->data.fail.m)+1); 1510 strcpy(p->data.fail.m, a->data.fail.m); 1511 break; 1512 1513 case MPC_TYPE_ONEOF: 1514 case MPC_TYPE_NONEOF: 1515 case MPC_TYPE_STRING: 1516 p->data.string.x = malloc(strlen(a->data.string.x)+1); 1517 strcpy(p->data.string.x, a->data.string.x); 1518 break; 1519 1520 case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; 1521 case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; 1522 case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; 1523 1524 case MPC_TYPE_MAYBE: 1525 case MPC_TYPE_NOT: 1526 p->data.not.x = mpc_copy(a->data.not.x); 1527 break; 1528 1529 case MPC_TYPE_EXPECT: 1530 p->data.expect.x = mpc_copy(a->data.expect.x); 1531 p->data.expect.m = malloc(strlen(a->data.expect.m)+1); 1532 strcpy(p->data.expect.m, a->data.expect.m); 1533 break; 1534 1535 case MPC_TYPE_MANY: 1536 case MPC_TYPE_MANY1: 1537 case MPC_TYPE_COUNT: 1538 p->data.repeat.x = mpc_copy(a->data.repeat.x); 1539 break; 1540 1541 case MPC_TYPE_OR: 1542 p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); 1543 for (i = 0; i < a->data.or.n; i++) { 1544 p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); 1545 } 1546 break; 1547 case MPC_TYPE_AND: 1548 p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); 1549 for (i = 0; i < a->data.and.n; i++) { 1550 p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); 1551 } 1552 p->data.and.dxs = malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); 1553 for (i = 0; i < a->data.and.n-1; i++) { 1554 p->data.and.dxs[i] = a->data.and.dxs[i]; 1555 } 1556 break; 1557 1558 case MPC_TYPE_CHECK: 1559 p->data.check.x = mpc_copy(a->data.check.x); 1560 p->data.check.e = malloc(strlen(a->data.check.e)+1); 1561 strcpy(p->data.check.e, a->data.check.e); 1562 break; 1563 case MPC_TYPE_CHECK_WITH: 1564 p->data.check_with.x = mpc_copy(a->data.check_with.x); 1565 p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); 1566 strcpy(p->data.check_with.e, a->data.check_with.e); 1567 break; 1568 1569 default: break; 1570 } 1571 1572 1573 return p; 1574 } 1575 1576 mpc_parser_t *mpc_undefine(mpc_parser_t *p) { 1577 mpc_undefine_unretained(p, 1); 1578 p->type = MPC_TYPE_UNDEFINED; 1579 return p; 1580 } 1581 1582 mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { 1583 1584 if (p->retained) { 1585 p->type = a->type; 1586 p->data = a->data; 1587 } else { 1588 mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); 1589 p->type = a2->type; 1590 p->data = a2->data; 1591 free(a2); 1592 } 1593 1594 free(a); 1595 return p; 1596 } 1597 1598 void mpc_cleanup(int n, ...) { 1599 int i; 1600 mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); 1601 1602 va_list va; 1603 va_start(va, n); 1604 for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } 1605 for (i = 0; i < n; i++) { mpc_undefine(list[i]); } 1606 for (i = 0; i < n; i++) { mpc_delete(list[i]); } 1607 va_end(va); 1608 1609 free(list); 1610 } 1611 1612 mpc_parser_t *mpc_pass(void) { 1613 mpc_parser_t *p = mpc_undefined(); 1614 p->type = MPC_TYPE_PASS; 1615 return p; 1616 } 1617 1618 mpc_parser_t *mpc_fail(const char *m) { 1619 mpc_parser_t *p = mpc_undefined(); 1620 p->type = MPC_TYPE_FAIL; 1621 p->data.fail.m = malloc(strlen(m) + 1); 1622 strcpy(p->data.fail.m, m); 1623 return p; 1624 } 1625 1626 /* 1627 ** As `snprintf` is not ANSI standard this 1628 ** function `mpc_failf` should be considered 1629 ** unsafe. 1630 ** 1631 ** You have a few options if this is going to be 1632 ** trouble. 1633 ** 1634 ** - Ensure the format string does not exceed 1635 ** the buffer length using precision specifiers 1636 ** such as `%.512s`. 1637 ** 1638 ** - Patch this function in your code base to 1639 ** use `snprintf` or whatever variant your 1640 ** system supports. 1641 ** 1642 ** - Avoid it altogether. 1643 ** 1644 */ 1645 1646 mpc_parser_t *mpc_failf(const char *fmt, ...) { 1647 1648 va_list va; 1649 char *buffer; 1650 1651 mpc_parser_t *p = mpc_undefined(); 1652 p->type = MPC_TYPE_FAIL; 1653 1654 va_start(va, fmt); 1655 buffer = malloc(2048); 1656 if (!buffer) { 1657 return NULL; 1658 } 1659 vsprintf(buffer, fmt, va); 1660 va_end(va); 1661 1662 buffer = realloc(buffer, strlen(buffer) + 1); 1663 p->data.fail.m = buffer; 1664 return p; 1665 1666 } 1667 1668 mpc_parser_t *mpc_lift_val(mpc_val_t *x) { 1669 mpc_parser_t *p = mpc_undefined(); 1670 p->type = MPC_TYPE_LIFT_VAL; 1671 p->data.lift.x = x; 1672 return p; 1673 } 1674 1675 mpc_parser_t *mpc_lift(mpc_ctor_t lf) { 1676 mpc_parser_t *p = mpc_undefined(); 1677 p->type = MPC_TYPE_LIFT; 1678 p->data.lift.lf = lf; 1679 return p; 1680 } 1681 1682 mpc_parser_t *mpc_anchor(int(*f)(char,char)) { 1683 mpc_parser_t *p = mpc_undefined(); 1684 p->type = MPC_TYPE_ANCHOR; 1685 p->data.anchor.f = f; 1686 return mpc_expect(p, "anchor"); 1687 } 1688 1689 mpc_parser_t *mpc_state(void) { 1690 mpc_parser_t *p = mpc_undefined(); 1691 p->type = MPC_TYPE_STATE; 1692 return p; 1693 } 1694 1695 mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { 1696 mpc_parser_t *p = mpc_undefined(); 1697 p->type = MPC_TYPE_EXPECT; 1698 p->data.expect.x = a; 1699 p->data.expect.m = malloc(strlen(expected) + 1); 1700 strcpy(p->data.expect.m, expected); 1701 return p; 1702 } 1703 1704 /* 1705 ** As `snprintf` is not ANSI standard this 1706 ** function `mpc_expectf` should be considered 1707 ** unsafe. 1708 ** 1709 ** You have a few options if this is going to be 1710 ** trouble. 1711 ** 1712 ** - Ensure the format string does not exceed 1713 ** the buffer length using precision specifiers 1714 ** such as `%.512s`. 1715 ** 1716 ** - Patch this function in your code base to 1717 ** use `snprintf` or whatever variant your 1718 ** system supports. 1719 ** 1720 ** - Avoid it altogether. 1721 ** 1722 */ 1723 1724 mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) { 1725 va_list va; 1726 char *buffer; 1727 1728 mpc_parser_t *p = mpc_undefined(); 1729 p->type = MPC_TYPE_EXPECT; 1730 1731 va_start(va, fmt); 1732 buffer = malloc(2048); 1733 if (!buffer) { 1734 return NULL; 1735 } 1736 vsprintf(buffer, fmt, va); 1737 va_end(va); 1738 1739 buffer = realloc(buffer, strlen(buffer) + 1); 1740 p->data.expect.x = a; 1741 p->data.expect.m = buffer; 1742 return p; 1743 } 1744 1745 /* 1746 ** Basic Parsers 1747 */ 1748 1749 mpc_parser_t *mpc_any(void) { 1750 mpc_parser_t *p = mpc_undefined(); 1751 p->type = MPC_TYPE_ANY; 1752 return mpc_expect(p, "any character"); 1753 } 1754 1755 mpc_parser_t *mpc_char(char c) { 1756 mpc_parser_t *p = mpc_undefined(); 1757 p->type = MPC_TYPE_SINGLE; 1758 p->data.single.x = c; 1759 return mpc_expectf(p, "'%c'", c); 1760 } 1761 1762 mpc_parser_t *mpc_range(char s, char e) { 1763 mpc_parser_t *p = mpc_undefined(); 1764 p->type = MPC_TYPE_RANGE; 1765 p->data.range.x = s; 1766 p->data.range.y = e; 1767 return mpc_expectf(p, "character between '%c' and '%c'", s, e); 1768 } 1769 1770 mpc_parser_t *mpc_oneof(const char *s) { 1771 mpc_parser_t *p = mpc_undefined(); 1772 p->type = MPC_TYPE_ONEOF; 1773 p->data.string.x = malloc(strlen(s) + 1); 1774 strcpy(p->data.string.x, s); 1775 return mpc_expectf(p, "one of '%s'", s); 1776 } 1777 1778 mpc_parser_t *mpc_noneof(const char *s) { 1779 mpc_parser_t *p = mpc_undefined(); 1780 p->type = MPC_TYPE_NONEOF; 1781 p->data.string.x = malloc(strlen(s) + 1); 1782 strcpy(p->data.string.x, s); 1783 return mpc_expectf(p, "none of '%s'", s); 1784 1785 } 1786 1787 mpc_parser_t *mpc_satisfy(int(*f)(char)) { 1788 mpc_parser_t *p = mpc_undefined(); 1789 p->type = MPC_TYPE_SATISFY; 1790 p->data.satisfy.f = f; 1791 return mpc_expectf(p, "character satisfying function %p", f); 1792 } 1793 1794 mpc_parser_t *mpc_string(const char *s) { 1795 mpc_parser_t *p = mpc_undefined(); 1796 p->type = MPC_TYPE_STRING; 1797 p->data.string.x = malloc(strlen(s) + 1); 1798 strcpy(p->data.string.x, s); 1799 return mpc_expectf(p, "\"%s\"", s); 1800 } 1801 1802 /* 1803 ** Core Parsers 1804 */ 1805 1806 mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { 1807 mpc_parser_t *p = mpc_undefined(); 1808 p->type = MPC_TYPE_APPLY; 1809 p->data.apply.x = a; 1810 p->data.apply.f = f; 1811 return p; 1812 } 1813 1814 mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { 1815 mpc_parser_t *p = mpc_undefined(); 1816 p->type = MPC_TYPE_APPLY_TO; 1817 p->data.apply_to.x = a; 1818 p->data.apply_to.f = f; 1819 p->data.apply_to.d = x; 1820 return p; 1821 } 1822 1823 mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { 1824 mpc_parser_t *p = mpc_undefined(); 1825 p->type = MPC_TYPE_CHECK; 1826 p->data.check.x = a; 1827 p->data.check.dx = da; 1828 p->data.check.f = f; 1829 p->data.check.e = malloc(strlen(e) + 1); 1830 strcpy(p->data.check.e, e); 1831 return p; 1832 } 1833 1834 mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { 1835 mpc_parser_t *p = mpc_undefined(); 1836 p->type = MPC_TYPE_CHECK_WITH; 1837 p->data.check_with.x = a; 1838 p->data.check_with.dx = da; 1839 p->data.check_with.f = f; 1840 p->data.check_with.d = x; 1841 p->data.check_with.e = malloc(strlen(e) + 1); 1842 strcpy(p->data.check_with.e, e); 1843 return p; 1844 } 1845 1846 mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) { 1847 va_list va; 1848 char *buffer; 1849 mpc_parser_t *p; 1850 1851 va_start(va, fmt); 1852 buffer = malloc(2048); 1853 vsprintf(buffer, fmt, va); 1854 va_end(va); 1855 1856 p = mpc_check(a, da, f, buffer); 1857 free(buffer); 1858 1859 return p; 1860 } 1861 1862 mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) { 1863 va_list va; 1864 char *buffer; 1865 mpc_parser_t *p; 1866 1867 va_start(va, fmt); 1868 buffer = malloc(2048); 1869 vsprintf(buffer, fmt, va); 1870 va_end(va); 1871 1872 p = mpc_check_with(a, da, f, x, buffer); 1873 free(buffer); 1874 1875 return p; 1876 } 1877 1878 mpc_parser_t *mpc_predictive(mpc_parser_t *a) { 1879 mpc_parser_t *p = mpc_undefined(); 1880 p->type = MPC_TYPE_PREDICT; 1881 p->data.predict.x = a; 1882 return p; 1883 } 1884 1885 mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { 1886 mpc_parser_t *p = mpc_undefined(); 1887 p->type = MPC_TYPE_NOT; 1888 p->data.not.x = a; 1889 p->data.not.dx = da; 1890 p->data.not.lf = lf; 1891 return p; 1892 } 1893 1894 mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { 1895 return mpc_not_lift(a, da, mpcf_ctor_null); 1896 } 1897 1898 mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { 1899 mpc_parser_t *p = mpc_undefined(); 1900 p->type = MPC_TYPE_MAYBE; 1901 p->data.not.x = a; 1902 p->data.not.lf = lf; 1903 return p; 1904 } 1905 1906 mpc_parser_t *mpc_maybe(mpc_parser_t *a) { 1907 return mpc_maybe_lift(a, mpcf_ctor_null); 1908 } 1909 1910 mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { 1911 mpc_parser_t *p = mpc_undefined(); 1912 p->type = MPC_TYPE_MANY; 1913 p->data.repeat.x = a; 1914 p->data.repeat.f = f; 1915 return p; 1916 } 1917 1918 mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { 1919 mpc_parser_t *p = mpc_undefined(); 1920 p->type = MPC_TYPE_MANY1; 1921 p->data.repeat.x = a; 1922 p->data.repeat.f = f; 1923 return p; 1924 } 1925 1926 mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { 1927 mpc_parser_t *p = mpc_undefined(); 1928 p->type = MPC_TYPE_COUNT; 1929 p->data.repeat.n = n; 1930 p->data.repeat.f = f; 1931 p->data.repeat.x = a; 1932 p->data.repeat.dx = da; 1933 return p; 1934 } 1935 1936 mpc_parser_t *mpc_or(int n, ...) { 1937 1938 int i; 1939 va_list va; 1940 1941 mpc_parser_t *p = mpc_undefined(); 1942 1943 p->type = MPC_TYPE_OR; 1944 p->data.or.n = n; 1945 p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); 1946 1947 va_start(va, n); 1948 for (i = 0; i < n; i++) { 1949 p->data.or.xs[i] = va_arg(va, mpc_parser_t*); 1950 } 1951 va_end(va); 1952 1953 return p; 1954 } 1955 1956 mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { 1957 1958 int i; 1959 va_list va; 1960 1961 mpc_parser_t *p = mpc_undefined(); 1962 1963 p->type = MPC_TYPE_AND; 1964 p->data.and.n = n; 1965 p->data.and.f = f; 1966 p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); 1967 p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); 1968 1969 va_start(va, f); 1970 for (i = 0; i < n; i++) { 1971 p->data.and.xs[i] = va_arg(va, mpc_parser_t*); 1972 } 1973 for (i = 0; i < (n-1); i++) { 1974 p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); 1975 } 1976 va_end(va); 1977 1978 return p; 1979 } 1980 1981 /* 1982 ** Common Parsers 1983 */ 1984 1985 mpc_parser_t *mpc_soi(void) { 1986 mpc_parser_t *p = mpc_undefined(); 1987 p->type = MPC_TYPE_SOI; 1988 return mpc_expect(p, "start of input"); 1989 } 1990 1991 mpc_parser_t *mpc_eoi(void) { 1992 mpc_parser_t *p = mpc_undefined(); 1993 p->type = MPC_TYPE_EOI; 1994 return mpc_expect(p, "end of input"); 1995 } 1996 1997 static int mpc_boundary_anchor(char prev, char next) { 1998 const char* word = "abcdefghijklmnopqrstuvwxyz" 1999 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 2000 "0123456789_"; 2001 if ( strchr(word, next) && prev == '\0') { return 1; } 2002 if ( strchr(word, prev) && next == '\0') { return 1; } 2003 if ( strchr(word, next) && !strchr(word, prev)) { return 1; } 2004 if (!strchr(word, next) && strchr(word, prev)) { return 1; } 2005 return 0; 2006 } 2007 2008 static int mpc_boundary_newline_anchor(char prev, char next) { 2009 (void)next; 2010 return prev == '\n'; 2011 } 2012 2013 mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } 2014 mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } 2015 2016 mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } 2017 mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } 2018 mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } 2019 2020 mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } 2021 mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } 2022 mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } 2023 2024 mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } 2025 mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } 2026 mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } 2027 mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } 2028 mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } 2029 mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } 2030 2031 mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } 2032 mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } 2033 mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } 2034 mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } 2035 mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } 2036 2037 mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } 2038 mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } 2039 mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } 2040 mpc_parser_t *mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } 2041 2042 mpc_parser_t *mpc_real(void) { 2043 2044 /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ 2045 2046 mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; 2047 2048 p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); 2049 p1 = mpc_digits(); 2050 p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); 2051 p30 = mpc_oneof("eE"); 2052 p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); 2053 p32 = mpc_digits(); 2054 p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); 2055 2056 return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); 2057 2058 } 2059 2060 mpc_parser_t *mpc_float(void) { 2061 return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); 2062 } 2063 2064 mpc_parser_t *mpc_char_lit(void) { 2065 return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); 2066 } 2067 2068 mpc_parser_t *mpc_string_lit(void) { 2069 mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); 2070 return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); 2071 } 2072 2073 mpc_parser_t *mpc_regex_lit(void) { 2074 mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); 2075 return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); 2076 } 2077 2078 mpc_parser_t *mpc_ident(void) { 2079 mpc_parser_t *p0, *p1; 2080 p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); 2081 p1 = mpc_many(mpcf_strfold, mpc_alphanum()); 2082 return mpc_and(2, mpcf_strfold, p0, p1, free); 2083 } 2084 2085 /* 2086 ** Useful Parsers 2087 */ 2088 2089 mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } 2090 mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } 2091 mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } 2092 2093 mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } 2094 mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } 2095 mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } 2096 mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } 2097 mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } 2098 2099 mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } 2100 2101 mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { 2102 return mpc_and(3, mpcf_snd_free, 2103 mpc_string(o), a, mpc_string(c), 2104 free, ad); 2105 } 2106 2107 mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } 2108 mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } 2109 mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } 2110 mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } 2111 2112 mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { 2113 return mpc_and(3, mpcf_snd_free, 2114 mpc_sym(o), mpc_tok(a), mpc_sym(c), 2115 free, ad); 2116 } 2117 2118 mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } 2119 mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } 2120 mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } 2121 mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } 2122 2123 /* 2124 ** Regular Expression Parsers 2125 */ 2126 2127 /* 2128 ** So here is a cute bootstrapping. 2129 ** 2130 ** I'm using the previously defined 2131 ** mpc constructs and functions to 2132 ** parse the user regex string and 2133 ** construct a parser from it. 2134 ** 2135 ** As it turns out lots of the standard 2136 ** mpc functions look a lot like `fold` 2137 ** functions and so can be used indirectly 2138 ** by many of the parsing functions to build 2139 ** a parser directly - as we are parsing. 2140 ** 2141 ** This is certainly something that 2142 ** would be less elegant/interesting 2143 ** in a two-phase parser which first 2144 ** builds an AST and then traverses it 2145 ** to generate the object. 2146 ** 2147 ** This whole thing acts as a great 2148 ** case study for how trivial it can be 2149 ** to write a great parser in a few 2150 ** lines of code using mpc. 2151 */ 2152 2153 /* 2154 ** 2155 ** ### Regular Expression Grammar 2156 ** 2157 ** <regex> : <term> | (<term> "|" <regex>) 2158 ** 2159 ** <term> : <factor>* 2160 ** 2161 ** <factor> : <base> 2162 ** | <base> "*" 2163 ** | <base> "+" 2164 ** | <base> "?" 2165 ** | <base> "{" <digits> "}" 2166 ** 2167 ** <base> : <char> 2168 ** | "\" <char> 2169 ** | "(" <regex> ")" 2170 ** | "[" <range> "]" 2171 */ 2172 2173 static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { 2174 (void) n; 2175 if (xs[1] == NULL) { return xs[0]; } 2176 else { return mpc_or(2, xs[0], xs[1]); } 2177 } 2178 2179 static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { 2180 int i; 2181 mpc_parser_t *p = mpc_lift(mpcf_ctor_str); 2182 for (i = 0; i < n; i++) { 2183 p = mpc_and(2, mpcf_strfold, p, xs[i], free); 2184 } 2185 return p; 2186 } 2187 2188 static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { 2189 int num; 2190 (void) n; 2191 if (xs[1] == NULL) { return xs[0]; } 2192 switch(((char*)xs[1])[0]) 2193 { 2194 case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; 2195 case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; 2196 case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; 2197 default: 2198 num = *(int*)xs[1]; 2199 free(xs[1]); 2200 } 2201 2202 return mpc_count(num, mpcf_strfold, xs[0], free); 2203 } 2204 2205 static mpc_parser_t *mpc_re_escape_char(char c) { 2206 switch (c) { 2207 case 'a': return mpc_char('\a'); 2208 case 'f': return mpc_char('\f'); 2209 case 'n': return mpc_char('\n'); 2210 case 'r': return mpc_char('\r'); 2211 case 't': return mpc_char('\t'); 2212 case 'v': return mpc_char('\v'); 2213 case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); 2214 case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); 2215 case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); 2216 case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); 2217 case 'd': return mpc_digit(); 2218 case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); 2219 case 's': return mpc_whitespace(); 2220 case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); 2221 case 'w': return mpc_alphanum(); 2222 case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); 2223 default: return NULL; 2224 } 2225 } 2226 2227 static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { 2228 2229 int mode = *((int*)data); 2230 char *s = x; 2231 mpc_parser_t *p; 2232 2233 /* Any Character */ 2234 if (s[0] == '.') { 2235 free(s); 2236 if (mode & MPC_RE_DOTALL) { 2237 return mpc_any(); 2238 } else { 2239 return mpc_expect(mpc_noneof("\n"), "any character except a newline"); 2240 } 2241 } 2242 2243 /* Start of Input */ 2244 if (s[0] == '^') { 2245 free(s); 2246 if (mode & MPC_RE_MULTILINE) { 2247 return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); 2248 } else { 2249 return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); 2250 } 2251 } 2252 2253 /* End of Input */ 2254 if (s[0] == '$') { 2255 free(s); 2256 if (mode & MPC_RE_MULTILINE) { 2257 return mpc_or(2, 2258 mpc_newline(), 2259 mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); 2260 } else { 2261 return mpc_or(2, 2262 mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), 2263 mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); 2264 } 2265 } 2266 2267 /* Regex Escape */ 2268 if (s[0] == '\\') { 2269 p = mpc_re_escape_char(s[1]); 2270 p = (p == NULL) ? mpc_char(s[1]) : p; 2271 free(s); 2272 return p; 2273 } 2274 2275 /* Regex Standard */ 2276 p = mpc_char(s[0]); 2277 free(s); 2278 return p; 2279 } 2280 2281 static const char *mpc_re_range_escape_char(char c) { 2282 switch (c) { 2283 case '-': return "-"; 2284 case 'a': return "\a"; 2285 case 'f': return "\f"; 2286 case 'n': return "\n"; 2287 case 'r': return "\r"; 2288 case 't': return "\t"; 2289 case 'v': return "\v"; 2290 case 'b': return "\b"; 2291 case 'd': return "0123456789"; 2292 case 's': return " \f\n\r\t\v"; 2293 case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; 2294 default: return NULL; 2295 } 2296 } 2297 2298 static mpc_val_t *mpcf_re_range(mpc_val_t *x) { 2299 2300 mpc_parser_t *out; 2301 size_t i, j; 2302 size_t start, end; 2303 const char *tmp = NULL; 2304 const char *s = x; 2305 int comp = s[0] == '^' ? 1 : 0; 2306 char *range = calloc(1,1); 2307 2308 if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } 2309 if (s[0] == '^' && 2310 s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } 2311 2312 for (i = comp; i < strlen(s); i++){ 2313 2314 /* Regex Range Escape */ 2315 if (s[i] == '\\') { 2316 tmp = mpc_re_range_escape_char(s[i+1]); 2317 if (tmp != NULL) { 2318 range = realloc(range, strlen(range) + strlen(tmp) + 1); 2319 strcat(range, tmp); 2320 } else { 2321 range = realloc(range, strlen(range) + 1 + 1); 2322 range[strlen(range) + 1] = '\0'; 2323 range[strlen(range) + 0] = s[i+1]; 2324 } 2325 i++; 2326 } 2327 2328 /* Regex Range...Range */ 2329 else if (s[i] == '-') { 2330 if (s[i+1] == '\0' || i == 0) { 2331 range = realloc(range, strlen(range) + strlen("-") + 1); 2332 strcat(range, "-"); 2333 } else { 2334 start = s[i-1]+1; 2335 end = s[i+1]-1; 2336 for (j = start; j <= end; j++) { 2337 range = realloc(range, strlen(range) + 1 + 1 + 1); 2338 range[strlen(range) + 1] = '\0'; 2339 range[strlen(range) + 0] = (char)j; 2340 } 2341 } 2342 } 2343 2344 /* Regex Range Normal */ 2345 else { 2346 range = realloc(range, strlen(range) + 1 + 1); 2347 range[strlen(range) + 1] = '\0'; 2348 range[strlen(range) + 0] = s[i]; 2349 } 2350 2351 } 2352 2353 out = comp == 1 ? mpc_noneof(range) : mpc_oneof(range); 2354 2355 free(x); 2356 free(range); 2357 2358 return out; 2359 } 2360 2361 mpc_parser_t *mpc_re(const char *re) { 2362 return mpc_re_mode(re, MPC_RE_DEFAULT); 2363 } 2364 2365 mpc_parser_t *mpc_re_mode(const char *re, int mode) { 2366 2367 char *err_msg; 2368 mpc_parser_t *err_out; 2369 mpc_result_t r; 2370 mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; 2371 2372 Regex = mpc_new("regex"); 2373 Term = mpc_new("term"); 2374 Factor = mpc_new("factor"); 2375 Base = mpc_new("base"); 2376 Range = mpc_new("range"); 2377 2378 mpc_define(Regex, mpc_and(2, mpcf_re_or, 2379 Term, 2380 mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), 2381 (mpc_dtor_t)mpc_delete 2382 )); 2383 2384 mpc_define(Term, mpc_many(mpcf_re_and, Factor)); 2385 2386 mpc_define(Factor, mpc_and(2, mpcf_re_repeat, 2387 Base, 2388 mpc_or(5, 2389 mpc_char('*'), mpc_char('+'), mpc_char('?'), 2390 mpc_brackets(mpc_int(), free), 2391 mpc_pass()), 2392 (mpc_dtor_t)mpc_delete 2393 )); 2394 2395 mpc_define(Base, mpc_or(4, 2396 mpc_parens(Regex, (mpc_dtor_t)mpc_delete), 2397 mpc_squares(Range, (mpc_dtor_t)mpc_delete), 2398 mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), 2399 mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) 2400 )); 2401 2402 mpc_define(Range, mpc_apply( 2403 mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), 2404 mpcf_re_range 2405 )); 2406 2407 RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); 2408 2409 mpc_optimise(RegexEnclose); 2410 mpc_optimise(Regex); 2411 mpc_optimise(Term); 2412 mpc_optimise(Factor); 2413 mpc_optimise(Base); 2414 mpc_optimise(Range); 2415 2416 if(!mpc_parse("<mpc_re_compiler>", re, RegexEnclose, &r)) { 2417 err_msg = mpc_err_string(r.error); 2418 err_out = mpc_failf("Invalid Regex: %s", err_msg); 2419 mpc_err_delete(r.error); 2420 free(err_msg); 2421 r.output = err_out; 2422 } 2423 2424 mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); 2425 2426 mpc_optimise(r.output); 2427 2428 return r.output; 2429 2430 } 2431 2432 /* 2433 ** Common Fold Functions 2434 */ 2435 2436 void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } 2437 2438 mpc_val_t *mpcf_ctor_null(void) { return NULL; } 2439 mpc_val_t *mpcf_ctor_str(void) { return calloc(1, 1); } 2440 mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } 2441 2442 mpc_val_t *mpcf_int(mpc_val_t *x) { 2443 int *y = malloc(sizeof(int)); 2444 *y = strtol(x, NULL, 10); 2445 free(x); 2446 return y; 2447 } 2448 2449 mpc_val_t *mpcf_hex(mpc_val_t *x) { 2450 int *y = malloc(sizeof(int)); 2451 *y = strtol(x, NULL, 16); 2452 free(x); 2453 return y; 2454 } 2455 2456 mpc_val_t *mpcf_oct(mpc_val_t *x) { 2457 int *y = malloc(sizeof(int)); 2458 *y = strtol(x, NULL, 8); 2459 free(x); 2460 return y; 2461 } 2462 2463 mpc_val_t *mpcf_float(mpc_val_t *x) { 2464 float *y = malloc(sizeof(float)); 2465 *y = strtod(x, NULL); 2466 free(x); 2467 return y; 2468 } 2469 2470 mpc_val_t *mpcf_strtriml(mpc_val_t *x) { 2471 char *s = x; 2472 while (isspace((unsigned char)*s)) { 2473 memmove(s, s+1, strlen(s)); 2474 } 2475 return s; 2476 } 2477 2478 mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { 2479 char *s = x; 2480 size_t l = strlen(s); 2481 while (l > 0 && isspace((unsigned char)s[l-1])) { 2482 s[l-1] = '\0'; l--; 2483 } 2484 return s; 2485 } 2486 2487 mpc_val_t *mpcf_strtrim(mpc_val_t *x) { 2488 return mpcf_strtriml(mpcf_strtrimr(x)); 2489 } 2490 2491 static const char mpc_escape_input_c[] = { 2492 '\a', '\b', '\f', '\n', '\r', 2493 '\t', '\v', '\\', '\'', '\"', '\0'}; 2494 2495 static const char *mpc_escape_output_c[] = { 2496 "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", 2497 "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; 2498 2499 static const char mpc_escape_input_raw_re[] = { '/' }; 2500 static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; 2501 2502 static const char mpc_escape_input_raw_cstr[] = { '"' }; 2503 static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; 2504 2505 static const char mpc_escape_input_raw_cchar[] = { '\'' }; 2506 static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; 2507 2508 static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { 2509 2510 int i; 2511 int found; 2512 char buff[2]; 2513 char *s = x; 2514 char *y = calloc(1, 1); 2515 2516 while (*s) { 2517 2518 i = 0; 2519 found = 0; 2520 2521 while (output[i]) { 2522 if (*s == input[i]) { 2523 y = realloc(y, strlen(y) + strlen(output[i]) + 1); 2524 strcat(y, output[i]); 2525 found = 1; 2526 break; 2527 } 2528 i++; 2529 } 2530 2531 if (!found) { 2532 y = realloc(y, strlen(y) + 2); 2533 buff[0] = *s; buff[1] = '\0'; 2534 strcat(y, buff); 2535 } 2536 2537 s++; 2538 } 2539 2540 2541 return y; 2542 } 2543 2544 static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { 2545 2546 int i; 2547 int found = 0; 2548 char buff[2]; 2549 char *s = x; 2550 char *y = calloc(1, 1); 2551 2552 while (*s) { 2553 2554 i = 0; 2555 found = 0; 2556 2557 while (output[i]) { 2558 if ((*(s+0)) == output[i][0] && 2559 (*(s+1)) == output[i][1]) { 2560 y = realloc(y, strlen(y) + 1 + 1); 2561 buff[0] = input[i]; buff[1] = '\0'; 2562 strcat(y, buff); 2563 found = 1; 2564 s++; 2565 break; 2566 } 2567 i++; 2568 } 2569 2570 if (!found) { 2571 y = realloc(y, strlen(y) + 1 + 1); 2572 buff[0] = *s; buff[1] = '\0'; 2573 strcat(y, buff); 2574 } 2575 2576 if (*s == '\0') { break; } 2577 else { s++; } 2578 } 2579 2580 return y; 2581 2582 } 2583 2584 mpc_val_t *mpcf_escape(mpc_val_t *x) { 2585 mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); 2586 free(x); 2587 return y; 2588 } 2589 2590 mpc_val_t *mpcf_unescape(mpc_val_t *x) { 2591 mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); 2592 free(x); 2593 return y; 2594 } 2595 2596 mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { 2597 mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); 2598 free(x); 2599 return y; 2600 } 2601 2602 mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { 2603 mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); 2604 free(x); 2605 return y; 2606 } 2607 2608 mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { 2609 mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); 2610 free(x); 2611 return y; 2612 } 2613 2614 mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { 2615 mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); 2616 free(x); 2617 return y; 2618 } 2619 2620 mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { 2621 mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); 2622 free(x); 2623 return y; 2624 } 2625 2626 mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { 2627 mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); 2628 free(x); 2629 return y; 2630 } 2631 2632 mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } 2633 mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } 2634 mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } 2635 mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } 2636 2637 static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { 2638 int i; 2639 for (i = 0; i < n; i++) { 2640 if (i != x) { free(xs[i]); } 2641 } 2642 return xs[x]; 2643 } 2644 2645 mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } 2646 mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } 2647 mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } 2648 mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs) { 2649 int i; 2650 for (i = 0; i < n; i++) { 2651 free(xs[i]); 2652 } 2653 return NULL; 2654 } 2655 2656 mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { 2657 int i; 2658 size_t l = 0; 2659 2660 if (n == 0) { return calloc(1, 1); } 2661 2662 for (i = 0; i < n; i++) { l += strlen(xs[i]); } 2663 2664 xs[0] = realloc(xs[0], l + 1); 2665 2666 for (i = 1; i < n; i++) { 2667 strcat(xs[0], xs[i]); free(xs[i]); 2668 } 2669 2670 return xs[0]; 2671 } 2672 2673 /* 2674 ** Printing 2675 */ 2676 2677 static void mpc_print_unretained(mpc_parser_t *p, int force) { 2678 2679 /* TODO: Print Everything Escaped */ 2680 2681 int i; 2682 char *s, *e; 2683 char buff[2]; 2684 2685 if (p->retained && !force) {; 2686 if (p->name) { printf("<%s>", p->name); } 2687 else { printf("<anon>"); } 2688 return; 2689 } 2690 2691 if (p->type == MPC_TYPE_UNDEFINED) { printf("<?>"); } 2692 if (p->type == MPC_TYPE_PASS) { printf("<:>"); } 2693 if (p->type == MPC_TYPE_FAIL) { printf("<!>"); } 2694 if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } 2695 if (p->type == MPC_TYPE_STATE) { printf("<S>"); } 2696 if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } 2697 if (p->type == MPC_TYPE_EXPECT) { 2698 printf("%s", p->data.expect.m); 2699 /*mpc_print_unretained(p->data.expect.x, 0);*/ 2700 } 2701 2702 if (p->type == MPC_TYPE_ANY) { printf("<.>"); } 2703 if (p->type == MPC_TYPE_SATISFY) { printf("<f>"); } 2704 2705 if (p->type == MPC_TYPE_SINGLE) { 2706 buff[0] = p->data.single.x; buff[1] = '\0'; 2707 s = mpcf_escape_new( 2708 buff, 2709 mpc_escape_input_c, 2710 mpc_escape_output_c); 2711 printf("'%s'", s); 2712 free(s); 2713 } 2714 2715 if (p->type == MPC_TYPE_RANGE) { 2716 buff[0] = p->data.range.x; buff[1] = '\0'; 2717 s = mpcf_escape_new( 2718 buff, 2719 mpc_escape_input_c, 2720 mpc_escape_output_c); 2721 buff[0] = p->data.range.y; buff[1] = '\0'; 2722 e = mpcf_escape_new( 2723 buff, 2724 mpc_escape_input_c, 2725 mpc_escape_output_c); 2726 printf("[%s-%s]", s, e); 2727 free(s); 2728 free(e); 2729 } 2730 2731 if (p->type == MPC_TYPE_ONEOF) { 2732 s = mpcf_escape_new( 2733 p->data.string.x, 2734 mpc_escape_input_c, 2735 mpc_escape_output_c); 2736 printf("[%s]", s); 2737 free(s); 2738 } 2739 2740 if (p->type == MPC_TYPE_NONEOF) { 2741 s = mpcf_escape_new( 2742 p->data.string.x, 2743 mpc_escape_input_c, 2744 mpc_escape_output_c); 2745 printf("[^%s]", s); 2746 free(s); 2747 } 2748 2749 if (p->type == MPC_TYPE_STRING) { 2750 s = mpcf_escape_new( 2751 p->data.string.x, 2752 mpc_escape_input_c, 2753 mpc_escape_output_c); 2754 printf("\"%s\"", s); 2755 free(s); 2756 } 2757 2758 if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } 2759 if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } 2760 if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } 2761 2762 if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } 2763 if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } 2764 2765 if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } 2766 if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } 2767 if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } 2768 2769 if (p->type == MPC_TYPE_OR) { 2770 printf("("); 2771 for(i = 0; i < p->data.or.n-1; i++) { 2772 mpc_print_unretained(p->data.or.xs[i], 0); 2773 printf(" | "); 2774 } 2775 mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); 2776 printf(")"); 2777 } 2778 2779 if (p->type == MPC_TYPE_AND) { 2780 printf("("); 2781 for(i = 0; i < p->data.and.n-1; i++) { 2782 mpc_print_unretained(p->data.and.xs[i], 0); 2783 printf(" "); 2784 } 2785 mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); 2786 printf(")"); 2787 } 2788 2789 if (p->type == MPC_TYPE_CHECK) { 2790 mpc_print_unretained(p->data.check.x, 0); 2791 printf("->?"); 2792 } 2793 if (p->type == MPC_TYPE_CHECK_WITH) { 2794 mpc_print_unretained(p->data.check_with.x, 0); 2795 printf("->?"); 2796 } 2797 2798 } 2799 2800 void mpc_print(mpc_parser_t *p) { 2801 mpc_print_unretained(p, 1); 2802 printf("\n"); 2803 } 2804 2805 /* 2806 ** Testing 2807 */ 2808 2809 /* 2810 ** These functions are slightly unwieldy and 2811 ** also the whole of the testing suite for mpc 2812 ** mpc is pretty shaky. 2813 ** 2814 ** It could do with a lot more tests and more 2815 ** precision. Currently I am only really testing 2816 ** changes off of the examples. 2817 ** 2818 */ 2819 2820 int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, 2821 int(*tester)(const void*, const void*), 2822 mpc_dtor_t destructor, 2823 void(*printer)(const void*)) { 2824 mpc_result_t r; 2825 (void) printer; 2826 if (mpc_parse("<test>", s, p, &r)) { 2827 2828 if (tester(r.output, d)) { 2829 destructor(r.output); 2830 return 0; 2831 } else { 2832 destructor(r.output); 2833 return 1; 2834 } 2835 2836 } else { 2837 mpc_err_delete(r.error); 2838 return 1; 2839 } 2840 2841 } 2842 2843 int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, 2844 int(*tester)(const void*, const void*), 2845 mpc_dtor_t destructor, 2846 void(*printer)(const void*)) { 2847 2848 mpc_result_t r; 2849 if (mpc_parse("<test>", s, p, &r)) { 2850 2851 if (tester(r.output, d)) { 2852 destructor(r.output); 2853 return 1; 2854 } else { 2855 printf("Got "); printer(r.output); printf("\n"); 2856 printf("Expected "); printer(d); printf("\n"); 2857 destructor(r.output); 2858 return 0; 2859 } 2860 2861 } else { 2862 mpc_err_print(r.error); 2863 mpc_err_delete(r.error); 2864 return 0; 2865 2866 } 2867 2868 } 2869 2870 2871 /* 2872 ** AST 2873 */ 2874 2875 void mpc_ast_delete(mpc_ast_t *a) { 2876 2877 int i; 2878 2879 if (a == NULL) { return; } 2880 2881 for (i = 0; i < a->children_num; i++) { 2882 mpc_ast_delete(a->children[i]); 2883 } 2884 2885 free(a->children); 2886 free(a->tag); 2887 free(a->contents); 2888 free(a); 2889 2890 } 2891 2892 static void mpc_ast_delete_no_children(mpc_ast_t *a) { 2893 free(a->children); 2894 free(a->tag); 2895 free(a->contents); 2896 free(a); 2897 } 2898 2899 mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { 2900 2901 mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); 2902 2903 a->tag = malloc(strlen(tag) + 1); 2904 strcpy(a->tag, tag); 2905 2906 a->contents = malloc(strlen(contents) + 1); 2907 strcpy(a->contents, contents); 2908 2909 a->state = mpc_state_new(); 2910 2911 a->children_num = 0; 2912 a->children = NULL; 2913 return a; 2914 2915 } 2916 2917 mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) { 2918 2919 mpc_ast_t *a = mpc_ast_new(tag, ""); 2920 2921 int i; 2922 va_list va; 2923 va_start(va, tag); 2924 2925 for (i = 0; i < n; i++) { 2926 mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); 2927 } 2928 2929 va_end(va); 2930 2931 return a; 2932 2933 } 2934 2935 mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { 2936 2937 mpc_ast_t *r; 2938 2939 if (a == NULL) { return a; } 2940 if (a->children_num == 0) { return a; } 2941 if (a->children_num == 1) { return a; } 2942 2943 r = mpc_ast_new(">", ""); 2944 mpc_ast_add_child(r, a); 2945 return r; 2946 } 2947 2948 int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { 2949 2950 int i; 2951 2952 if (strcmp(a->tag, b->tag) != 0) { return 0; } 2953 if (strcmp(a->contents, b->contents) != 0) { return 0; } 2954 if (a->children_num != b->children_num) { return 0; } 2955 2956 for (i = 0; i < a->children_num; i++) { 2957 if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } 2958 } 2959 2960 return 1; 2961 } 2962 2963 mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { 2964 r->children_num++; 2965 r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); 2966 r->children[r->children_num-1] = a; 2967 return r; 2968 } 2969 2970 mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { 2971 if (a == NULL) { return a; } 2972 a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); 2973 memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); 2974 memmove(a->tag, t, strlen(t)); 2975 memmove(a->tag + strlen(t), "|", 1); 2976 return a; 2977 } 2978 2979 mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { 2980 if (a == NULL) { return a; } 2981 a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); 2982 memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); 2983 memmove(a->tag, t, (strlen(t)-1)); 2984 return a; 2985 } 2986 2987 mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { 2988 a->tag = realloc(a->tag, strlen(t) + 1); 2989 strcpy(a->tag, t); 2990 return a; 2991 } 2992 2993 mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { 2994 if (a == NULL) { return a; } 2995 a->state = s; 2996 return a; 2997 } 2998 2999 static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { 3000 3001 int i; 3002 3003 if (a == NULL) { 3004 fprintf(fp, "NULL\n"); 3005 return; 3006 } 3007 3008 for (i = 0; i < d; i++) { fprintf(fp, " "); } 3009 3010 if (strlen(a->contents)) { 3011 fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, 3012 (long unsigned int)(a->state.row+1), 3013 (long unsigned int)(a->state.col+1), 3014 a->contents); 3015 } else { 3016 fprintf(fp, "%s \n", a->tag); 3017 } 3018 3019 for (i = 0; i < a->children_num; i++) { 3020 mpc_ast_print_depth(a->children[i], d+1, fp); 3021 } 3022 3023 } 3024 3025 void mpc_ast_print(mpc_ast_t *a) { 3026 mpc_ast_print_depth(a, 0, stdout); 3027 } 3028 3029 void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { 3030 mpc_ast_print_depth(a, 0, fp); 3031 } 3032 3033 int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { 3034 return mpc_ast_get_index_lb(ast, tag, 0); 3035 } 3036 3037 int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { 3038 int i; 3039 3040 for(i=lb; i<ast->children_num; i++) { 3041 if(strcmp(ast->children[i]->tag, tag) == 0) { 3042 return i; 3043 } 3044 } 3045 3046 return -1; 3047 } 3048 3049 mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { 3050 return mpc_ast_get_child_lb(ast, tag, 0); 3051 } 3052 3053 mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { 3054 int i; 3055 3056 for(i=lb; i<ast->children_num; i++) { 3057 if(strcmp(ast->children[i]->tag, tag) == 0) { 3058 return ast->children[i]; 3059 } 3060 } 3061 3062 return NULL; 3063 } 3064 3065 mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, 3066 mpc_ast_trav_order_t order) 3067 { 3068 mpc_ast_trav_t *trav, *n_trav; 3069 mpc_ast_t *cnode = ast; 3070 3071 /* Create the traversal structure */ 3072 trav = malloc(sizeof(mpc_ast_trav_t)); 3073 trav->curr_node = cnode; 3074 trav->parent = NULL; 3075 trav->curr_child = 0; 3076 trav->order = order; 3077 3078 /* Get start node */ 3079 switch(order) { 3080 case mpc_ast_trav_order_pre: 3081 /* Nothing else is needed for pre order start */ 3082 break; 3083 3084 case mpc_ast_trav_order_post: 3085 while(cnode->children_num > 0) { 3086 cnode = cnode->children[0]; 3087 3088 n_trav = malloc(sizeof(mpc_ast_trav_t)); 3089 n_trav->curr_node = cnode; 3090 n_trav->parent = trav; 3091 n_trav->curr_child = 0; 3092 n_trav->order = order; 3093 3094 trav = n_trav; 3095 } 3096 3097 break; 3098 3099 default: 3100 /* Unreachable, but compiler complaints */ 3101 break; 3102 } 3103 3104 return trav; 3105 } 3106 3107 mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) { 3108 mpc_ast_trav_t *n_trav, *to_free; 3109 mpc_ast_t *ret = NULL; 3110 int cchild; 3111 3112 /* The end of traversal was reached */ 3113 if(*trav == NULL) return NULL; 3114 3115 switch((*trav)->order) { 3116 case mpc_ast_trav_order_pre: 3117 ret = (*trav)->curr_node; 3118 3119 /* If there aren't any more children, go up */ 3120 while(*trav != NULL && 3121 (*trav)->curr_child >= (*trav)->curr_node->children_num) 3122 { 3123 to_free = *trav; 3124 *trav = (*trav)->parent; 3125 free(to_free); 3126 } 3127 3128 /* If trav is NULL, the end was reached */ 3129 if(*trav == NULL) { 3130 break; 3131 } 3132 3133 /* Go to next child */ 3134 n_trav = malloc(sizeof(mpc_ast_trav_t)); 3135 3136 cchild = (*trav)->curr_child; 3137 n_trav->curr_node = (*trav)->curr_node->children[cchild]; 3138 n_trav->parent = *trav; 3139 n_trav->curr_child = 0; 3140 n_trav->order = (*trav)->order; 3141 3142 (*trav)->curr_child++; 3143 *trav = n_trav; 3144 3145 break; 3146 3147 case mpc_ast_trav_order_post: 3148 ret = (*trav)->curr_node; 3149 3150 /* Move up tree to the parent If the parent doesn't have any more nodes, 3151 * then this is the current node. If it does, move down to its left most 3152 * child. Also, free the previous traversal node */ 3153 to_free = *trav; 3154 *trav = (*trav)->parent; 3155 free(to_free); 3156 3157 if(*trav == NULL) 3158 break; 3159 3160 /* Next child */ 3161 (*trav)->curr_child++; 3162 3163 /* If there aren't any more children, this is the next node */ 3164 if((*trav)->curr_child >= (*trav)->curr_node->children_num) { 3165 break; 3166 } 3167 3168 /* If there are still more children, find the leftmost child from this 3169 * node */ 3170 while((*trav)->curr_node->children_num > 0) { 3171 n_trav = malloc(sizeof(mpc_ast_trav_t)); 3172 3173 cchild = (*trav)->curr_child; 3174 n_trav->curr_node = (*trav)->curr_node->children[cchild]; 3175 n_trav->parent = *trav; 3176 n_trav->curr_child = 0; 3177 n_trav->order = (*trav)->order; 3178 3179 *trav = n_trav; 3180 } 3181 3182 default: 3183 /* Unreachable, but compiler complaints */ 3184 break; 3185 } 3186 3187 return ret; 3188 } 3189 3190 void mpc_ast_traverse_free(mpc_ast_trav_t **trav) { 3191 mpc_ast_trav_t *n_trav; 3192 3193 /* Go through parents until all are free */ 3194 while(*trav != NULL) { 3195 n_trav = (*trav)->parent; 3196 free(*trav); 3197 *trav = n_trav; 3198 } 3199 } 3200 3201 mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { 3202 3203 int i, j; 3204 mpc_ast_t** as = (mpc_ast_t**)xs; 3205 mpc_ast_t *r; 3206 3207 if (n == 0) { return NULL; } 3208 if (n == 1) { return xs[0]; } 3209 if (n == 2 && xs[1] == NULL) { return xs[0]; } 3210 if (n == 2 && xs[0] == NULL) { return xs[1]; } 3211 3212 r = mpc_ast_new(">", ""); 3213 3214 for (i = 0; i < n; i++) { 3215 3216 if (as[i] == NULL) { continue; } 3217 3218 if (as[i] && as[i]->children_num == 0) { 3219 mpc_ast_add_child(r, as[i]); 3220 } else if (as[i] && as[i]->children_num == 1) { 3221 mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag)); 3222 mpc_ast_delete_no_children(as[i]); 3223 } else if (as[i] && as[i]->children_num >= 2) { 3224 for (j = 0; j < as[i]->children_num; j++) { 3225 mpc_ast_add_child(r, as[i]->children[j]); 3226 } 3227 mpc_ast_delete_no_children(as[i]); 3228 } 3229 3230 } 3231 3232 if (r->children_num) { 3233 r->state = r->children[0]->state; 3234 } 3235 3236 return r; 3237 } 3238 3239 mpc_val_t *mpcf_str_ast(mpc_val_t *c) { 3240 mpc_ast_t *a = mpc_ast_new("", c); 3241 free(c); 3242 return a; 3243 } 3244 3245 mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { 3246 mpc_state_t *s = ((mpc_state_t**)xs)[0]; 3247 mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; 3248 (void)n; 3249 a = mpc_ast_state(a, *s); 3250 free(s); 3251 return a; 3252 } 3253 3254 mpc_parser_t *mpca_state(mpc_parser_t *a) { 3255 return mpc_and(2, mpcf_state_ast, mpc_state(), a, free); 3256 } 3257 3258 mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { 3259 return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); 3260 } 3261 3262 mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { 3263 return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); 3264 } 3265 3266 mpc_parser_t *mpca_root(mpc_parser_t *a) { 3267 return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); 3268 } 3269 3270 mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } 3271 mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } 3272 mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } 3273 mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } 3274 mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } 3275 3276 mpc_parser_t *mpca_or(int n, ...) { 3277 3278 int i; 3279 va_list va; 3280 3281 mpc_parser_t *p = mpc_undefined(); 3282 3283 p->type = MPC_TYPE_OR; 3284 p->data.or.n = n; 3285 p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); 3286 3287 va_start(va, n); 3288 for (i = 0; i < n; i++) { 3289 p->data.or.xs[i] = va_arg(va, mpc_parser_t*); 3290 } 3291 va_end(va); 3292 3293 return p; 3294 3295 } 3296 3297 mpc_parser_t *mpca_and(int n, ...) { 3298 3299 int i; 3300 va_list va; 3301 3302 mpc_parser_t *p = mpc_undefined(); 3303 3304 p->type = MPC_TYPE_AND; 3305 p->data.and.n = n; 3306 p->data.and.f = mpcf_fold_ast; 3307 p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); 3308 p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); 3309 3310 va_start(va, n); 3311 for (i = 0; i < n; i++) { 3312 p->data.and.xs[i] = va_arg(va, mpc_parser_t*); 3313 } 3314 for (i = 0; i < (n-1); i++) { 3315 p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; 3316 } 3317 va_end(va); 3318 3319 return p; 3320 } 3321 3322 mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } 3323 3324 /* 3325 ** Grammar Parser 3326 */ 3327 3328 /* 3329 ** This is another interesting bootstrapping. 3330 ** 3331 ** Having a general purpose AST type allows 3332 ** users to specify the grammar alone and 3333 ** let all fold rules be automatically taken 3334 ** care of by existing functions. 3335 ** 3336 ** You don't get to control the type spat 3337 ** out but this means you can make a nice 3338 ** parser to take in some grammar in nice 3339 ** syntax and spit out a parser that works. 3340 ** 3341 ** The grammar for this looks surprisingly 3342 ** like regex but the main difference is that 3343 ** it is now whitespace insensitive and the 3344 ** base type takes literals of some form. 3345 */ 3346 3347 /* 3348 ** 3349 ** ### Grammar Grammar 3350 ** 3351 ** <grammar> : (<term> "|" <grammar>) | <term> 3352 ** 3353 ** <term> : <factor>* 3354 ** 3355 ** <factor> : <base> 3356 ** | <base> "*" 3357 ** | <base> "+" 3358 ** | <base> "?" 3359 ** | <base> "{" <digits> "}" 3360 ** 3361 ** <base> : "<" (<digits> | <ident>) ">" 3362 ** | <string_lit> 3363 ** | <char_lit> 3364 ** | <regex_lit> <regex_mode> 3365 ** | "(" <grammar> ")" 3366 */ 3367 3368 typedef struct { 3369 va_list *va; 3370 int parsers_num; 3371 mpc_parser_t **parsers; 3372 int flags; 3373 } mpca_grammar_st_t; 3374 3375 static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { 3376 (void) n; 3377 if (xs[1] == NULL) { return xs[0]; } 3378 else { return mpca_or(2, xs[0], xs[1]); } 3379 } 3380 3381 static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { 3382 int i; 3383 mpc_parser_t *p = mpc_pass(); 3384 for (i = 0; i < n; i++) { 3385 if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } 3386 } 3387 return p; 3388 } 3389 3390 static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { 3391 int num; 3392 (void) n; 3393 if (xs[1] == NULL) { return xs[0]; } 3394 switch(((char*)xs[1])[0]) 3395 { 3396 case '*': { free(xs[1]); return mpca_many(xs[0]); }; break; 3397 case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break; 3398 case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break; 3399 case '!': { free(xs[1]); return mpca_not(xs[0]); }; break; 3400 default: 3401 num = *((int*)xs[1]); 3402 free(xs[1]); 3403 } 3404 return mpca_count(num, xs[0]); 3405 } 3406 3407 static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { 3408 mpca_grammar_st_t *st = s; 3409 char *y = mpcf_unescape(x); 3410 mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y)); 3411 free(y); 3412 return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); 3413 } 3414 3415 static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { 3416 mpca_grammar_st_t *st = s; 3417 char *y = mpcf_unescape(x); 3418 mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); 3419 free(y); 3420 return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); 3421 } 3422 3423 static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { 3424 char *y = xs[0]; 3425 char *m = xs[1]; 3426 mpca_grammar_st_t *st = xs[2]; 3427 mpc_parser_t *p; 3428 int mode = MPC_RE_DEFAULT; 3429 3430 (void)n; 3431 if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } 3432 if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } 3433 y = mpcf_unescape_regex(y); 3434 p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); 3435 free(y); 3436 free(m); 3437 3438 return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); 3439 } 3440 3441 /* Should this just use `isdigit` instead? */ 3442 static int is_number(const char* s) { 3443 size_t i; 3444 for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } 3445 return 1; 3446 } 3447 3448 static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { 3449 3450 int i; 3451 mpc_parser_t *p; 3452 3453 /* Case of Number */ 3454 if (is_number(x)) { 3455 3456 i = strtol(x, NULL, 10); 3457 3458 while (st->parsers_num <= i) { 3459 st->parsers_num++; 3460 st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); 3461 st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); 3462 if (st->parsers[st->parsers_num-1] == NULL) { 3463 return mpc_failf("No Parser in position %i! Only supplied %i Parsers!", i, st->parsers_num); 3464 } 3465 } 3466 3467 return st->parsers[st->parsers_num-1]; 3468 3469 /* Case of Identifier */ 3470 } else { 3471 3472 /* Search Existing Parsers */ 3473 for (i = 0; i < st->parsers_num; i++) { 3474 mpc_parser_t *q = st->parsers[i]; 3475 if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } 3476 if (q->name && strcmp(q->name, x) == 0) { return q; } 3477 } 3478 3479 /* Search New Parsers */ 3480 while (1) { 3481 3482 p = va_arg(*st->va, mpc_parser_t*); 3483 3484 st->parsers_num++; 3485 st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); 3486 st->parsers[st->parsers_num-1] = p; 3487 3488 if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } 3489 if (p->name && strcmp(p->name, x) == 0) { return p; } 3490 3491 } 3492 3493 } 3494 3495 } 3496 3497 static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { 3498 3499 mpca_grammar_st_t *st = s; 3500 mpc_parser_t *p = mpca_grammar_find_parser(x, st); 3501 free(x); 3502 3503 if (p->name) { 3504 return mpca_state(mpca_root(mpca_add_tag(p, p->name))); 3505 } else { 3506 return mpca_state(mpca_root(p)); 3507 } 3508 } 3509 3510 mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { 3511 3512 char *err_msg; 3513 mpc_parser_t *err_out; 3514 mpc_result_t r; 3515 mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; 3516 3517 GrammarTotal = mpc_new("grammar_total"); 3518 Grammar = mpc_new("grammar"); 3519 Term = mpc_new("term"); 3520 Factor = mpc_new("factor"); 3521 Base = mpc_new("base"); 3522 3523 mpc_define(GrammarTotal, 3524 mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) 3525 ); 3526 3527 mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, 3528 Term, 3529 mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), 3530 mpc_soft_delete 3531 )); 3532 3533 mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); 3534 3535 mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, 3536 Base, 3537 mpc_or(6, 3538 mpc_sym("*"), 3539 mpc_sym("+"), 3540 mpc_sym("?"), 3541 mpc_sym("!"), 3542 mpc_tok_brackets(mpc_int(), free), 3543 mpc_pass()), 3544 mpc_soft_delete 3545 )); 3546 3547 mpc_define(Base, mpc_or(5, 3548 mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), 3549 mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), 3550 mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), 3551 mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), 3552 mpc_tok_parens(Grammar, mpc_soft_delete) 3553 )); 3554 3555 mpc_optimise(GrammarTotal); 3556 mpc_optimise(Grammar); 3557 mpc_optimise(Factor); 3558 mpc_optimise(Term); 3559 mpc_optimise(Base); 3560 3561 if(!mpc_parse("<mpc_grammar_compiler>", grammar, GrammarTotal, &r)) { 3562 err_msg = mpc_err_string(r.error); 3563 err_out = mpc_failf("Invalid Grammar: %s", err_msg); 3564 mpc_err_delete(r.error); 3565 free(err_msg); 3566 r.output = err_out; 3567 } 3568 3569 mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); 3570 3571 mpc_optimise(r.output); 3572 3573 return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; 3574 3575 } 3576 3577 mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { 3578 mpca_grammar_st_t st; 3579 mpc_parser_t *res; 3580 va_list va; 3581 va_start(va, grammar); 3582 3583 st.va = &va; 3584 st.parsers_num = 0; 3585 st.parsers = NULL; 3586 st.flags = flags; 3587 3588 res = mpca_grammar_st(grammar, &st); 3589 free(st.parsers); 3590 va_end(va); 3591 return res; 3592 } 3593 3594 typedef struct { 3595 char *ident; 3596 char *name; 3597 mpc_parser_t *grammar; 3598 } mpca_stmt_t; 3599 3600 static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { 3601 mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); 3602 stmt->ident = ((char**)xs)[0]; 3603 stmt->name = ((char**)xs)[1]; 3604 stmt->grammar = ((mpc_parser_t**)xs)[3]; 3605 (void) n; 3606 free(((char**)xs)[2]); 3607 free(((char**)xs)[4]); 3608 3609 return stmt; 3610 } 3611 3612 static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { 3613 3614 int i; 3615 mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); 3616 3617 for (i = 0; i < n; i++) { 3618 stmts[i] = xs[i]; 3619 } 3620 stmts[n] = NULL; 3621 3622 return stmts; 3623 } 3624 3625 static void mpca_stmt_list_delete(mpc_val_t *x) { 3626 3627 mpca_stmt_t **stmts = x; 3628 3629 while(*stmts) { 3630 mpca_stmt_t *stmt = *stmts; 3631 free(stmt->ident); 3632 free(stmt->name); 3633 mpc_soft_delete(stmt->grammar); 3634 free(stmt); 3635 stmts++; 3636 } 3637 free(x); 3638 3639 } 3640 3641 static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { 3642 3643 mpca_grammar_st_t *st = s; 3644 mpca_stmt_t *stmt; 3645 mpca_stmt_t **stmts = x; 3646 mpc_parser_t *left; 3647 3648 while(*stmts) { 3649 stmt = *stmts; 3650 left = mpca_grammar_find_parser(stmt->ident, st); 3651 if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } 3652 if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } 3653 mpc_optimise(stmt->grammar); 3654 mpc_define(left, stmt->grammar); 3655 free(stmt->ident); 3656 free(stmt->name); 3657 free(stmt); 3658 stmts++; 3659 } 3660 3661 free(x); 3662 3663 return NULL; 3664 } 3665 3666 static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { 3667 3668 mpc_result_t r; 3669 mpc_err_t *e; 3670 mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; 3671 3672 Lang = mpc_new("lang"); 3673 Stmt = mpc_new("stmt"); 3674 Grammar = mpc_new("grammar"); 3675 Term = mpc_new("term"); 3676 Factor = mpc_new("factor"); 3677 Base = mpc_new("base"); 3678 3679 mpc_define(Lang, mpc_apply_to( 3680 mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), 3681 mpca_stmt_list_apply_to, st 3682 )); 3683 3684 mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, 3685 mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), 3686 free, free, free, mpc_soft_delete 3687 )); 3688 3689 mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, 3690 Term, 3691 mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), 3692 mpc_soft_delete 3693 )); 3694 3695 mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); 3696 3697 mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, 3698 Base, 3699 mpc_or(6, 3700 mpc_sym("*"), 3701 mpc_sym("+"), 3702 mpc_sym("?"), 3703 mpc_sym("!"), 3704 mpc_tok_brackets(mpc_int(), free), 3705 mpc_pass()), 3706 mpc_soft_delete 3707 )); 3708 3709 mpc_define(Base, mpc_or(5, 3710 mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), 3711 mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), 3712 mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), 3713 mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), 3714 mpc_tok_parens(Grammar, mpc_soft_delete) 3715 )); 3716 3717 mpc_optimise(Lang); 3718 mpc_optimise(Stmt); 3719 mpc_optimise(Grammar); 3720 mpc_optimise(Term); 3721 mpc_optimise(Factor); 3722 mpc_optimise(Base); 3723 3724 if (!mpc_parse_input(i, Lang, &r)) { 3725 e = r.error; 3726 } else { 3727 e = NULL; 3728 } 3729 3730 mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); 3731 3732 return e; 3733 } 3734 3735 mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { 3736 mpca_grammar_st_t st; 3737 mpc_input_t *i; 3738 mpc_err_t *err; 3739 3740 va_list va; 3741 va_start(va, f); 3742 3743 st.va = &va; 3744 st.parsers_num = 0; 3745 st.parsers = NULL; 3746 st.flags = flags; 3747 3748 i = mpc_input_new_file("<mpca_lang_file>", f); 3749 err = mpca_lang_st(i, &st); 3750 mpc_input_delete(i); 3751 3752 free(st.parsers); 3753 va_end(va); 3754 return err; 3755 } 3756 3757 mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { 3758 mpca_grammar_st_t st; 3759 mpc_input_t *i; 3760 mpc_err_t *err; 3761 3762 va_list va; 3763 va_start(va, p); 3764 3765 st.va = &va; 3766 st.parsers_num = 0; 3767 st.parsers = NULL; 3768 st.flags = flags; 3769 3770 i = mpc_input_new_pipe("<mpca_lang_pipe>", p); 3771 err = mpca_lang_st(i, &st); 3772 mpc_input_delete(i); 3773 3774 free(st.parsers); 3775 va_end(va); 3776 return err; 3777 } 3778 3779 mpc_err_t *mpca_lang(int flags, const char *language, ...) { 3780 3781 mpca_grammar_st_t st; 3782 mpc_input_t *i; 3783 mpc_err_t *err; 3784 3785 va_list va; 3786 va_start(va, language); 3787 3788 st.va = &va; 3789 st.parsers_num = 0; 3790 st.parsers = NULL; 3791 st.flags = flags; 3792 3793 i = mpc_input_new_string("<mpca_lang>", language); 3794 err = mpca_lang_st(i, &st); 3795 mpc_input_delete(i); 3796 3797 free(st.parsers); 3798 va_end(va); 3799 return err; 3800 } 3801 3802 mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) { 3803 3804 mpca_grammar_st_t st; 3805 mpc_input_t *i; 3806 mpc_err_t *err; 3807 3808 va_list va; 3809 3810 FILE *f = fopen(filename, "rb"); 3811 3812 if (f == NULL) { 3813 err = mpc_err_file(filename, "Unable to open file!"); 3814 return err; 3815 } 3816 3817 va_start(va, filename); 3818 3819 st.va = &va; 3820 st.parsers_num = 0; 3821 st.parsers = NULL; 3822 st.flags = flags; 3823 3824 i = mpc_input_new_file(filename, f); 3825 err = mpca_lang_st(i, &st); 3826 mpc_input_delete(i); 3827 3828 free(st.parsers); 3829 va_end(va); 3830 3831 fclose(f); 3832 3833 return err; 3834 } 3835 3836 static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { 3837 3838 int i, total; 3839 3840 if (p->retained && !force) { return 0; } 3841 3842 if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } 3843 3844 if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } 3845 if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } 3846 if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } 3847 3848 if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); } 3849 if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); } 3850 3851 if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } 3852 if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } 3853 3854 if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3855 if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3856 if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } 3857 3858 if (p->type == MPC_TYPE_OR) { 3859 total = 1; 3860 for(i = 0; i < p->data.or.n; i++) { 3861 total += mpc_nodecount_unretained(p->data.or.xs[i], 0); 3862 } 3863 return total; 3864 } 3865 3866 if (p->type == MPC_TYPE_AND) { 3867 total = 1; 3868 for(i = 0; i < p->data.and.n; i++) { 3869 total += mpc_nodecount_unretained(p->data.and.xs[i], 0); 3870 } 3871 return total; 3872 } 3873 3874 return 1; 3875 3876 } 3877 3878 void mpc_stats(mpc_parser_t* p) { 3879 printf("Stats\n"); 3880 printf("=====\n"); 3881 printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); 3882 } 3883 3884 static void mpc_optimise_unretained(mpc_parser_t *p, int force) { 3885 3886 int i, n, m; 3887 mpc_parser_t *t; 3888 3889 if (p->retained && !force) { return; } 3890 3891 /* Optimise Subexpressions */ 3892 3893 if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } 3894 if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } 3895 if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } 3896 if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); } 3897 if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); } 3898 if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } 3899 if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } 3900 if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } 3901 if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3902 if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3903 if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } 3904 3905 if (p->type == MPC_TYPE_OR) { 3906 for(i = 0; i < p->data.or.n; i++) { 3907 mpc_optimise_unretained(p->data.or.xs[i], 0); 3908 } 3909 } 3910 3911 if (p->type == MPC_TYPE_AND) { 3912 for(i = 0; i < p->data.and.n; i++) { 3913 mpc_optimise_unretained(p->data.and.xs[i], 0); 3914 } 3915 } 3916 3917 /* Perform optimisations */ 3918 3919 while (1) { 3920 3921 /* Merge rhs `or` */ 3922 if (p->type == MPC_TYPE_OR 3923 && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR 3924 && !p->data.or.xs[p->data.or.n-1]->retained) { 3925 t = p->data.or.xs[p->data.or.n-1]; 3926 n = p->data.or.n; m = t->data.or.n; 3927 p->data.or.n = n + m - 1; 3928 p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3929 memmove(p->data.or.xs + n - 1, t->data.or.xs, m * sizeof(mpc_parser_t*)); 3930 free(t->data.or.xs); free(t->name); free(t); 3931 continue; 3932 } 3933 3934 /* Merge lhs `or` */ 3935 if (p->type == MPC_TYPE_OR 3936 && p->data.or.xs[0]->type == MPC_TYPE_OR 3937 && !p->data.or.xs[0]->retained) { 3938 t = p->data.or.xs[0]; 3939 n = p->data.or.n; m = t->data.or.n; 3940 p->data.or.n = n + m - 1; 3941 p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3942 memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 3943 memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); 3944 free(t->data.or.xs); free(t->name); free(t); 3945 continue; 3946 } 3947 3948 /* Remove ast `pass` */ 3949 if (p->type == MPC_TYPE_AND 3950 && p->data.and.n == 2 3951 && p->data.and.xs[0]->type == MPC_TYPE_PASS 3952 && !p->data.and.xs[0]->retained 3953 && p->data.and.f == mpcf_fold_ast) { 3954 t = p->data.and.xs[1]; 3955 mpc_delete(p->data.and.xs[0]); 3956 free(p->data.and.xs); free(p->data.and.dxs); free(p->name); 3957 memcpy(p, t, sizeof(mpc_parser_t)); 3958 free(t); 3959 continue; 3960 } 3961 3962 /* Merge ast lhs `and` */ 3963 if (p->type == MPC_TYPE_AND 3964 && p->data.and.f == mpcf_fold_ast 3965 && p->data.and.xs[0]->type == MPC_TYPE_AND 3966 && !p->data.and.xs[0]->retained 3967 && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { 3968 t = p->data.and.xs[0]; 3969 n = p->data.and.n; m = t->data.and.n; 3970 p->data.and.n = n + m - 1; 3971 p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); 3972 p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3973 memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 3974 memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3975 for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } 3976 free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3977 continue; 3978 } 3979 3980 /* Merge ast rhs `and` */ 3981 if (p->type == MPC_TYPE_AND 3982 && p->data.and.f == mpcf_fold_ast 3983 && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND 3984 && !p->data.and.xs[p->data.and.n-1]->retained 3985 && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { 3986 t = p->data.and.xs[p->data.and.n-1]; 3987 n = p->data.and.n; m = t->data.and.n; 3988 p->data.and.n = n + m - 1; 3989 p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); 3990 p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 3991 memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); 3992 for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } 3993 free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 3994 continue; 3995 } 3996 3997 /* Remove re `lift` */ 3998 if (p->type == MPC_TYPE_AND 3999 && p->data.and.n == 2 4000 && p->data.and.xs[0]->type == MPC_TYPE_LIFT 4001 && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str 4002 && !p->data.and.xs[0]->retained 4003 && p->data.and.f == mpcf_strfold) { 4004 t = p->data.and.xs[1]; 4005 mpc_delete(p->data.and.xs[0]); 4006 free(p->data.and.xs); free(p->data.and.dxs); free(p->name); 4007 memcpy(p, t, sizeof(mpc_parser_t)); 4008 free(t); 4009 continue; 4010 } 4011 4012 /* Merge re lhs `and` */ 4013 if (p->type == MPC_TYPE_AND 4014 && p->data.and.f == mpcf_strfold 4015 && p->data.and.xs[0]->type == MPC_TYPE_AND 4016 && !p->data.and.xs[0]->retained 4017 && p->data.and.xs[0]->data.and.f == mpcf_strfold) { 4018 t = p->data.and.xs[0]; 4019 n = p->data.and.n; m = t->data.and.n; 4020 p->data.and.n = n + m - 1; 4021 p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); 4022 p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 4023 memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); 4024 memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); 4025 for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } 4026 free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 4027 continue; 4028 } 4029 4030 /* Merge re rhs `and` */ 4031 if (p->type == MPC_TYPE_AND 4032 && p->data.and.f == mpcf_strfold 4033 && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND 4034 && !p->data.and.xs[p->data.and.n-1]->retained 4035 && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { 4036 t = p->data.and.xs[p->data.and.n-1]; 4037 n = p->data.and.n; m = t->data.and.n; 4038 p->data.and.n = n + m - 1; 4039 p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); 4040 p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); 4041 memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); 4042 for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } 4043 free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); 4044 continue; 4045 } 4046 4047 return; 4048 4049 } 4050 4051 } 4052 4053 void mpc_optimise(mpc_parser_t *p) { 4054 mpc_optimise_unretained(p, 1); 4055 } 4056