mumble

A Lisp written in C, following the *Build Your Own Lisp* book
Log | Files | Refs | README

mpc.h (11197B)


      1 /*
      2 ** mpc - Micro Parser Combinator library for C
      3 **
      4 ** https://github.com/orangeduck/mpc
      5 **
      6 ** Daniel Holden - contact@daniel-holden.com
      7 ** Licensed under BSD3
      8 */
      9 
     10 #ifndef mpc_h
     11 #define mpc_h
     12 
     13 #ifdef __cplusplus
     14 extern "C" {
     15 #endif
     16 
     17 #include <stdlib.h>
     18 #include <stdio.h>
     19 #include <stdarg.h>
     20 #include <string.h>
     21 #include <math.h>
     22 #include <errno.h>
     23 #include <ctype.h>
     24 
     25 /*
     26 ** State Type
     27 */
     28 
     29 typedef struct {  /* Position record; embedded by value in mpc_err_t and mpc_ast_t, and passed by value to mpc_ast_state(). */
     30   long pos;  /* NOTE(review): presumably an offset into the parsed input -- confirm in mpc.c */
     31   long row;  /* NOTE(review): presumably the line index of `pos` -- confirm whether 0- or 1-based */
     32   long col;  /* NOTE(review): presumably the column index within `row` -- confirm base */
     33   int term;  /* NOTE(review): meaning is not visible from this header -- see mpc.c */
     34 } mpc_state_t;
     35 
     36 /*
     37 ** Error Type
     38 */
     39 
     40 typedef struct {  /* Error record; the `error` member of mpc_result_t points to one, and mpc_err_delete/print/string take one. */
     41   mpc_state_t state;  /* position record stored by value inside the error */
     42   int expected_num;  /* NOTE(review): presumably the number of entries reachable through `expected` -- confirm */
     43   char *filename;  /* NOTE(review): ownership (copied vs. borrowed) is not visible here -- confirm before freeing */
     44   char *failure;  /* NOTE(review): presumably a failure message string -- confirm */
     45   char **expected;  /* pointer to char pointers; NOTE(review): presumably descriptions of what was expected */
     46   char received;  /* a single char; NOTE(review): presumably the character actually encountered */
     47 } mpc_err_t;
     48 
     49 void mpc_err_delete(mpc_err_t *e);
     50 char *mpc_err_string(mpc_err_t *e);
     51 void mpc_err_print(mpc_err_t *e);
     52 void mpc_err_print_to(mpc_err_t *e, FILE *f);
     53 
     54 /*
     55 ** Parsing
     56 */
     57 
     58 typedef void mpc_val_t;  /* alias for void, so every `mpc_val_t *` in this header is an untyped pointer */
     59 
     60 typedef union {  /* Result slot filled through the `mpc_result_t *r` parameter of the mpc_parse* functions. */
     61   mpc_err_t *error;  /* shares storage with `output`; only one of the two is meaningful at a time */
     62   mpc_val_t *output;  /* NOTE(review): presumably the int returned by mpc_parse* says which member is valid -- confirm */
     63 } mpc_result_t;
     64 
     65 struct mpc_parser_t;
     66 typedef struct mpc_parser_t mpc_parser_t;
     67 
     68 int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r);
     69 int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r);
     70 int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r);
     71 int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r);
     72 int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r);
     73 
     74 /*
     75 ** Function Types
     76 */
     77 
     78 typedef void(*mpc_dtor_t)(mpc_val_t*);  /* takes one value pointer, returns nothing */
     79 typedef mpc_val_t*(*mpc_ctor_t)(void);  /* takes no arguments, returns a value pointer */
     80 
     81 typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*);  /* maps one value pointer to a value pointer */
     82 typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*);  /* as mpc_apply_t, plus an extra untyped pointer argument */
     83 typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**);  /* same shape as the mpcf_* folds declared below as (int n, mpc_val_t** xs) */
     84 
     85 typedef int(*mpc_check_t)(mpc_val_t**);  /* receives a pointer to the value pointer and returns int; NOTE(review): pass/fail convention not visible here */
     86 typedef int(*mpc_check_with_t)(mpc_val_t**,void*);  /* as mpc_check_t, plus an extra untyped pointer argument */
     87 
     88 /*
     89 ** Building a Parser
     90 */
     91 
     92 mpc_parser_t *mpc_new(const char *name);
     93 mpc_parser_t *mpc_copy(mpc_parser_t *a);
     94 mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a);
     95 mpc_parser_t *mpc_undefine(mpc_parser_t *p);
     96 
     97 void mpc_delete(mpc_parser_t *p);
     98 void mpc_cleanup(int n, ...);
     99 
    100 /*
    101 ** Basic Parsers
    102 */
    103 
    104 mpc_parser_t *mpc_any(void);
    105 mpc_parser_t *mpc_char(char c);
    106 mpc_parser_t *mpc_range(char s, char e);
    107 mpc_parser_t *mpc_oneof(const char *s);
    108 mpc_parser_t *mpc_noneof(const char *s);
    109 mpc_parser_t *mpc_satisfy(int(*f)(char));
    110 mpc_parser_t *mpc_string(const char *s);
    111 
    112 /*
    113 ** Other Parsers
    114 */
    115 
    116 mpc_parser_t *mpc_pass(void);
    117 mpc_parser_t *mpc_fail(const char *m);
    118 mpc_parser_t *mpc_failf(const char *fmt, ...);
    119 mpc_parser_t *mpc_lift(mpc_ctor_t f);
    120 mpc_parser_t *mpc_lift_val(mpc_val_t *x);
    121 mpc_parser_t *mpc_anchor(int(*f)(char,char));
    122 mpc_parser_t *mpc_state(void);
    123 
    124 /*
    125 ** Combinator Parsers
    126 */
    127 
    128 mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e);
    129 mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...);
    130 mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f);
    131 mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x);
    132 mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e);
    133 mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e);
    134 mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...);
    135 mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...);
    136 
    137 mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da);
    138 mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf);
    139 mpc_parser_t *mpc_maybe(mpc_parser_t *a);
    140 mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf);
    141 
    142 mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a);
    143 mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a);
    144 mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da);
    145 
    146 mpc_parser_t *mpc_or(int n, ...);
    147 mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...);
    148 
    149 mpc_parser_t *mpc_predictive(mpc_parser_t *a);
    150 
    151 /*
    152 ** Common Parsers
    153 */
    154 
    155 mpc_parser_t *mpc_eoi(void);
    156 mpc_parser_t *mpc_soi(void);
    157 
    158 mpc_parser_t *mpc_boundary(void);
    159 mpc_parser_t *mpc_boundary_newline(void);
    160 
    161 mpc_parser_t *mpc_whitespace(void);
    162 mpc_parser_t *mpc_whitespaces(void);
    163 mpc_parser_t *mpc_blank(void);
    164 
    165 mpc_parser_t *mpc_newline(void);
    166 mpc_parser_t *mpc_tab(void);
    167 mpc_parser_t *mpc_escape(void);
    168 
    169 mpc_parser_t *mpc_digit(void);
    170 mpc_parser_t *mpc_hexdigit(void);
    171 mpc_parser_t *mpc_octdigit(void);
    172 mpc_parser_t *mpc_digits(void);
    173 mpc_parser_t *mpc_hexdigits(void);
    174 mpc_parser_t *mpc_octdigits(void);
    175 
    176 mpc_parser_t *mpc_lower(void);
    177 mpc_parser_t *mpc_upper(void);
    178 mpc_parser_t *mpc_alpha(void);
    179 mpc_parser_t *mpc_underscore(void);
    180 mpc_parser_t *mpc_alphanum(void);
    181 
    182 mpc_parser_t *mpc_int(void);
    183 mpc_parser_t *mpc_hex(void);
    184 mpc_parser_t *mpc_oct(void);
    185 mpc_parser_t *mpc_number(void);
    186 
    187 mpc_parser_t *mpc_real(void);
    188 mpc_parser_t *mpc_float(void);
    189 
    190 mpc_parser_t *mpc_char_lit(void);
    191 mpc_parser_t *mpc_string_lit(void);
    192 mpc_parser_t *mpc_regex_lit(void);
    193 
    194 mpc_parser_t *mpc_ident(void);
    195 
    196 /*
    197 ** Useful Parsers
    198 */
    199 
    200 mpc_parser_t *mpc_startwith(mpc_parser_t *a);
    201 mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da);
    202 mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da);
    203 
    204 mpc_parser_t *mpc_stripl(mpc_parser_t *a);
    205 mpc_parser_t *mpc_stripr(mpc_parser_t *a);
    206 mpc_parser_t *mpc_strip(mpc_parser_t *a);
    207 mpc_parser_t *mpc_tok(mpc_parser_t *a);
    208 mpc_parser_t *mpc_sym(const char *s);
    209 mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da);
    210 
    211 mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c);
    212 mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad);
    213 mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad);
    214 mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad);
    215 mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad);
    216 
    217 mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c);
    218 mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad);
    219 mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad);
    220 mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad);
    221 mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad);
    222 
    223 /*
    224 ** Common Function Parameters
    225 */
    226 
    227 void mpcf_dtor_null(mpc_val_t *x);
    228 
    229 mpc_val_t *mpcf_ctor_null(void);
    230 mpc_val_t *mpcf_ctor_str(void);
    231 
    232 mpc_val_t *mpcf_free(mpc_val_t *x);
    233 mpc_val_t *mpcf_int(mpc_val_t *x);
    234 mpc_val_t *mpcf_hex(mpc_val_t *x);
    235 mpc_val_t *mpcf_oct(mpc_val_t *x);
    236 mpc_val_t *mpcf_float(mpc_val_t *x);
    237 mpc_val_t *mpcf_strtriml(mpc_val_t *x);
    238 mpc_val_t *mpcf_strtrimr(mpc_val_t *x);
    239 mpc_val_t *mpcf_strtrim(mpc_val_t *x);
    240 
    241 mpc_val_t *mpcf_escape(mpc_val_t *x);
    242 mpc_val_t *mpcf_escape_regex(mpc_val_t *x);
    243 mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x);
    244 mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x);
    245 
    246 mpc_val_t *mpcf_unescape(mpc_val_t *x);
    247 mpc_val_t *mpcf_unescape_regex(mpc_val_t *x);
    248 mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x);
    249 mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x);
    250 
    251 mpc_val_t *mpcf_null(int n, mpc_val_t** xs);
    252 mpc_val_t *mpcf_fst(int n, mpc_val_t** xs);
    253 mpc_val_t *mpcf_snd(int n, mpc_val_t** xs);
    254 mpc_val_t *mpcf_trd(int n, mpc_val_t** xs);
    255 
    256 mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs);
    257 mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs);
    258 mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs);
    259 mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs);
    260 
    261 mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs);
    262 mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs);
    263 
    264 /*
    265 ** Regular Expression Parsers
    266 */
    267 
    268 enum {  /* Anonymous enum of int constants; NOTE(review): presumably the values for the `mode` argument of mpc_re_mode() -- confirm. */
    269   MPC_RE_DEFAULT   = 0,
    270   MPC_RE_M         = 1,  /* same value as MPC_RE_MULTILINE */
    271   MPC_RE_S         = 2,  /* same value as MPC_RE_DOTALL */
    272   MPC_RE_MULTILINE = 1,
    273   MPC_RE_DOTALL    = 2   /* NOTE(review): 1 and 2 are distinct bits; confirm whether OR-ing the modes is supported */
    274 };
    275 
    276 mpc_parser_t *mpc_re(const char *re);
    277 mpc_parser_t *mpc_re_mode(const char *re, int mode);
    278 
    279 /*
    280 ** AST
    281 */
    282 
    283 typedef struct mpc_ast_t {  /* AST node; refers to further nodes of its own type through `children`. */
    284   char *tag;  /* NOTE(review): ownership not visible here; mpc_ast_new() takes a const char *tag, so presumably copied -- confirm */
    285   char *contents;  /* NOTE(review): same ownership question as `tag` -- confirm */
    286   mpc_state_t state;  /* per the warning on mpc_ast_eq(), this member is not compared by that function */
    287   int children_num;  /* NOTE(review): presumably the number of entries reachable through `children` -- confirm */
    288   struct mpc_ast_t** children;  /* pointer to pointer(s) to child nodes */
    289 } mpc_ast_t;
    290 
    291 mpc_ast_t *mpc_ast_new(const char *tag, const char *contents);
    292 mpc_ast_t *mpc_ast_build(int n, const char *tag, ...);
    293 mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a);
    294 mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a);
    295 mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t);
    296 mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t);
    297 mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t);
    298 mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s);
    299 
    300 void mpc_ast_delete(mpc_ast_t *a);
    301 void mpc_ast_print(mpc_ast_t *a);
    302 void mpc_ast_print_to(mpc_ast_t *a, FILE *fp);
    303 
    304 int mpc_ast_get_index(mpc_ast_t *ast, const char *tag);
    305 int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb);
    306 mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag);
    307 mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb);
    308 
    309 typedef enum {  /* Order selector: taken by mpc_ast_traverse_start() and stored in mpc_ast_trav_t.order. */
    310   mpc_ast_trav_order_pre,  /* implicit value 0; NOTE(review): name suggests pre-order visiting -- confirm in mpc.c */
    311   mpc_ast_trav_order_post  /* implicit value 1; NOTE(review): name suggests post-order visiting -- confirm in mpc.c */
    312 } mpc_ast_trav_order_t;
    313 
    314 typedef struct mpc_ast_trav_t {  /* Traversal record used with mpc_ast_traverse_start(), _next() and _free(). */
    315   mpc_ast_t             *curr_node;  /* the AST node this record refers to */
    316   struct mpc_ast_trav_t *parent;  /* link to another record of the same type */
    317   int                    curr_child;  /* NOTE(review): presumably an index into curr_node->children -- confirm */
    318   mpc_ast_trav_order_t   order;  /* one of the mpc_ast_trav_order_* enumerators */
    319 } mpc_ast_trav_t;
    320 
    321 mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast,
    322                                        mpc_ast_trav_order_t order);
    323 
    324 mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav);
    325 
    326 void mpc_ast_traverse_free(mpc_ast_trav_t **trav);
    327 
    328 /*
    329 ** Warning: This function currently doesn't test for equality of the `state` member!
    330 */
    331 int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b);
    332 
    333 mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as);
    334 mpc_val_t *mpcf_str_ast(mpc_val_t *c);
    335 mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs);
    336 
    337 mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t);
    338 mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t);
    339 mpc_parser_t *mpca_root(mpc_parser_t *a);
    340 mpc_parser_t *mpca_state(mpc_parser_t *a);
    341 mpc_parser_t *mpca_total(mpc_parser_t *a);
    342 
    343 mpc_parser_t *mpca_not(mpc_parser_t *a);
    344 mpc_parser_t *mpca_maybe(mpc_parser_t *a);
    345 
    346 mpc_parser_t *mpca_many(mpc_parser_t *a);
    347 mpc_parser_t *mpca_many1(mpc_parser_t *a);
    348 mpc_parser_t *mpca_count(int n, mpc_parser_t *a);
    349 
    350 mpc_parser_t *mpca_or(int n, ...);
    351 mpc_parser_t *mpca_and(int n, ...);
    352 
    353 enum {  /* Anonymous enum of int constants; NOTE(review): presumably for the `flags` argument of mpca_grammar() and mpca_lang*() -- confirm. */
    354   MPCA_LANG_DEFAULT              = 0,
    355   MPCA_LANG_PREDICTIVE           = 1,
    356   MPCA_LANG_WHITESPACE_SENSITIVE = 2   /* NOTE(review): 1 and 2 are distinct bits; confirm whether the flags may be OR-ed together */
    357 };
    358 
    359 mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...);
    360 
    361 mpc_err_t *mpca_lang(int flags, const char *language, ...);
    362 mpc_err_t *mpca_lang_file(int flags, FILE *f, ...);
    363 mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...);
    364 mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...);
    365 
    366 /*
    367 ** Misc
    368 */
    369 
    370 
    371 void mpc_print(mpc_parser_t *p);
    372 void mpc_optimise(mpc_parser_t *p);
    373 void mpc_stats(mpc_parser_t *p);
    374 
    375 int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d,
    376   int(*tester)(const void*, const void*),
    377   mpc_dtor_t destructor,
    378   void(*printer)(const void*));
    379 
    380 int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d,
    381   int(*tester)(const void*, const void*),
    382   mpc_dtor_t destructor,
    383   void(*printer)(const void*));
    384 
    385 #ifdef __cplusplus
    386 }
    387 #endif
    388 
    389 #endif