#ifndef __MODULE_H_ #define __MODULE_H_ #include #include "util.h" typedef enum { FILTER_PASS, FILTER_STALL, FILTER_REJECT, } filterres_t; typedef enum { EXTRA_JSON, EXTRA_TIDY, EXTRA_OTHER, } extradata_type_t; typedef struct { extradata_type_t type; char *key; void *val; } extradata_t; dynarr_def(extradata_t, extradata_dynarr_t); typedef struct { const char *url; CURL *handle; char *page; size_t npage; extradata_dynarr_t extradata; charp_dynarr_t *parsedlinks; } pagecompletedata_t; typedef void (*reqcb_t)(void *userdata, const char *url, char *page, size_t npage, CURL *handle); typedef struct { const char *url; reqcb_t cb; void *userdata; CURL *handle; } requestedreq_t; dynarr_def(requestedreq_t, requestedreq_dyanrr_t); typedef struct crawlermodule { void *userdata; // `init` will both initialize the module, and populate all other functions in its entry, if necessary. int (*init)(struct crawlermodule *entry); int (*destroy)(void *userdata); int (*onpagewrite)(void *userdata, const char *url, const byte *data, size_t ndata); int (*onpagecomplete)(void *userdata, pagecompletedata_t *data); int (*onpagedestroy)(void *userdata, pagecompletedata_t *data); filterres_t (*filter)(void *userdata, const char *url); } crawlermodule_t; dynarr_def(crawlermodule_t, crawlermodule_dynarr_t); dynarr_def(crawlermodule_t *, crawlermodulep_dynarr_t); typedef struct { const char *name; crawlermodule_t module; } moduleentry_t; dynarr_def(moduleentry_t, moduleentry_dynarr_t); dynarr_def(moduleentry_t *, moduleentryp_dynarr_t); void *searchextradata(extradata_type_t type, char *key, extradata_t *data, size_t ndata); void makerequest(const char *url, reqcb_t cb, void *cbdata); int mod_pagedata_init(crawlermodule_t *entry); int mod_tidy_init(crawlermodule_t *entry); int mod_debug_init(crawlermodule_t *entry); int mod_parse_init(crawlermodule_t *entry); int mod_robots_init(crawlermodule_t *entry); extern requestedreq_dyanrr_t requestedreqs; extern moduleentry_t availmodules[]; #endif