83 lines
2.1 KiB
C
83 lines
2.1 KiB
C
|
// Include guard for this header.
// The guard macro avoids a leading double underscore: identifiers that begin
// with two underscores are reserved for the C implementation.
#ifndef MODULE_H_
#define MODULE_H_
|
||
|
|
||
|
#include <curl/curl.h>
|
||
|
|
||
|
#include "util.h"
|
||
|
|
||
|
// Verdict type returned by a module's `filter` callback for a URL.
// NOTE(review): the per-enumerator meanings below are inferred from the names
// only -- confirm against the code that consumes the verdict.
typedef enum {
    FILTER_PASS,   // presumably: the URL is accepted
    FILTER_STALL,  // presumably: the decision is deferred
    FILTER_REJECT, // presumably: the URL is refused
} filterres_t;
|
||
|
|
||
|
// Tag stored in the `type` field of `extradata_t`, and the first argument of
// `searchextradata`.
// NOTE(review): the per-enumerator meanings below are inferred from the names
// only -- confirm against the modules that produce these entries.
typedef enum {
    EXTRA_JSON,  // presumably: value is JSON-related data
    EXTRA_TIDY,  // presumably: value comes from the tidy module
    EXTRA_OTHER, // presumably: any other kind of value
} extradata_type_t;
|
||
|
|
||
|
typedef struct {
|
||
|
extradata_type_t type;
|
||
|
char *key;
|
||
|
void *val;
|
||
|
} extradata_t;
|
||
|
|
||
|
// Declares the type `extradata_dynarr_t` (used as a field type in
// `pagecompletedata_t`) through the `dynarr_def` macro. The macro is not
// defined in this file; presumably it comes from "util.h" and produces a
// dynamic array of `extradata_t` -- confirm.
dynarr_def(extradata_t, extradata_dynarr_t);
|
||
|
|
||
|
typedef struct {
|
||
|
const char *url;
|
||
|
CURL *handle;
|
||
|
char *page;
|
||
|
size_t npage;
|
||
|
extradata_dynarr_t extradata;
|
||
|
charp_dynarr_t *parsedlinks;
|
||
|
} pagecompletedata_t;
|
||
|
|
||
|
// Callback pointer type accepted by `makerequest` and stored in the `cb` field
// of `requestedreq_t`.
// NOTE(review): argument meanings are inferred from their names -- `page` and
// `npage` presumably carry the fetched body and its length; confirm with the
// code that invokes the callback.
typedef void (*reqcb_t)(void *userdata,
                        const char *url,
                        char *page,
                        size_t npage,
                        CURL *handle);
|
||
|
|
||
|
typedef struct {
|
||
|
const char *url;
|
||
|
reqcb_t cb;
|
||
|
void *userdata;
|
||
|
CURL *handle;
|
||
|
} requestedreq_t;
|
||
|
|
||
|
// Declares the type of the `requestedreqs` global through the `dynarr_def`
// macro (not defined in this file; presumably a dynamic array of
// `requestedreq_t` -- confirm).
// NOTE(review): "dyanrr" looks like a misspelling of "dynarr" (compare
// `extradata_dynarr_t`). The name is left unchanged because it is part of the
// public interface of this header.
dynarr_def(requestedreq_t, requestedreq_dyanrr_t);
|
||
|
|
||
|
typedef struct crawlermodule {
|
||
|
void *userdata;
|
||
|
// `init` will both initialize the module, and populate all other functions in its entry, if necessary.
|
||
|
int (*init)(struct crawlermodule *entry);
|
||
|
int (*destroy)(void *userdata);
|
||
|
int (*onpagewrite)(void *userdata, const char *url, const byte *data, size_t ndata);
|
||
|
int (*onpagecomplete)(void *userdata, pagecompletedata_t *data);
|
||
|
int (*onpagedestroy)(void *userdata, pagecompletedata_t *data);
|
||
|
filterres_t (*filter)(void *userdata, const char *url);
|
||
|
} crawlermodule_t;
|
||
|
|
||
|
dynarr_def(crawlermodule_t, crawlermodule_dynarr_t);
|
||
|
dynarr_def(crawlermodule_t *, crawlermodulep_dynarr_t);
|
||
|
|
||
|
typedef struct {
|
||
|
const char *name;
|
||
|
crawlermodule_t module;
|
||
|
} moduleentry_t;
|
||
|
|
||
|
dynarr_def(moduleentry_t, moduleentry_dynarr_t);
|
||
|
dynarr_def(moduleentry_t *, moduleentryp_dynarr_t);
|
||
|
|
||
|
// Takes a type tag, a key, and an array of `ndata` extra-data entries.
// NOTE(review): only the declaration is visible here; presumably returns the
// `val` of the entry matching `type` and `key`, and NULL when there is none --
// confirm against the definition.
void *searchextradata(extradata_type_t type,
                      char *key,
                      extradata_t *data,
                      size_t ndata);
|
||
|
// Takes a URL, a `reqcb_t` callback and an opaque pointer for that callback.
// NOTE(review): only the declaration is visible here; presumably queues a
// request whose result is delivered to `cb` with `cbdata` as its `userdata`
// argument -- confirm against the definition.
void makerequest(const char *url,
                 reqcb_t cb,
                 void *cbdata);
|
||
|
|
||
|
// Per-module initializers. Each has the same signature as the `init` member of
// `crawlermodule_t` (takes the module entry, returns int).
// NOTE(review): the meaning of the return value is not visible in this header.
int mod_pagedata_init(crawlermodule_t *entry);
int mod_tidy_init(crawlermodule_t *entry);
int mod_debug_init(crawlermodule_t *entry);
int mod_parse_init(crawlermodule_t *entry);
int mod_robots_init(crawlermodule_t *entry);
|
||
|
|
||
|
extern requestedreq_dyanrr_t requestedreqs;
|
||
|
extern moduleentry_t availmodules[];
|
||
|
|
||
|
#endif
|