src/nocem.c

/* [<][>]
[^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following functions.
  1. nocem_regex_set
  2. nocemInit
  3. nocem_regex_match_header
  4. nocem_hide
  5. nocem_process_msgids
  6. pgp_check_art
  7. nocem_process_art
  8. nocem_scan_arts
  9. nocem_scan_group
  10. nocem_scan
  11. nocemDaemon

/* $Id: nocem.c,v 1.3 2000/06/11 19:29:28 proff Exp $
 * $Copyright$
 */

#include "nglobal.h"

#include "reg.h"
#include "group.h"
#include "xover.h"
#include "article.h"
#include "history.h"

#include "nocem.h"

/* Shorthand for bigToStr; used below when formatting counters for log lines. */
#define bs bigToStr

/*
 * Compiled patterns.  nocemInit() fills them in via nocem_regex_set() from
 * the corresponding con->nocem* settings.
 */
static regex_t nocemFromPreg;		/* matched against XHDR From values */
static regex_t nocemTypePreg;		/* matched against the notice's Type: header */
static regex_t nocemActionPreg;		/* matched against the notice's Action: header */
static regex_t nocemPGPgoodPreg;	/* matched against the PGP command's output */

/*
 * Compile one nocem configuration pattern.
 *
 *   preg   destination for the compiled expression
 *   pat    pattern text taken from the configuration; may be NULL when the
 *          setting was never given
 *   field  name of the configuration setting, used only in log messages
 *
 * Returns TRUE on success.  On any failure nocem is switched off
 * (con->nocem = FALSE), the reason is logged, and FALSE is returned.
 */
static bool nocem_regex_set(regex_t *preg, char *pat, char *field)
{
    int err_code;

    /*
     * It is the pattern that can be missing, not the setting's name: the
     * name is always supplied by the caller.  Testing the name here would
     * let a NULL pattern through to nn_regcomp().
     */
    if (!pat)
        {
            con->nocem = FALSE;
            logen (("nocem enabled, but '%s' unspecified, nocem disabled", field));
            return FALSE;
        }
    if ((err_code = nn_regcomp(preg, pat, REG_EXTENDED|REG_NOSUB|REG_ICASE))!=0)
        {
            char errbuf[MAX_LINE];
            regerror(err_code, preg, errbuf, sizeof errbuf);
            logen (("bad regular expression in '%s' (nocem now disabled): %s", field, errbuf));
            con->nocem = FALSE;
            return FALSE;
        }
    return TRUE;
}


/*
 * Prepare the nocem subsystem: compile the From/Type/Action patterns and,
 * when PGP checking is configured, export PGPPATH and compile the
 * "good signature" pattern.
 *
 * Returns FALSE when nocem is disabled or any pattern fails to compile
 * (compilation stops at the first failure), TRUE otherwise.
 */
EXPORT bool nocemInit()
{
    bool compiled;

    if (!con->nocem)
        return FALSE;

    compiled = nocem_regex_set(&nocemFromPreg, con->nocemFrom, "nocemFrom");
    compiled = compiled && nocem_regex_set(&nocemTypePreg, con->nocemType, "nocemType");
    compiled = compiled && nocem_regex_set(&nocemActionPreg, con->nocemAction, "nocemAction");
    if (!compiled)
        return FALSE;

    if (!con->nocemPGP)
        return TRUE;

    /* NOTE(review): con->nocemPGPPATH is passed on unchecked -- confirm it cannot be NULL here */
    setenv ("PGPPATH", con->nocemPGPPATH, 1);
    if (nocem_regex_set(&nocemPGPgoodPreg, con->nocemPGPgood, "nocemPGPgood"))
        return TRUE;
    return FALSE;
}

/*
 * Find the header named by pat inside s (case-insensitively) and test its
 * value -- the text after the name and any white space, up to the next
 * '\r' -- against preg.
 *
 * pat should be prepended with \n (the leading character is dropped when
 * the header name is logged).
 *
 * The '\r' ending the value is overwritten with NUL for the duration of
 * the match and then put back.  Returns TRUE only when the header exists,
 * is terminated by '\r', and its value matches.
 */
static bool nocem_regex_match_header(regex_t *preg, char *s, char *pat)
{
    char *value = strCaseStr(s, pat);
    char *cr = NULL;
    bool matched;

    if (value)
        {
            value += strlen(pat);
            SKIPWHITE (value);
            cr = strchr(value, '\r');
        }
    if (!cr)
        {
            /* header absent, or present but not terminated by '\r' */
            logwn (("nocem message missing '%s' header '%.128s'", pat+1, s));
            return FALSE;
        }

    *cr = '\0';
    matched = (nn_regexec(preg, value, cr - value, 0, 0, 0) == 0);
    *cr = '\r';
    return matched;
}

/*
 * Record msgid in the history as SPAM.
 *
 * Returns nh_dup when the history already has an entry for msgid (whatever
 * its classification), nh_err when adding the entry fails, and nh_ok once
 * the entry has been added.  st is not used here.
 */
static enum {nh_ok, nh_dup, nh_err} nocem_hide(struct nocem_stats *st, char *msgid)
{
    bool added;
    char *existing = hisGet(msgid);

    if (existing)
        {
            if (strEq(existing, SPAM))
                {
                    logn (("<%s> already clasified as '%s'", msgid, existing));
                }
            else
                {
                    /* XXX we can with code */
                    logn (("<%s> aready classified as a genuine article, can't reclassify as spam", msgid));
                }
            return nh_dup;
        }

    added = hisAdd(msgid, SPAM);
    if (added)
        {
            logn (("classified <%s> as '%s'", msgid, SPAM));
            return nh_ok;
        }
    logw (("adding <%s> to history failed", msgid));
    return nh_err;
}

/*
 * Pull every <message-id> out of s and hand it to nocem_hide(), counting
 * the outcome in st (msgid_good / msgid_dup / msgid_fail).
 *
 * A candidate is the text after '<' up to the first '>', CR, LF, tab or
 * space; it is accepted only when it is at least two characters long and
 * is closed by '>'.  The buffer is modified: each accepted '>' is
 * overwritten with NUL.  Always returns TRUE.
 */
static bool nocem_process_msgids(struct nocem_stats *st, char *s)
{
    settaskinfo ("nocem: processing msgid's");
    while (s && *s)
        {
            char *lt = strchr(s, '<');
            int len;
            int verdict;

            if (!lt)
                break;
            s = lt + 1;
            len = strcspn(s, ">\r\n\t ");
            if (len >= 2 && s[len] == '>')
                {
                    s[len] = '\0';
                    verdict = nocem_hide(st, s);
                    if (verdict == nh_ok)
                        st->msgid_good++;
                    else if (verdict == nh_dup)
                        st->msgid_dup++;
                    else if (verdict == nh_err)
                        st->msgid_fail++;
                    s += len + 1;
                }
            /* otherwise resume the search just past the rejected '<' */
        }
    return TRUE;
}
            
/*
 * Verify the PGP signature on a nocem notice.
 *
 *   st     statistics; pgp_good or pgp_fail is incremented
 *   art    start of the signed text
 *   len    number of bytes of signed text
 *   artno  article number, used only in log messages
 *
 * The text is piped into the command built from con->nocemPGPcommand (a
 * printf-style template that receives the scratch file name).  Afterwards
 * the scratch file is read back and its contents are matched against
 * nocemPGPgoodPreg.  Returns TRUE only when the command exits with status
 * 0 and the pattern matches.
 *
 * The scratch file is removed on every path, success included.
 */
static bool pgp_check_art(struct nocem_stats *st, char *art, int len, int artno)
{
    FILE *fp;
    int fd;
    char cmd[MAX_FILE];
    char tmp[MAX_FILE];
    char buf[8192];
    int cc;

    snprintf(tmp, sizeof tmp, "nocem-pgp-tmp.%d", (int) getpid());
    snprintf(cmd, sizeof cmd, con->nocemPGPcommand, tmp);
    settaskinfo ("nocem: verifying PGP signature");
    fp = popen(cmd, "w");
    if (!fp)
        {
            loge (("popen(\"%s\") failed", cmd));
            goto bad;
        }
    if (fwrite (art, len, 1, fp) != 1)
        {
            loge (("error in writing to PGP process '%s'", cmd));
            pclose (fp);
            goto bad;
        }
    if (pclose (fp) !=0)
        goto badsig;
    fd = open(tmp, O_RDONLY);
    if (fd == -1)
        {
            loge (("open(\"%s\") failed", tmp));
            goto bad;
        }
    /*
     * Keep one byte spare so the text can be NUL-terminated, matching how
     * the other nn_regexec() callers in this file present their input.
     */
    cc = read(fd, buf, sizeof buf - 1);
    close (fd);
    if (cc < 1)
        goto bad;
    buf[cc] = '\0';
    if (nn_regexec(&nocemPGPgoodPreg, buf, cc, 0, 0, 0) != 0)
        goto badsig;
    logd (("valid PGP signature for article %d", artno));
    st->pgp_good++;
    unlink(tmp);		/* do not leave the scratch file behind on success */
    return TRUE;

badsig:
    logwn (("unable to verify PGP signature for article %d", artno));
bad:
    st->pgp_fail++;
    unlink(tmp);
    return FALSE;
}
    
/*
 * Validate one fetched article as a nocem notice and apply it.
 *
 *   st     statistics, updated by the PGP check and the message-id pass
 *   stack  the article text (stack->data is modified in place)
 *   artno  article number, used only in log messages
 *
 * The article must contain the "@@BEGIN NCM HEADERS", "@@BEGIN NCM BODY"
 * and "@@END NCM BODY" markers in that order.  With con->nocemPGP set, the
 * PGP begin/end markers must also enclose the whole NCM section and the
 * signature must verify.  The notice's Type: and Action: headers must then
 * match the configured patterns before the message-ids in the body are
 * processed.  Returns FALSE as soon as any of these checks fails.
 *
 * The marker strings are hardcoded here.
 */
static bool nocem_process_art(struct nocem_stats *st, struct strStack *stack, int artno)
{
    bool ret;
    struct nocem_stats st2;
    char *nocem_headers, *nocem_body, *nocem_endbody;

    nocem_headers = strstr(stack->data, "@@BEGIN NCM HEADERS");
    if (!nocem_headers)
        return FALSE;
    nocem_body = strstr(nocem_headers, "@@BEGIN NCM BODY");
    if (!nocem_body)
        return FALSE;
    nocem_endbody = strstr(nocem_body, "@@END NCM BODY");
    if (!nocem_endbody)
        {
            logwn (("missing '@@END NCM BODY in article %d", artno));
            return FALSE;
        }
    if (con->nocemPGP)
        {
            char *start;
            char *end;
            int eol;

            start = strstr(stack->data, con->nocemPGPbegin);
            if (!start)
                {
                    logwn (("missing '%s' in article %d", con->nocemPGPbegin, artno));
                    return FALSE;
                }
            end = strstr(start, con->nocemPGPend);
            if (!end)
                {
                    logwn (("missing '%s' in article %d", con->nocemPGPend, artno));
                    return FALSE;
                }
            /* stop out of window attacks */
            if (start > nocem_headers ||
                end < nocem_endbody)
                {
                    logwn (("PGP message boundary does not correlate with NCM message boundary for article %d", artno));
                    return FALSE;
                }
            end += strlen(con->nocemPGPend);
            /*
             * Take the \r\n after the end marker too -- pgp might need the
             * eol -- but never step past the article's terminating NUL
             * when the marker is the very last thing in the buffer.
             */
            for (eol = 0; eol < 2 && *end; eol++)
                end++;
            *end = '\0';
            if (!pgp_check_art(st, start, end-start, artno))
                return FALSE;
        }
    /* cut the header section and the body section into separate strings */
    nocem_body[-1] = nocem_endbody[-1] = '\0';
    if (!nocem_regex_match_header(&nocemTypePreg, nocem_headers, "\nType:"))
        return FALSE;
    if (!nocem_regex_match_header(&nocemActionPreg, nocem_headers, "\nAction:"))
        return FALSE;
    st2 = *st;			/* snapshot, so the per-article deltas can be logged */
    ret = nocem_process_msgids(st, nocem_body);
    logd (("article %d yeilded %s new msgsid's and %s duplicates (already classified msgid's)", artno, bs(st->msgid_good - st2.msgid_good), bs(st->msgid_dup - st2.msgid_dup)));
    return ret;
}

/*
 * Walk an XHDR From listing and process every article whose From value
 * matches nocemFromPreg.
 *
 *   st     statistics; art_skip / art_fail / art_good, art_hi and
 *          bytes_from are updated
 *   stack  the XHDR response; its status line must carry
 *          NNTP_HEAD_FOLLOWS_VAL.  The buffer is modified in place (each
 *          line's \r\n is overwritten with NULs).
 *
 * Returns FALSE when the response is malformed (bad status, or a line not
 * terminated by \r\n), TRUE once the listing has been consumed.
 *
 * slaveClient is always NULL again by the time an iteration finishes,
 * whether or not the article fetch succeeded.
 */
static bool nocem_scan_arts(struct nocem_stats *st, struct strStack *stack)
{
    char *p = stack->data;
    if (strToi (p) != NNTP_HEAD_FOLLOWS_VAL)
        return FALSE;
    p = strchr(p, '\n');
    if (!p)
        return FALSE;
    p++;
    settaskinfo("nocem: scanning article list");
    while (p < stack->data + stack->used)
        {
            int artno;
            char *from;
            char cmd[MAX_CMD];
            struct strStack *art;
            struct command cm;
            artno = strToi (p);
            SKIPNOWHITE (p);
            SKIPWHITE (p);
            from = p;
            p = strchr(p, '\r');        /* we can rely on this, as we are talking to ourself, */
                                        /* and WE read the RFC */
            if (!p)
                return FALSE;
            *p++ = '\0';
            if (*p != '\n')
                return FALSE;
            *p++ = '\0';                /* nuke \n too */
            if (artno < 1)
                {
                    st->art_skip++;
                    continue;
                }
            if (nn_regexec(&nocemFromPreg, from, p - from - 2, 0, 0, 0)!=0)
                {
                    st->art_skip++;
                    continue;
                }
            slaveClient = strStackAdd(NULL, "");
            cm.val = c_article;         /* XXX ick */
            snprintf(cmd, sizeof cmd, "ARTICLE %d\r\n", artno);
            settaskinfo("nocem: => [%s] ARTICLE %d", CurrentGroupScfg->host, artno);
            if (!CMDarticle (&cm, cmd, !con->nocemCache))
                {
                    strStackFree (slaveClient);
                    /* do not leave the global pointing at freed memory */
                    slaveClient = NULL;
                    st->art_fail++;
                    continue;
                }
            st->art_hi = artno;
            art = slaveClient;
            slaveClient = NULL;
            st->bytes_from += art->used;
            if (nocem_process_art(st, art, artno))
                st->art_good++;
            else
                st->art_fail++;
            strStackFree (art);
        }
    return TRUE;
}
            
/*
 * Scan one group for nocem articles.
 *
 *   group  name of the group to enter
 *   st     per-group statistics.  st->art_hi is the high-water mark: it is
 *          read as the starting point and left at the highest article
 *          fetched (or at the adjusted starting point).
 *
 * Returns FALSE when the GROUP or XHDR command fails, TRUE otherwise --
 * including when there was nothing new to scan.
 *
 * slaveClient must be NULL on entry and is NULL again on every return.
 */
static bool nocem_scan_group(char *group, struct nocem_stats *st)
{
        char cmd[MAX_CMD];
        int hi, lo;
        struct strStack *stack;
        struct nocem_stats st2;
        int start = st->art_hi;
        settaskinfo("nocem: entering group '%s'", group);
        assert (!slaveClient);
        slaveClient = strStackAdd(NULL, "");
        snprintf(cmd, sizeof cmd, "GROUP %.128s\r\n", group);
        if (!CMDgroup(cmd))
            {
                loge (("group change to '%s' failed", group));
                /* release the capture buffer, or the next call's assert trips */
                strStackFree(slaveClient);
                slaveClient = NULL;
                return FALSE;
            }
        strStackFree(slaveClient);
        slaveClient = NULL;
        hi = getHi(CurrentGroupNode);
        lo = getLo(CurrentGroupNode);
        if (start > hi)
            {
                logwn (("highwater article number for '%s' on '%s' decreased, presuming server reset", group, CurrentGroupScfg->host));
                start = 0;
            }
        if (start < lo)
            {
                logd (("highwater article number for '%s' adjusted from %d to %d (lowest article in group)", group, start, lo));
                start = lo;
            }
        /* never look further back than nocemInitialScan articles */
        start = MAX(start, hi - con->nocemInitialScan);
        st->art_hi = start;
        if (hi <= start) /* XXX if a server article renumbering has occurred then this code skips the first article */
            {
                logd (("no new articles in '%s' on '%s'", group, CurrentGroupScfg->host));
                return TRUE;
            }
        slaveClient = strStackAdd(NULL, "");
        settaskinfo("nocem: => [%s] XHDR %d-%d (group '%s')", CurrentGroupScfg->host, start, hi, group);
        snprintf(cmd, sizeof cmd, "XHDR From %d-%d\r\n", start, hi);
        if (!CMDxhdr(cmd))
            {
                loge (("XHDR From %d-%d in '%s' on '%s' failed", start, hi, group, CurrentGroupScfg->host));
                strStackFree(slaveClient);
                slaveClient = NULL;
                return FALSE;
            }
        stack = slaveClient;
        slaveClient = NULL;
        st->bytes_from += stack->used;
        st2 = *st;		/* snapshot, so the per-group deltas can be logged */
        nocem_scan_arts(st, stack);
        log (("group '%s' yeilded %s articles, %s new %s msgid's and %s duplicates (already classified msgid's)", group, bs(st->art_good - st2.art_good), bs(st->msgid_good - st2.msgid_good), SPAM, bs(st->msgid_dup - st2.msgid_dup)));
        logd (("group '%s' nocem highwater mark now at %d", group, st->art_hi));
        strStackFree(stack);
        st->last_scan = time (NULL);
        return TRUE;
}

/*
 * Scan every group listed in con->nocemGroups.  The n-th list entry uses
 * Stats->nocem_stats[n]; the walk stops at the end of the list or at the
 * first entry without data.  Per-group results are not inspected.
 */
static void nocem_scan()
{
        struct strList *node = con->nocemGroups;
        int slot = 0;

        while (node && node->data)
            {
                nocem_scan_group(node->data, &Stats->nocem_stats[slot]);
                node = node->next;
                slot++;
            }
}

/*
 * Pid of the nocem process started by nocemDaemon(); while it is non-zero
 * nocemDaemon() refuses to start another.
 * NOTE(review): it is not cleared anywhere in this file -- presumably the
 * SIGCHLD handling elsewhere resets it; confirm.
 */
EXPORT int volatile NocemDaemonPid;

/*
 * Start a nocem scan in a separate process when one is due.
 *
 * Nothing is started unless the caller is the master task, nocem is
 * enabled, no nocem process is already recorded in NocemDaemonPid, and at
 * least con->nocemInterval seconds have passed since the previous attempt.
 *
 * Returns TRUE in the parent once the child has been started, FALSE when
 * nothing was started.  The child runs nocem_scan() and then retires; it
 * does not return from here.
 *
 * NOTE(review): make_vm_proc() is treated as fork-like (-1 on error, the
 * child's pid in the parent) -- confirm against its definition.
 */
EXPORT bool nocemDaemon()
{
    static time_t last_attempt;
    time_t now = time(NULL);
    sigset_t chld_mask;
    int pid = 0;

    if (Task->ti_state != nc_master)
        return FALSE;
    if (!con->nocem)
        return FALSE;
    if (NocemDaemonPid)
        return FALSE;
    if (now - last_attempt < con->nocemInterval)
        return FALSE;
    last_attempt = now;

    /* hold SIGCHLD back until the child's pid has been recorded */
    sigemptyset(&chld_mask);
    sigaddset(&chld_mask, SIGCHLD);
    sigprocmask (SIG_BLOCK, &chld_mask, NULL);
    pid = make_vm_proc(nc_nocem, -1, "nocem");
    if (pid > 1)
        NocemDaemonPid = pid;
    sigprocmask (SIG_UNBLOCK, &chld_mask, NULL);

    if (pid == -1)
        return FALSE;
    if (pid > 1)
        return TRUE;

    /* child from here on */
    while (HoldForksNocem) {}
    /* set internal credentials */
    strcpy (ClientHost, "<nocem@nntpcache>");
    strcpy (ClientHostAddr, "127.0.0.1");

    ModeReader = TRUE;

    nocem_scan();
    retire_vm_proc (0);
    NOTREACHED;
}

/* [<][>][^][v][top][bottom][index][help] */