Spam learning per user

Wed Sep 16 09:31:54 CEST 2015

Under CentOS 6 the spamassassin auto learning option is also disabled. 
This is what my local.cf file looks like:

# These values can be overridden by editing ~/.spamassassin/user_prefs.cf
# (see spamassassin(1) for details)

# These should be safe assumptions and allow for simple visual sifting
# without risking lost emails.

required_hits 5

report_safe 0
rewrite_header Subject [SPAM]
score BAYES_99 7.308
#score BAYES_99 3
add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
add_header all Level _STARS(*)_
add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_
#bayes
use_bayes 1
use_bayes_rules 1
bayes_auto_learn 1
bayes_file_mode 0600
bayes_path /etc/spamassassin/bayes
#bayes_auto_expire 0
bayes_auto_expire 1
bayes_journal_max_size 15000000
bayes_expiry_max_db_size 20000000

# dcc
use_dcc 1
dcc_path /usr/local/bin/dccproc
dcc_home /var/dcc
dcc_timeout     10
add_header all  DCC _DCCB_: _DCCR_

#pyzor
use_pyzor 1
pyzor_path /usr/bin/pyzor

#razor
use_razor2 1
razor_config /var/spool/amavisd/razor-agent.conf
score RAZOR2_CHECK 2.500
score PYZOR_CHECK 2.500
score DCC_CHECK 4.000

# Blacklist

# Whitelist

# end of local.cf

For each user I have the following folder structure:
->Spam  (here goes recognized spam by spamassassin)
----->Spam\UnknownSpam (false negative)
----->Spam\NoSpam (false positive)

To avoid creating those folders manually every time I add a user, I
have the following entries in /etc/kolab.conf:

         'Spam': {
         'annotations': {
         '/private/vendor/kolab/folder-type': "mail.junkemail",
         },
         },
         'Spam/UnknownSpam': {
         'annotations': {
         '/private/vendor/kolab/folder-type': "mail.junkemail",
         },
         },
         'Spam/NoSpam': {
         'annotations': {
         '/private/vendor/kolab/folder-type': "mail.junkemail",
         },
         },

Newly created users receive an automated (welcome) mail where I explain
what to do with false positive/negative mail.

Now to train spamassassin I use a handy script [1] which I run via cron 
every night:

15 2 * * * root /usr/sbin/sa-learn-cyrus

This is the config:

# Configuration for sa-learn-cyrus
#
# hjb -- 2011-11-02
#
# -------------------------------------------------------
# global parameters
#
[global]

# Directory to store output of sa-learn and ipurge temporarily
tmp_dir = /tmp

# To avoid race conditions, we use a lock file.
lock_file = /var/lock/sa-learn-cyrus.lock

# level of verbosity (0 .. 3)?
verbose = 3

# Don't execute commands, show only what would be executed,
# Change this to 'no' after testing.
simulate = no

# Prepend log output with a tag (date, time, pid)?
# Choose 'no' if you prefer to pipe the output to syslog
# (default is 'yes')
log_with_tag = yes

# -------------------------------------------------------
# Mailbox
#
[mailbox]

# List of mailboxes/users which will be considered.
# If this list is empty all mailboxes will be searched.
#
include_list = ''

# If include_list is empty, only mailboxes matching this pattern will be considered
#include_regexp = '.*'
include_regexp = ''

# List of mailboxes/users which will be ignored
exclude_list = ''

# If exclude_list is empty, mailboxes matching this pattern will be ignored
exclude_regexp = ''

# Spam folder relative to INBOX (cyrus nomenclature: e.g. 'junk.Spam')
spam_folder = 'Spam/UnknownSpam'

# Ham folder relative to INBOX (cyrus nomenclature: e.g. 'junk.Ham')
ham_folder = 'Spam/NoSpam'

# Remove spam after feeding it to SA
remove_spam = yes

# Remove ham after feeding it to SA
remove_ham = no

# -------------------------------------------------------
# Spamassassin
#
[sa]

# run sa-learn in debug mode (useful to examine problems)
debug = no

# Path with system-wide SA preferences
site_config_path = /etc/mail/spamassassin

# Path to sa-learn
learn_cmd = /usr/bin/sa-learn

# Bayes storage mechanism (berkely|sql)
#   berkely: Berkely DB (default)
#   sql: SQL Database
bayes_storage = berkely

# SA configuration file.
# Used to get the Bayes database path if bayes_storage = berkely
# Required to run sa-learn.
prefs_file = /etc/mail/spamassassin/local.cf

# Should permissions of DB files be fixed?
# Ignored unless bayes_storage = berkely
fix_db_permissions = yes

# SA user and group (required if fix_db_permissions = yes)
user = amavis
group = amavis

# Skip synchronization after every change of database, but sync once
# after all messages have been learned.
# May speed up learning from many folders.
sync_once = yes

# Use this if you use the --virtual-config-dir option of spamd (it
# needs to match exactly). See the spamd man page for more
# information.
virtual_config_dir = ''

# -------------------------------------------------------
# IMAP
#
[imap]

# Base directory of IMAP spool (below that mailboxes are located)
base_dir = /var/spool/imap

# If base_dir has subdivisions with initial letters of mailbox names
# set initial_letter = yes (default), otherwise choose no.
# Example for joe's mailbox:
#   yes: <base_dir>/j/user/joe/
#    no: <base_dir>/user/joe/
initial_letter = yes

# If your cyrus spool uses domain hierarchy give a list of domains
# Example for mailbox fritz@bar.org and joe@foo.com
#   <base_dir>/domain/b/bar.org/f/fritz
#   <base_dir>/domain/f/foo.com/j/joe
# domains = foo.com bar.org
#
# If you don't use Cyrus's domain support leave the entry empty.
# The initial_letter option (see above) is applied to domains, too.
domains = 'stonebyte.de forumschaessburg.eu sorinamoyrer.de'

# Choose 'unixhierarchysep = yes' if Cyrus is configured to accept usernames
# like 'hans.mueller.somedomain.tld'
unixhierarchysep = yes

# imap command to purge mail messages
purge_cmd = /usr/lib/cyrus-imapd/ipurge

# Cyrus-IMAPd user
user = cyrus

Hope this helps.

Cheers,

Christian

[1] http://www.pollux.franken.de/mail-server-tools/sa-learn-cyrus/

Am 15-09-2015 18:52, schrieb Paul Bronson:
> I read online the documentation regarding the spam and ham training
> but had a question..
> 
> Let's say for instance I have 500,000 users on my install. If people
> put mail in their spam/junk folder created by default, doesn't the
> system already learn by that, or do you need to manually tell
> spamassassin it's spam?