Spam learning per user

signaldeveloper at gmail.com signaldeveloper at gmail.com
Thu Sep 17 16:11:41 CEST 2015


Christian,

This is awesome!! Thanks! I wanted to know, though: is there a way to simply trust that users will put spam into their spam folder, and set it up to auto-learn from each user's default-created spam folder?

- Paul

> On Sep 16, 2015, at 3:31 AM, Christian Hügel <christian.huegel at stonebyte.de> wrote:
> 
> Under CentOS 6 the spamassassin auto learning option is also disabled. This is what my local.cf file looks like:
> 
> 
> # These values can be overridden by editing ~/.spamassassin/user_prefs.cf
> # (see spamassassin(1) for details)
> 
> # These should be safe assumptions and allow for simple visual sifting
> # without risking lost emails.
> 
> required_hits 5
> 
> report_safe 0
> rewrite_header Subject [SPAM]
> score BAYES_99 7.308
> #score BAYES_99 3
> add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
> add_header all Level _STARS(*)_
> add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_
> #bayes
> use_bayes 1
> use_bayes_rules 1
> bayes_auto_learn 1
> bayes_file_mode 0600
> bayes_path /etc/spamassassin/bayes
> #bayes_auto_expire 0
> bayes_auto_expire 1
> bayes_journal_max_size 15000000
> bayes_expiry_max_db_size 20000000
> 
> # dcc
> use_dcc 1
> dcc_path /usr/local/bin/dccproc
> dcc_home /var/dcc
> dcc_timeout     10
> add_header all  DCC _DCCB_: _DCCR_
> 
> #pyzor
> use_pyzor 1
> pyzor_path /usr/bin/pyzor
> 
> #razor
> use_razor2 1
> razor_config /var/spool/amavisd/razor-agent.conf
> score RAZOR2_CHECK 2.500
> score PYZOR_CHECK 2.500
> score DCC_CHECK 4.000
> 
> # Blacklist
> 
> # Whitelist
> 
> # end of local.cf
> 
> For each user I have the following folder structure:
> ->Spam  (here goes recognized spam by spamassassin)
> ----->Spam\UnknownSpam (false negative)
> ----->Spam\NoSpam (false positive)
> 
> To avoid creating those folders manually every time I add a user, I have the following entries in /etc/kolab.conf
> 
>        'Spam': {
>        'annotations': {
>        '/private/vendor/kolab/folder-type': "mail.junkemail",
>        },
>        },
>        'Spam/UnknownSpam': {
>        'annotations': {
>        '/private/vendor/kolab/folder-type': "mail.junkemail",
>        },
>        },
>        'Spam/NoSpam': {
>        'annotations': {
>        '/private/vendor/kolab/folder-type': "mail.junkemail",
>        },
>        },
> 
> Newly created users receive an automated (welcome) mail where I explain what to do with false positive/negative mail.
> 
> Now to train spamassassin I use a handy script [1] which I run via cron every night:
> 
> 15 2 * * * root /usr/sbin/sa-learn-cyrus
> 
> This is the config:
> 
> # Configuration for sa-learn-cyrus
> #
> # hjb -- 2011-11-02
> #
> # -------------------------------------------------------
> # global parameters
> #
> [global]
> 
> # Directory to store output of sa-learn and ipurge temporarily
> tmp_dir = /tmp
> 
> # To avoid race conditions, we use a lock file.
> lock_file = /var/lock/sa-learn-cyrus.lock
> 
> # level of verbosity (0 .. 3)?
> verbose = 3
> 
> # Don't execute commands, show only what would be executed,
> # Change this to 'no' after testing.
> simulate = no
> 
> # Prepend log output with a tag (date, time, pid)?
> # Choose 'no' if you prefer to pipe the output to syslog
> # (default is 'yes')
> log_with_tag = yes
> 
> # -------------------------------------------------------
> # Mailbox
> #
> [mailbox]
> 
> # List of mailboxes/users which will be considered.
> # If this list is empty all mailboxes will be searched.
> #
> include_list = ''
> 
> # If include_list is empty, only mailboxes matching this pattern will be considered
> #include_regexp = '.*'
> include_regexp = ''
> 
> # List of mailboxes/users which will be ignored
> exclude_list = ''
> 
> # If exclude_list is empty, mailboxes matching this pattern will be ignored
> exclude_regexp = ''
> 
> # Spam folder relative to INBOX (cyrus nomenclature: e.g. 'junk.Spam')
> spam_folder = 'Spam/UnknownSpam'
> 
> # Ham folder relative to INBOX (cyrus nomenclature: e.g. 'junk.Ham')
> ham_folder = 'Spam/NoSpam'
> 
> # Remove spam after feeding it to SA
> remove_spam = yes
> 
> # Remove ham after feeding it to SA
> remove_ham = no
> 
> # -------------------------------------------------------
> # Spamassassin
> #
> [sa]
> 
> # run sa-learn in debug mode (useful to examine problems)
> debug = no
> 
> # Path with system-wide SA preferences
> site_config_path = /etc/mail/spamassassin
> 
> # Path to sa-learn
> learn_cmd = /usr/bin/sa-learn
> 
> # Bayes storage mechanism (berkely|sql)
> #   berkely: Berkely DB (default)
> #   sql: SQL Database
> bayes_storage = berkely
> 
> # SA configuration file.
> # Used to get the Bayes database path if bayes_storage = berkely
> # Required to run sa-learn.
> prefs_file = /etc/mail/spamassassin/local.cf
> 
> # Should permissions of DB files be fixed?
> # Ignored unless bayes_storage = berkely
> fix_db_permissions = yes
> 
> # SA user and group (required if fix_db_permissions = yes)
> user = amavis
> group = amavis
> 
> # Skip synchronization after every change of database, but sync once
> # after all messages have been learned.
> # May speed up learning from many folders.
> sync_once = yes
> 
> # Use this if you use the --virtual-config-dir option of spamd (it
> # needs to match exactly). See the spamd man page for more
> # information.
> virtual_config_dir = ''
> 
> # -------------------------------------------------------
> # IMAP
> #
> [imap]
> 
> # Base directory of IMAP spool (below that mailboxes are located)
> base_dir = /var/spool/imap
> 
> # If base_dir has subdivisions with initial letters of mailbox names
> # set initial_letter = yes (default), otherwise choose no.
> # Example for joe's mailbox:
> #   yes: <base_dir>/j/user/joe/
> #    no: <base_dir>/user/joe/
> initial_letter = yes
> 
> # If your cyrus spool uses domain hierarchy give a list of domains
> # Example for mailbox fritz at bar.org and joe at foo.com
> #   <base_dir>/domain/b/bar.org/f/fritz
> #   <base_dir>/domain/f/foo.com/j/joe
> # domains = foo.com bar.org
> #
> # If you don't use Cyrus's domain support leave the entry empty.
> # The initial_letter option (see above) is applied to domains, too.
> domains = 'stonebyte.de forumschaessburg.eu sorinamoyrer.de'
> 
> # Choose 'unixhierarchysep = yes' if Cyrus is configured to accept usernames
> # like 'hans.mueller.somedomain.tld'
> unixhierarchysep = yes
> 
> # imap command to purge mail messages
> purge_cmd = /usr/lib/cyrus-imapd/ipurge
> 
> # Cyrus-IMAPd user
> user = cyrus
> 
> 
> 
> Hope this helps.
> 
> Cheers,
> 
> Christian
> 
> 
> [1] http://www.pollux.franken.de/mail-server-tools/sa-learn-cyrus/
> 
> 
> 
> 
> 
> Am 15-09-2015 18:52, schrieb Paul Bronson:
>> I read online the documentation regarding the spam and ham training
>> but had a question..
>> Let's say for instance I have 500,000 users on my install. If people
>> put mail in their spam/junk folder created by default, doesn't the
>> system already learn by that, or do you need to manually tell
>> spamassassin it's spam?
> 
> 
> 


More information about the users mailing list