Friday, February 10, 2006

Banning abusive bots using mod_rewrite, .htaccess and modsecurity

Here are the currently blocked user agents as per my /etc/modsecurity/useragents.conf file:


# http://www.gotroot.com/mod_security+rules
# Gotroot.com ModSecurity rules
#
# Created by The Prometheus Group (http://www.prometheus-group.com)
#
# User Agent Security Rules
#
# Download from: http://www.gotroot.com/downloads/ftp/mod_security/useragents.conf
# Copyright 2005, all rights reserved.
#
# Commercial redistribution prohibited.
#
# Version: N-20051203-01

# Comment-spam marker line: reject requests carrying the "x-aaaaaa" token.
# The first rule is a general SecFilter; the second applies the same token
# (written in upper case) to the POST payload explicitly.
# The trailing ".*" does not change what matches, because the patterns are
# not anchored.
SecFilter "x-aaaaaa.*"
SecFilterSelective POST_PAYLOAD "X-AAAAAA.*"

# Check for bad meta characters (a single quote) in the User-Agent field.
# Disabled: the rule below is commented out.
#SecFilterSelective HTTP_USER_AGENT ".*\'"

# XSS in the UA field: matches an opening tag for script/about/applet/
# activex/chrome/object (allowing at most one extra character on either side
# of the tag name) that is later followed by a second "<" + one of those names.
SecFilterSelective HTTP_USER_AGENT "<(.|\s|\n)?(script|about|applet|activex|chrome|object)(.|\s|\n)?>.*<(.|\s|\n)?(script|about|applet|activex|chrome|object)"

# PHP code injection attack: "<?php", also with optional whitespace around
# the "?", or any User-Agent that mentions HTTP_GET_VARS.
SecFilterSelective HTTP_USER_AGENT "(<\?php|<[[:space:]]*\?[[:space:]]*php)"
SecFilterSelective HTTP_USER_AGENT ".*HTTP_GET_VARS"

# Require a non-empty User-Agent/Host unless the request comes from
# 127.0.0.1 (i.e. from ourselves).
# Some hosting software does not send a UA, so use with caution.
# Disabled: both chained rules below are commented out.
#SecFilterSelective "HTTP_USER_AGENT|HTTP_HOST" "^$" chain
#SecFilterSelective REMOTE_ADDR "!^127\.0\.0\.1$"

# Exploit agent.
# Note: "\.*" means "zero or more literal dots", so this rule effectively
# matches any User-Agent containing "Mosiac 1".
SecFilterSelective HTTP_USER_AGENT "Mosiac 1\.*"

# Bad agent
SecFilterSelective HTTP_USER_AGENT "Brutus/AET"

# CGI vulnerability scan tools
SecFilterSelective HTTP_USER_AGENT cgichk
SecFilterSelective HTTP_USER_AGENT "DataCha0s/2\.0"

# Self-announcing exploit User-Agents.
# Patterns are unanchored regular expressions, so no leading ".*" is needed,
# and a trailing "*" would only make the last character optional (it is not a
# shell-style wildcard); both have been removed so the full phrase must match.
SecFilterSelective HTTP_USER_AGENT "THIS IS AN EXPLOIT"
SecFilterSelective HTTP_USER_AGENT "Morzilla"

# CIRT.DK WebRoot auditing tool (the trailing space is part of the pattern)
SecFilterSelective HTTP_USER_AGENT "WebRoot "

# Exploit UA
SecFilterSelective HTTP_USER_AGENT "T H A T \' S G O T T A H U R T"

# XML-RPC exploit tool
SecFilterSelective HTTP_USER_AGENT "xmlrpc exploit"

# A friendly little exploit banner for a WordPress vulnerability
SecFilterSelective HTTP_USER_AGENT "Wordpress Hash Grabber"

# Blocks scripts: any User-Agent containing "lwp".
# This is a broad substring match; comment it out if it blocks clients you
# want to allow.
SecFilterSelective HTTP_USER_AGENT lwp

# Web leeches (the duplicate WebZIP rule has been removed; one is enough).
# "combine" is likewise a plain substring match on the whole User-Agent.
SecFilterSelective HTTP_USER_AGENT "Web Downloader"
SecFilterSelective HTTP_USER_AGENT WebZIP
SecFilterSelective HTTP_USER_AGENT WebCopier
SecFilterSelective HTTP_USER_AGENT Webster
SecFilterSelective HTTP_USER_AGENT WebStripper
SecFilterSelective HTTP_USER_AGENT "teleport pro"
SecFilterSelective HTTP_USER_AGENT combine
SecFilterSelective HTTP_USER_AGENT "Black Hole"
SecFilterSelective HTTP_USER_AGENT "SiteSnagger"
SecFilterSelective HTTP_USER_AGENT "ProWebWalker"
SecFilterSelective HTTP_USER_AGENT "CheeseBot"

# Bogus Mozilla UA lines.
# "$" anchors the end of the string and there is no "^", so these match any
# User-Agent that ENDS with "Mozilla/4.0" or "Mozilla/5.0" (nothing after the
# version), or that ends with "Mozilla/3.Mozilla/2.01".
SecFilterSelective HTTP_USER_AGENT "Mozilla/(4|5)\.0$"
SecFilterSelective HTTP_USER_AGENT "Mozilla/3\.Mozilla/2\.01$"

# Bogus IE UA line: a User-Agent ending with
# "Microsoft Internet Explorer/5.0".
SecFilterSelective HTTP_USER_AGENT "Microsoft Internet Explorer/5\.0$"

# Bogus UA
SecFilterSelective HTTP_USER_AGENT "FooBar/42"

# Patterns in this group are unanchored regular expressions, so they already
# match anywhere in the User-Agent; the redundant leading ".*" was dropped.

# Nessus vulnerability scanner UA
SecFilterSelective HTTP_USER_AGENT "Nessus"

# Nikto vulnerability scanner UA
SecFilterSelective HTTP_USER_AGENT "Nikto"

# Bad/bogus UAs
SecFilterSelective HTTP_USER_AGENT "Indy Library"
SecFilterSelective HTTP_USER_AGENT "Faxobot"
SecFilterSelective HTTP_USER_AGENT "SAFEXPLORER TL"

# Spam spider UAs
SecFilterSelective HTTP_USER_AGENT "fantomBrowser"
SecFilterSelective HTTP_USER_AGENT "fantomCrew Browser"

# VB development library used by many spammers; this might also block
# legitimate VB scripts. Comment out if you have problems.
SecFilterSelective HTTP_USER_AGENT "Crescent Internet ToolPak"

# Borland Delphi signature. As above, comment out if it gives you problems;
# spammers sometimes use these UAs.
# The second rule matches "Mozilla" followed anywhere later by "NEWT".
SecFilterSelective HTTP_USER_AGENT "NEWT ActiveX\; Win32"
SecFilterSelective HTTP_USER_AGENT "Mozilla.*NEWT"

# Part of the Microsoft MSINET.OCX. As above, spammers sometimes use this; if
# it causes problems, comment out. If you are a member of the Microsoft Site
# Builder Network, you probably do NOT want to block this ID.
# Disabled: both rules below are commented out.
#SecFilterSelective HTTP_USER_AGENT "Microsoft URL Control"
#SecFilterSelective HTTP_USER_AGENT "^Microsoft URL"

# E-mail collectors and spammers.
# Patterns are regular expressions, not shell globs: a trailing "*" only made
# the final character optional, so it has been removed and each full name must
# now be present. "WebEMailExtrac" is a prefix match that also covers
# "WebEMailExtractor", so a separate rule for the longer name is not needed.
SecFilterSelective HTTP_USER_AGENT "WebBandit"
SecFilterSelective HTTP_USER_AGENT "WEBMOLE"
SecFilterSelective HTTP_USER_AGENT "Telesoft"
SecFilterSelective HTTP_USER_AGENT "WebEMailExtrac"
SecFilterSelective HTTP_USER_AGENT "CherryPicker"
SecFilterSelective HTTP_USER_AGENT NICErsPRO
SecFilterSelective HTTP_USER_AGENT "Advanced Email Extractor"
SecFilterSelective HTTP_USER_AGENT EmailSiphon
SecFilterSelective HTTP_USER_AGENT Extractorpro
SecFilterSelective HTTP_USER_AGENT webbandit
SecFilterSelective HTTP_USER_AGENT EmailCollector
SecFilterSelective HTTP_USER_AGENT EmailWolf

# Spiders that eat up bandwidth for their customers.
# Not spammers, just spiders; comment out if you like.
SecFilterSelective HTTP_USER_AGENT "CopyRightCheck"
SecFilterSelective HTTP_USER_AGENT "CopyGuard"
SecFilterSelective HTTP_USER_AGENT "Digimarc WebReader"

# Marketing spiders: "Zeus ", anything, then "Webster Pro".
# The former trailing "*" made the final "o" optional rather than acting as a
# wildcard, so it has been removed.
SecFilterSelective HTTP_USER_AGENT "Zeus .*Webster Pro"

# Poker spam
SecFilterSelective HTTP_USER_AGENT "8484 Boston Project"

# Collectors
SecFilterSelective HTTP_USER_AGENT "autoemailspider"
SecFilterSelective HTTP_USER_AGENT "ecollector"
SecFilterSelective HTTP_USER_AGENT "grub crawler"

# Referrer spam, not the real weblogs: only a User-Agent that STARTS with
# "www.weblogs.com" is matched ("^" anchors the beginning).
SecFilterSelective HTTP_USER_AGENT "^www\.weblogs\.com"

# Spam bots ("Shockwave Flash" is matched only at the start of the UA)
SecFilterSelective HTTP_USER_AGENT "DTS Agent"
SecFilterSelective HTTP_USER_AGENT "POE-Component-Client"
SecFilterSelective HTTP_USER_AGENT "WISEbot"
SecFilterSelective HTTP_USER_AGENT "^Shockwave Flash"
SecFilterSelective HTTP_USER_AGENT "Missigua"

# Comment spam sign: note the space BEFORE the semicolon in
# "compatible ; MSIE".
SecFilterSelective HTTP_USER_AGENT "compatible \; MSIE"

# Some regexps to catch silly bots.
#
# The first rule is negated ("!") and chained: the rule that follows it is
# only evaluated when REQUEST_URI does NOT match the pattern.
# NOTE(review): "\|" is an escaped (literal) pipe rather than alternation, the
# "." is unescaped and the pattern ends in "txt1". This looks like it was
# meant to exempt a pair of .txt files from the next rule, but as written it
# is unlikely to match any real URI -- TODO confirm the intended pattern.
SecFilterSelective REQUEST_URI "!/ps(zones\|comp).txt1" chain
# Whole-string match: "google", an "explore(r).exe"/"iexplore(r).exe" name, or
# a bare "IE"/"MSIE" with optional version and optional
# "Compatible"/"Compatible Browser" suffix.
SecFilterSelective HTTP_USER_AGENT "^(google|i?explorer?\.exe|(MS)?IE( [0-9.]+)?[ ]?(Compatible( Browser)?)?)$"
# Whole-string match: "Mozilla", optional version, then only "(Windows)",
# "(Linux)", "(Compatible)" or "(IE Compatible)".
SecFilterSelective HTTP_USER_AGENT "^(Mozilla( [0-9.]+)?[ ]?\((Windows|Linux|(IE )?Compatible)\))$"
# One exact User-Agent string.
SecFilterSelective HTTP_USER_AGENT "^Mozilla/5\.0 \(X11; U; Linux i686; en-US; rv\:0\.9\.6\+\) Gecko/2001112$"
# IE-style UA in which the Windows token is followed by another ";" and then
# only digits, dots, slashes or spaces (the closing parenthesis is optional).
SecFilterSelective HTTP_USER_AGENT "^Mozilla/[0-9.]+ \(compatible; MSIE [0-9.]+; Windows( NT)?( [0-9.]*)?;[0-9./ ]*\)?$"
# Any UA starting with "Mozilla/" that ends in one or more dots or spaces.
SecFilterSelective HTTP_USER_AGENT "^Mozilla/.+[. ]+$"

# Spammer
SecFilterSelective HTTP_USER_AGENT "Butch__2\.1\.1"
SecFilterSelective HTTP_USER_AGENT "agdm79@mail\.ru"

# Fake Gameboy UA
SecFilterSelective HTTP_USER_AGENT "GameBoy\, Powered by Nintendo"

# Bogus Amiga UA
SecFilterSelective HTTP_USER_AGENT "Amiga-AWeb/3\.4"

# Exploit UA (the "x" is a literal character, not a version placeholder)
SecFilterSelective HTTP_USER_AGENT "Internet Ninja x\.0"

# Bogus googlebot UA: "Nokia-WAPToolkit" followed by " googlebot" and then a
# second "googlebot" later in the string.
SecFilterSelective HTTP_USER_AGENT "Nokia-WAPToolkit.* googlebot.*googlebot"

# Recently caught sending spam referrals, from their actual crawler IP
SecFilterSelective HTTP_USER_AGENT "BecomeBot"

# SurveyBot
SecFilterSelective HTTP_USER_AGENT "SurveyBot"

# Exploit tools and unwanted crawlers/feed fetchers.
# Patterns are unanchored regular expressions, so:
#  - a trailing ".*" is redundant and has been removed;
#  - literal dots are escaped ("\.") so they no longer match any character;
#  - "_" needs no escaping.
SecFilterSelective HTTP_USER_AGENT "S\.T\.A\.L\.K\.E\.R\."
SecFilterSelective HTTP_USER_AGENT "NeuralBot/0\.2"
SecFilterSelective HTTP_USER_AGENT "Kenjin Spider"
#SecFilterSelective HTTP_USER_AGENT "FunWebProducts"
# "ichiro" matches every version, so a separate "ichiro/2.0" rule is not needed.
SecFilterSelective HTTP_USER_AGENT "ichiro"
SecFilterSelective HTTP_USER_AGENT "picsearch"
SecFilterSelective HTTP_USER_AGENT "psbot/0\.1"
# "Mozilla" followed anywhere later by "Dead"; this also covers the
# "Mozilla/4.0 ... ZyBorg/1.0 ... Dead ... Checker" User-Agent, so no separate
# rule is needed for it.
SecFilterSelective HTTP_USER_AGENT "Mozilla.*Dead"
SecFilterSelective HTTP_USER_AGENT "Bloglines"
SecFilterSelective HTTP_USER_AGENT "PluckFeedCrawler"
SecFilterSelective HTTP_USER_AGENT "Baiduspider"
SecFilterSelective HTTP_USER_AGENT "voyager/1\.0"
SecFilterSelective HTTP_USER_AGENT "boitho\.com"
# Any findlinks 1.x release
SecFilterSelective HTTP_USER_AGENT "findlinks/1"
SecFilterSelective HTTP_USER_AGENT "NewsGatorOnline/2\.0"
SecFilterSelective HTTP_USER_AGENT "g2Crawler"
SecFilterSelective HTTP_USER_AGENT "Onfolio/2\.02"
SecFilterSelective HTTP_USER_AGENT "Java/1\.5\.0_04"
SecFilterSelective HTTP_USER_AGENT "Gigabot"
# The "." is left as a wildcard on purpose: it matches whatever single
# character separates "e" and "SocietyRobot".
SecFilterSelective HTTP_USER_AGENT "e.SocietyRobot"


Banning using .htaccess/mod_rewrite

# These directives require mod_rewrite to be loaded in the server and
# .htaccess overrides to be permitted (AllowOverride FileInfo);
# "RewriteEngine on" only switches on an already-loaded module.
RewriteEngine on
RewriteBase /

# Answer 403 Forbidden to any request whose User-Agent STARTS with one of the
# names below. mod_rewrite patterns are case-sensitive unless [NC] is given.
RewriteCond %{HTTP_USER_AGENT} ^EmailSiphon [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailWolf [OR]
RewriteCond %{HTTP_USER_AGENT} ^ExtractorPro [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla.*NEWT [OR]
RewriteCond %{HTTP_USER_AGENT} ^Crescent [OR]
RewriteCond %{HTTP_USER_AGENT} ^CherryPicker [OR]
RewriteCond %{HTTP_USER_AGENT} ^[Ww]eb[Bb]andit [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebEMailExtrac [OR]
RewriteCond %{HTTP_USER_AGENT} ^NICErsPRO [OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport [OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus.*Webster [OR]
# The "." stands in for the space in "Microsoft URL" (an unquoted pattern
# cannot contain a space).
RewriteCond %{HTTP_USER_AGENT} ^Microsoft.URL [OR]
RewriteCond %{HTTP_USER_AGENT} ^Wget [OR]
RewriteCond %{HTTP_USER_AGENT} ^LinkWalker [OR]
RewriteCond %{HTTP_USER_AGENT} ^sitecheck\.internetseer\.com [OR]
RewriteCond %{HTTP_USER_AGENT} ^ia_archiver [OR]
RewriteCond %{HTTP_USER_AGENT} ^DIIbot [OR]
RewriteCond %{HTTP_USER_AGENT} ^psbot [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailCollector
RewriteRule ^.* - [F]

# Answer 403 Forbidden to requests referred from www.iaea.org.
# The condition accepts the bare host as well as any path below it; the
# previous "^http://www.iaea.org$" only matched a Referer with no path at all.
# The RewriteRule pattern is tested against the requested URL path, never
# against a full "http://..." URL, so a plain match-everything pattern is used.
RewriteCond %{HTTP_REFERER} ^http://www\.iaea\.org(/|$) [NC]
RewriteRule ^.* - [F]



Credits for .htaccess / mod_rewrite based banning:
.htaccess ban list Part 1
.htaccess ban list Part 2
.htaccess ban list Part 3

1 Comment:

At Sun Apr 23, 05:47:00 AM, Anonymous Anonymous said...

It states:
htaccess/mod_rewrite

Do I have to enable a script called mod_rewrite, or is the statement 'RewriteEngine on' in the .htaccess file enough to start the file working?

Thanks!
Peter

 

Post a Comment

Links to this post:

Create a Link

<< Home