• 27 Posts
  • 250 Comments
Joined 1 year ago
cake
Cake day: June 11th, 2024

help-circle







  • For now I asked ChatGPT to help me implement a simple rule that returns 403 for bot user agents. I looked into my logs and collected the bot names I saw there. I know it won’t hold forever, but for now it’s quite nice: I just added this file to /etc/nginx/conf.d/block_bots.conf, and it gets run before all the vhosts and rejects all bots. The rest just goes normally to the vhosts. This way I don’t need to implement it in each vhost separately.

    ➜ jeena@Abraham conf.d cat block_bots.conf 
    # /etc/nginx/conf.d/block_bots.conf  
    
    # Classify the request by its User-Agent header.
    # $bad_bot evaluates to 1 when the header matches any of the
    # case-insensitive regexes (~*) below, and to 0 otherwise.
    # All patterns map to the same value, so their order is irrelevant;
    # they are kept alphabetical to make additions easy to spot.
    map $http_user_agent $bad_bot {
        default                                         0;

        ~*AhrefsBot                                     1;
        ~*Amazonbot                                     1;
        ~*BacklinksExtendedBot                          1;
        ~*ClaudeBot                                     1;
        ~*DataForSeoBot                                 1;
        # Spaces are backslash-escaped so the pattern stays a single token.
        ~*Expanse,\ a\ Palo\ Alto\ Networks\ company    1;
        ~*GPTBot                                        1;
        ~*meta-externalagent                            1;
        ~*OAI-SearchBot                                 1;
        ~*PetalBot                                      1;
        ~*SemrushBot                                    1;
        ~*VelenPublicWebCrawler                         1;
        ~*YisouSpider                                   1;
    }
    
    # Catch-all server for ports 80 and 443 (IPv4 and IPv6).
    # Requests that reach it get 403 when $bad_bot is set, otherwise the
    # connection is closed without a response (444).
    #
    # NOTE(review): per nginx request routing, a default_server only receives
    # requests whose Host header matches no other server block on the same
    # listen socket. Vhosts with their own server_name are therefore NOT
    # covered by the check below unless they also test $bad_bot themselves
    # (e.g. via a shared include) — verify against the vhost configs.
    server {  
        listen 80 default_server;  
        listen [::]:80 default_server;  
        listen 443 ssl default_server;  
        listen [::]:443 ssl default_server;  
    
        # Placeholder certificate so the TLS listener can start; it is the
        # distribution's self-signed "snakeoil" pair, not a trusted cert.
        ssl_certificate     /etc/ssl/certs/ssl-cert-snakeoil.pem;  
        ssl_certificate_key /etc/ssl/private/ssl-cert-snakeoil.key;  
    
        # Reject user agents flagged by the $bad_bot map with 403 Forbidden.
        if ($bad_bot) {  
            return 403;  
        }  
    
        # Everything else hitting this server: 444 is nginx's non-standard
        # code that closes the connection without sending a response.
        return 444;  
    }