Tag Archives: access

FreeBSD10.1: separate static access and script (php) access for nginx

This article is based on the PHP forum software Discuz (http://www.discuz.net); you should modify the configuration as needed.

Operating system: FreeBSD 10.1 with ZFS

1. Enable httpready, aio, tmpfs

# echo 'accf_http_load="YES"' >> /boot/loader.conf
# echo 'aio_load="YES"' >> /boot/loader.conf
# echo 'tmpfs /tmp tmpfs rw,mode=1777 0 0' >> /etc/fstab
# kldload accf_http aio
# umount /tmp && mount /tmp

2. Create tmp mount point for nginx

# zfs create -o atime=off -o setuid=off -o checksum=off -o mountpoint=/var/tmp/nginx zroot/nginx
# chown www /var/tmp/nginx

3. Nginx configuration (nginx version 1.6)

Compile nginx from the ports tree:

# cd /usr/ports/www/nginx && make config && make install clean

Make sure that you have enabled the modules below:

FILE_AIO
GOOGLE_PERFTOOLS
HTTP
HTTP_ADDITION
HTTP_GEOIP
HTTP_REALIP
HTTP_REWRITE
HTTP_STATUS
CACHE_PURGE
HEADERS_MORE
LUA

create log folder

# mkdir /var/log/nginx

Below is the configuration for nginx. Please be aware that I split the configuration across several files.

main:

# Main-context settings.
# Worker processes run as the "www" user.
user www;
# Please change this number as needed
worker_processes 4;
# Server-wide error log, at "info" level.
error_log /var/log/nginx/nginx-error.log info;
pid /var/run/nginx.pid;
# Path prefix for google-perftools profiles.
google_perftools_profiles /tmp/nginx_gperf;

events:

# Connection handling: kqueue event method, 1024 connections per worker.
events {
  use kqueue;
  worker_connections 1024;
}

http:

# HTTP context: file I/O tuning, real-IP handling, log formats, temp/cache
# paths, rate-limit zones, the public front server on port 80 and a
# loopback-only status server.
http                          {
  # File delivery settings.
  aio                         sendfile;
  sendfile                    on;
  tcp_nopush                  on;
  directio                    4m;  #this line can be removed because ZFS doesn't support directio
  directio_alignment          4096;
  # Needed by front.conf, which chains error_page redirects (481 -> @human -> 483 -> @script, ...).
  recursive_error_pages       on;
  # Trust X-Real-IP only from loopback addresses and unix sockets.
  set_real_ip_from            127.0.0.0/8;
  set_real_ip_from            unix:;
  real_ip_header              X-Real-IP;
  default_type                application/octet-stream;
  server_tokens               off;
  server_name_in_redirect     off;
  keepalive_timeout           120;
  # "main" log format: pipe-separated request fields.
  log_format                  main '$remote_addr | $time_local | $host | $request | $status | '
                                   '$body_bytes_sent | $http_referer | $http_user_agent | '
                                   '$http_x_forwarded_for';
  # "cache" log format: same as "main" plus the upstream cache status.
  log_format                  cache '$remote_addr | $time_local | $host | $request | $status | '
                                    '$body_bytes_sent | $http_referer | $http_user_agent | '
                                    '$http_x_forwarded_for | $upstream_cache_status';
  # Temporary files go to /tmp (mounted as tmpfs in step 1).
  client_body_temp_path       /tmp/nginx_tmp_client;
  fastcgi_temp_path           /tmp/nginx_tmp_fcgi;
  uwsgi_temp_path             /tmp/nginx_tmp_uwsgi;
  scgi_temp_path              /tmp/nginx_tmp_sgi;
  proxy_temp_path             /tmp/nginx_tmp_proxy;
  # Disk cache on the dedicated ZFS dataset created in step 2.
  proxy_cache_path            /var/tmp/nginx/cache levels=2:2 keys_zone=cache_disk:256m inactive=1d;
  # Per-client-address limit zones referenced from front.conf.
  limit_req_zone              $binary_remote_addr zone=scriptzone:4m rate=10r/s;
  limit_req_zone              $binary_remote_addr zone=staiczone:4m rate=30r/s;
  limit_conn_zone             $binary_remote_addr zone=clientzone:4m;
  include                     mime.types;
  include                     map.conf;
  include                     upstream.conf;
  # Public front server; accept_filter=httpready uses the accf_http module loaded in step 1.
  server                      {
    listen                    80 accept_filter=httpready;
    include                   front.conf;
  }
  # Can use curl http://127.0.0.1:888 to check nginx status
  server                      {
    listen                    127.0.0.1:888;
    stub_status               on;
    access_log                off;
  }
  # Per-site backend servers.
  include                     site/*.conf;
}

I will use map.conf and front.conf to separate static access from script access, and human access from robot access.

Use map.conf to classify the different access types:

map.conf:

# geo-based IP mapping
# below are offline download servers ip address (such as thunder, qqdownload)
# Every listed range sets $geo to "oldl"; no default entry is declared here.
geo                                     $geo {
  ranges;
  101.226.180.1-101.226.180.255         oldl;
  111.161.24.1-111.161.24.255           oldl;
  112.117.217.1-112.117.217.255         oldl;
  112.90.17.1-112.90.17.255             oldl;
  114.112.202.1-114.112.202.255         oldl;
  114.80.183.1-114.80.189.255           oldl;
  114.80.245.1-114.80.245.255           oldl;
  116.55.230.1-116.55.230.255           oldl;
  117.34.91.1-117.34.91.255             oldl;
  118.122.36.1-118.122.36.255           oldl;
  118.122.87.1-118.122.88.255           oldl;
  119.120.94.1-119.120.94.255           oldl;
  119.144.9.1-119.144.9.255             oldl;
  119.147.41.1-119.147.41.255           oldl;
  119.178.12.1-119.178.12.255           oldl;
  119.188.11.1-119.188.12.255           oldl;
  119.188.129.1-119.188.129.255         oldl;
  119.188.13.1-119.188.15.255           oldl;
  119.188.50.1-119.188.50.255           oldl;
  119.189.1.1-119.189.1.255             oldl;
  119.84.114.1-119.84.114.255           oldl;
  119.97.178.1-119.97.178.255           oldl;
  119.97.183.1-119.97.183.255           oldl;
  121.10.120.1-121.10.120.255           oldl;
  121.10.137.1-121.10.137.255           oldl;
  121.10.24.1-121.10.24.255             oldl;
  121.14.222.1-121.14.222.255           oldl;
  121.14.82.1-121.14.82.255             oldl;
  121.9.209.1-121.9.209.255             oldl;
  121.9.246.1-121.9.246.255             oldl;
  122.141.227.1-122.141.227.255         oldl;
  122.141.235.1-122.141.235.255         oldl;
  122.143.1.1-122.143.6.255             oldl;
  122.228.241.1-122.228.241.255         oldl;
  122.228.255.1-122.228.255.255         oldl;
  123.129.219.1-123.129.219.255         oldl;
  123.129.242.1-123.129.242.255         oldl;
  123.183.223.1-123.183.223.255         oldl;
  124.232.148.1-124.232.148.255         oldl;
  124.95.156.1-124.95.156.255           oldl;
  124.95.172.1-124.95.172.255           oldl;
  124.95.173.1-124.95.173.255           oldl;
  124.95.174.1-124.95.174.255           oldl;
  125.221.46.1-125.221.46.255           oldl;
  125.39.148.1-125.39.150.255           oldl;
  125.39.72.1-125.39.72.255             oldl;
  125.46.42.1-125.46.42.255             oldl;
  125.78.242.1-125.78.242.255           oldl;
  125.78.247.1-125.78.247.255           oldl;
  180.153.115.1-180.153.115.255         oldl;
  180.153.91.1-180.153.91.255           oldl;
  182.118.125.1-182.118.125.255         oldl;
  182.118.13.1-182.118.18.255           oldl;
  182.140.142.1-182.140.142.255         oldl;
  183.136.156.1-183.136.156.255         oldl;
  183.60.208.1-183.60.208.255           oldl;
  183.60.209.1-183.60.209.255           oldl;
  183.63.33.1-183.63.33.255             oldl;
  183.94.216.1-183.94.219.255           oldl;
  183.94.228.1-183.94.231.255           oldl;
  183.94.237.1-183.94.239.255           oldl;
  211.137.100.1-211.137.100.255         oldl;
  211.162.73.1-211.162.73.255           oldl;
  211.98.168.1-211.98.171.255           oldl;
  218.21.68.1-218.21.68.255             oldl;
  218.26.232.1-218.26.232.255           oldl;
  218.59.144.1-218.59.144.255           oldl;
  218.6.13.1-218.6.13.255               oldl;
  218.75.172.1-218.75.172.255           oldl;
  219.129.83.1-219.129.83.255           oldl;
  219.134.132.1-219.134.132.255         oldl;
  220.113.9.1-220.113.9.255             oldl;
  220.115.240.1-220.115.240.255         oldl;
  220.249.103.1-220.249.103.255         oldl;
  221.203.179.1-221.203.179.255         oldl;
  221.204.204.1-221.204.204.255         oldl;
  221.204.220.1-221.204.220.255         oldl;
  221.215.87.1-221.215.87.255           oldl;
  221.235.189.1-221.235.189.255         oldl;
  221.235.205.1-221.235.205.255         oldl;
  221.238.25.1-221.238.25.255           oldl;
  221.4.246.1-221.4.246.255             oldl;
  221.5.8.1-221.5.8.255                 oldl;
  222.141.53.1-222.141.53.255           oldl;
  222.186.19.1-222.186.19.255           oldl;
  222.73.133.1-222.73.133.255           oldl;
  222.73.49.1-222.73.49.255             oldl;
  58.222.25.1-58.222.25.255             oldl;
  58.251.57.1-58.251.60.255             oldl;
  58.251.61.1-58.251.61.255             oldl;
  58.252.209.1-58.252.209.255           oldl;
  58.254.134.1-58.254.134.255           oldl;
  58.255.249.1-58.255.249.255           oldl;
  58.255.250.1-58.255.253.255           oldl;
  58.61.152.1-58.61.152.255             oldl;
  58.61.39.1-58.61.39.255               oldl;
  58.67.137.1-58.67.137.255             oldl;
  60.18.146.1-60.18.146.255             oldl;
  60.18.147.1-60.18.147.255             oldl;
  60.19.64.1-60.19.64.255               oldl;
  60.21.219.1-60.21.219.255             oldl;
  60.214.64.1-60.214.64.255             oldl;
  60.217.235.1-60.217.235.255           oldl;
  60.221.254.1-60.221.254.255           oldl;
  61.137.191.1-61.137.191.255           oldl;
  61.138.177.1-61.138.177.255           oldl;
  61.139.103.1-61.139.103.255           oldl;
  61.147.76.1-61.147.76.255             oldl;
  61.147.81.1-61.147.81.255             oldl;
  61.147.94.1-61.147.94.255             oldl;
  61.152.105.1-61.152.105.255           oldl;
  61.178.227.1-61.178.227.255           oldl;
  61.183.55.1-61.183.55.255             oldl;
  61.188.190.1-61.188.190.255           oldl;
  61.235.71.1-61.235.71.255             oldl;
  61.54.12.1-61.54.12.255               oldl;
}

# Classify clients by User-Agent: $ifbot is set to "isbot" when any of the
# patterns below matches. No default value is declared in this map.
# All patterns are case-insensitive (~*) except Mediapartners-Google (~).
map $http_user_agent $ifbot {
  "~*Bot" isbot;
  "~*Spider" isbot;
  "~*archive" isbot;
  "~*search" isbot;
  "~*Yahoo" isbot;
  "~Mediapartners-Google" isbot;
  "~*Ruby" isbot;
  "~*Player" isbot;
  "~*Go http package" isbot;
  "~*Lynx" isbot;
  "~*Sleuth" isbot;
  "~*Python" isbot;
  "~*Wget" isbot;
  "~*curl" isbot;
  "~*perl" isbot;
  "~*libfetch" isbot;
}

# Filter script access: $my_filetype is "script" when the URI ends in one of
# the script extensions below (case-insensitive).
# The dot is escaped so that only a real extension matches; an unescaped
# ".py$" would also match URIs such as "/happy".
map                $uri $my_filetype {
  "~*\.(py|rb|fcgi|cgi|php|pl)$"  script;
}
# For Discuz, URL rewriting is usually used to expose *.php pages as *.html.
# This map flags those pseudo-static URLs ($my_uritype = "f_static") so that
# front.conf can route them to the script handlers instead of the static cache.
# The patterns mirror the rewrite rules in site/yourdomain.conf.
# Dots are escaped and \w is used for the forum id; a bare "w+" would match
# only runs of the letter "w".
map                                             $request_uri $my_uritype {
  "~/topic-(.+)\.html$"                         f_static;
  "~/article-([0-9]+)-([0-9]+)\.html$"          f_static;
  "~/forum-(\w+)-([0-9]+)\.html$"               f_static;
  "~/thread-([0-9]+)-([0-9]+)-([0-9]+)\.html$"  f_static;
  "~/group-([0-9]+)-([0-9]+)\.html$"            f_static;
  "~/space-(username|uid)-(.+)\.html$"          f_static;
  "~/blog-([0-9]+)-([0-9]+)\.html$"             f_static;
  "~/(fid|tid)-([0-9]+)\.html$"                 f_static;
}

Use Lua to identify robots that pretend to be web browsers

cookie.conf (for this script, please refer to http://ocdn.me/nginx-defense.html)

# Cookie challenge: the client must present a "rdmid" cookie equal to
# md5("FreeBSD" .. client address .. rdmnum). If it does not, both cookies
# are (re)issued and the client is redirected to the same URL.
# Keep apostrophes out of the Lua code below: it sits inside a single-quoted nginx string.
rewrite_by_lua '
  -- reuse the number the client sent, or draw a fresh one
  local nonce = ngx.var.cookie_rdmnum or math.random(999999)
  local expected = ngx.md5("FreeBSD" .. ngx.var.remote_addr .. nonce)
  -- a matching cookie means the challenge is passed; continue normally
  if ngx.var.cookie_rdmid == expected then
    return
  end
  ngx.header["Set-Cookie"] = {"rdmid=" .. expected, "rdmnum=" .. nonce}
  return ngx.redirect(ngx.var.scheme .. "://" .. ngx.var.host .. ngx.var.request_uri)
';

front.conf

# Server-level defaults for the front server; the locations below override some of them.
access_log /var/log/nginx/front-access.log main;
error_log /var/log/nginx/front-error.log;

# add vary to fix gzip related issue
more_set_headers 'Vary: Accept-Encoding, User-Agent';

# limit download speed: full speed for the first 8m of a response, then 20k
limit_rate_after 8m;
limit_rate 20k;

# limit total requests number
# (the staiczone zone is declared with rate=30r/s in the http block; the
#  original note here says 300/s should be enough for normal browsing)
limit_req zone=staiczone burst=10 nodelay;
limit_conn clientzone 10;
# Cache purge comes first so that purging always works.
# NOTE(review): no allow/deny rule is visible here; any client that passes the
# cookie check in cookie.conf can purge cache entries - confirm this is intended.
location ~ /purge(/.*) {
  access_log /var/log/nginx/purge-disk.log main;
  limit_req zone=scriptzone;
  limit_conn clientzone 1;
  include cookie.conf;
  # The key is rebuilt from host + captured path + query string.
  proxy_cache_purge cache_disk $host$1$is_args$args;
}

# Entry point: dispatch each request to @human or @isbot.
location / {
  # use error_page for redirection. No need to worry about the strange http response code.
  # We have already enabled recursive_error_pages in http part
  error_page 481 = @human;
  error_page 482 = @isbot;

  # Clients from the offline-download ranges in map.conf are redirected to their own address.
  if ( $geo = "oldl" ) {
    access_log /var/log/nginx/block-badip.log main;
    return 301 $scheme://$remote_addr$request_uri;
  }
  # sort out robots and spiders
  if ( $ifbot = "isbot" ) {
    return 482;
  }
  # everything else is treated as a human visitor
  return 481;
}

# For human browse
# Static requests are served through the disk cache; anything that needs the
# script backend returns 483, which error_page hands over to @script.
location                   @human {
  internal;
  error_log                /var/log/nginx/human-static-error.log;
  access_log               /var/log/nginx/human-static-access.log cache;
  error_page               483 = @script;
  # Anything other than GET/HEAD is never static.
  if                       ( $request_method !~ (GET|HEAD) ) {
    return                 483;
  }
  # Script extensions (see $my_filetype in map.conf).
  if                       ( $my_filetype = "script"  ) {
    return                 483;
  }
  # Discuz pseudo-static URLs (see $my_uritype in map.conf).
  if                       ( $request_uri ~ "(/$|/\?)" ) {
    return                 483;
  }
  # The check above matches directory requests: a URI ending in "/" or
  # containing "/?". The "?" must be escaped: an unescaped "/?" means
  # "optional slash" and matches every request, which would send all
  # traffic to @script and bypass the cache below.
  if                       ( $my_uritype = "f_static" ) {
    return                 483;
  }
  include                  proxy.conf;
  # If you only use 1 server, there is no need to use proxy_cache
  proxy_cache              cache_disk;
  # Clear the cookie for static files. There is no need to add cookie for static files
  more_clear_input_headers 'Cookie';
  proxy_pass               http://backend;
}
# Script (php) requests from human visitors.
location                   @script {
  internal;
  error_log                /var/log/nginx/human-script-error.log;
  access_log               /var/log/nginx/human-script-access.log main;
  # limit access to php files
  limit_req                zone=scriptzone burst=3 nodelay;
  # sort out the fake web browser
  # exclude discuz flash upload
  # The pattern must not start with "~": inside the quoted regex a tilde is a
  # literal character, so the exclusion would never match and the cookie
  # challenge would also be applied to the flash uploader.
  if                       ( $request_uri !~ "mod=swfupload&action=swfupload" ) {
    include                cookie.conf;
  }
  include                  proxy.conf;
  proxy_pass               http://backend;
}

# below code is used for spider/robot access
# Static requests are served through the disk cache; anything that needs the
# script backend returns 484, which error_page hands over to @botscript.
location                   @isbot {
  internal;
  error_log                /var/log/nginx/bot-static-error.log;
  access_log               /var/log/nginx/bot-static-access.log cache;
  error_page               484 = @botscript;
  # for robot access, no post allowed
  if                       ( $request_method !~ (GET|HEAD) ) {
    access_log             /var/log/nginx/bot-block.log main;
    return                 403;
  }
  # Script extensions (see $my_filetype in map.conf).
  if                       ( $my_filetype = "script"  ) {
    return                 484;
  }
  # Discuz pseudo-static URLs (see $my_uritype in map.conf).
  if                       ( $my_uritype = "f_static" ) {
    return                 484;
  }
  # Directory requests: URI ends in "/" or contains "/?". The "?" must be
  # escaped; an unescaped "/?" matches every request and would bypass the cache below.
  if                       ( $request_uri ~ "(/$|/\?)" ) {
    return                 484;
  }
  include                  proxy.conf;
  # If you only use 1 server, there is no need to use proxy_cache
  proxy_ignore_headers     Set-Cookie Expires Cache-Control X-Accel-Expires X-Accel-Redirect;
  proxy_cache              cache_disk;
  more_clear_input_headers 'Cookie';
  proxy_pass               http://backend;
}
# Script (php) requests from spiders/robots: rate-limited, and cached.
location @botscript {
  internal;
  error_log /var/log/nginx/bot-script-error.log;
  access_log /var/log/nginx/bot-script-access.log cache;

  # limit robot access to php files
  limit_req zone=scriptzone;
  limit_conn clientzone 10;

  include proxy.conf;

  # enable cache for spider: backend cache-control headers are ignored and
  # the request Cookie header is stripped before proxying
  proxy_cache cache_disk;
  proxy_ignore_headers Set-Cookie Expires Cache-Control X-Accel-Expires X-Accel-Redirect;
  more_clear_input_headers 'Cookie';
  proxy_pass http://backend;
}

upstream.conf

Contents of upstream.conf:

# Backend pool reached over TCP, keeping up to 128 idle connections per worker.
# NOTE(review): upstream keepalive normally also needs HTTP/1.1 and an empty
# Connection header on the proxy side; proxy.conf is not shown - confirm it sets them.
upstream backend {
  server 12.34.56.78:90;
  keepalive 128;
}

# if you only have 1 server, you can also use socket which is faster than IP connection
# This is an alternative to the TCP definition: both declare the name
# "backend", so use one or the other.
upstream backend {
  server unix:/tmp/nginx-local.sock;
}

site/yourdomain.conf

# Backend virtual host for the Discuz site. It listens on the unix socket that
# upstream.conf points at and passes PHP requests to php-fpm.
server                  {
  # use socket can save resource and it's faster than ip connection
  listen                unix:/tmp/nginx-local.sock;
  server_name           .yourdomain.com;
  index                 index.php;
  root                  /path-to-your-site;
  gzip                  on;
  access_log            /var/log/nginx/yourdomain-access.log main;
  error_log             /var/log/nginx/yourdomain-error.log;

  # Discuz pseudo-static rules: map *.html URLs back to the php entry points.
  # Dots are escaped so ".html" is matched literally, and the forum id uses
  # \w+ (a bare "w+" would only match runs of the letter "w").
  location / {
    rewrite             ^([^.]*)/topic-(.+)\.html$ $1/portal.php?mod=topic&topic=$2 last;
    rewrite             ^([^.]*)/article-([0-9]+)-([0-9]+)\.html$ $1/portal.php?mod=view&aid=$2&page=$3 last;
    rewrite             ^([^.]*)/forum-(\w+)-([0-9]+)\.html$ $1/forum.php?mod=forumdisplay&fid=$2&page=$3 last;
    rewrite             ^([^.]*)/thread-([0-9]+)-([0-9]+)-([0-9]+)\.html$ $1/forum.php?mod=viewthread&tid=$2&extra=page%3D$4&page=$3 last;
    rewrite             ^([^.]*)/group-([0-9]+)-([0-9]+)\.html$ $1/forum.php?mod=group&fid=$2&page=$3 last;
    rewrite             ^([^.]*)/space-(username|uid)-(.+)\.html$ $1/home.php?mod=space&$2=$3 last;
    rewrite             ^([^.]*)/blog-([0-9]+)-([0-9]+)\.html$ $1/home.php?mod=space&uid=$2&do=blog&id=$3 last;
    rewrite             ^([^.]*)/(fid|tid)-([0-9]+)\.html$ $1/index.php?action=$2&value=$3 last;
    rewrite             ^([^.]*)/([a-z]+[a-z0-9_]*)-([a-z0-9_-]+)\.html$ $1/plugin.php?id=$2:$3 last;
  }
  # make sure that no php can execute in the data, config and template folders
  # (must stay above the generic php location: regex locations are tried in order)
  location              ~ /(data|config|template)/.*\.php$ {
    return              403;
  }

  # Everything else ending in .php goes to php-fpm; responses are marked as already expired.
  location              ~ \.php$ {
    include             fastcgi_params;
    fastcgi_param       SCRIPT_FILENAME $document_root$fastcgi_script_name;
    fastcgi_pass        unix:/tmp/php-fpm-yourdomain.sock;
    fastcgi_index       index.php;
    expires             -1;
  }
}

logrotate

#!/bin/sh
# Rotate the nginx logs by moving the whole log directory aside.
# The archive directory is suffixed with the abbreviated weekday name
# (date +%a), so an archive is overwritten when the same weekday comes round again.
today=$(date +%a)
logdir='/var/log/nginx'

# Refuse to continue with an empty path component: the rm -rf below must
# never be handed a truncated path.
if [ -z "${today}" ] || [ -z "${logdir}" ]; then
  exit 1
fi

# Drop the archive from the previous occurrence of this weekday, then move the
# live directory into its place. Stop if the move or the mkdir fails, so nginx
# is not reloaded against a missing log directory.
rm -rf "${logdir}.${today}"
mv "${logdir}" "${logdir}.${today}" || exit 1
mkdir "${logdir}" || exit 1

# Reload nginx so it opens fresh log files in the new directory.
service nginx reload

With this configuration, you can set different access rules for human access and bot access, and you can also separate static file access from script (PHP) access. It can speed up your website and reduce the workload of your server.