#!/bin/bash -x
## HTBWondershaper: script to do traffic shaping and policing with the HTB packet scheduler. Version 1.2.1
# By Hans-Cees at hanscees<-nospammlumberjack->@hanscees.com. I already apologise for typos in the documentation here.
#
## The script can be called as:
#   HTBWondershaper          : implements the defined TC classes and filters
#   HTBWondershaper status   : shows counters
#   HTBWondershaper stop     : flushes TC classes and the iptables -t mangle FORWARD and OUTPUT chains
#
# Before using it you should adjust the variables DOWNLINK, CEIL, EXTDEV and LANDEV.
# You can just make this script executable (chmod +x HTBWondershaper) and run it. It will make latency smaller
# and gives priority to traffic with TOS minimum-delay, as well as to small packets (syns, acks and the like).
#
# This script implements traffic shaping: it filters outgoing traffic into classes and then sets limits/prioritises
# these classes. Each class gets a lower and an upper bandwidth limit. Each class also has a priority, and if the prio
# is lower, the traffic goes first.
#
# Shaping: The process of delaying packets before they go out to make traffic conform to a
# configured maximum rate. Shaping is performed on egress (outgoing traffic). Colloquially, dropping packets
# to slow traffic down is also often called Shaping.
#
# Policing: Delaying or dropping packets in order to make traffic stay below a configured bandwidth.
# In Linux, policing can only drop a packet and not delay it - there is no 'ingress queue'.
#
# Strength of the tactics of this script is:
#  - it uses the HTB filter: this guarantees a minimum bandwidth per class and divides surplus bandwidth evenly
#  - it uses "iptables marks" to filter traffic into classes
#    This means you can use the rather simple iptables language to put specific traffic into TC classes
#    rather than the complex non-documented TC stuff.
#########################################
### Example (given the classes set up below):
# For instance: pin network 192.168.2.0 behind this server down to 50 Kb/s UPload:
# set root class on external device
#   tc qdisc add dev $EXTDEV root handle 1: htb default 13
#   tc class add dev $EXTDEV parent 1: classid 1:1 htb rate 10000kbit ceil 10000kbit
# make class with 50kb ceiling:
#   tc class add dev $EXTDEV parent 1:1 classid 1:14 htb rate 10kbit ceil 50kbit prio 5
#   tc qdisc add dev $EXTDEV parent 1:14 handle 140: sfq perturb 10
# filter traffic mark 5 into it (set handle to 5):
#   tc filter add dev $EXTDEV parent 1:0 protocol ip prio 5 handle 5 fw classid 1:14
# give specific traffic mark 5
#   iptables -t mangle -I FORWARD -s 192.168.2.0/24 -j RETURN
#   iptables -t mangle -I FORWARD -s 192.168.2.0/24 -o $EXTDEV -j MARK --set-mark 0x5
#
# For download speed, for instance: pin network 192.168.2.0 behind this server down to 50 Kb/s DOWNload (=~ 6kB)
# (so use the internal interface for classes):
#   tc qdisc add dev $LANDEV root handle 1: htb default 13
#   tc class add dev $LANDEV parent 1: classid 1:1 htb rate 10000kbit ceil 10000kbit
# make class with 50kb ceiling:
#   tc class add dev $LANDEV parent 1:1 classid 1:14 htb rate 10kbit ceil 50kbit prio 5
#   tc qdisc add dev $LANDEV parent 1:14 handle 140: sfq perturb 10
# filter traffic mark 9 into it (set handle to 9):
#   tc filter add dev $LANDEV parent 1:0 protocol ip prio 5 handle 9 fw classid 1:14
# give specific traffic mark 9
#   iptables -t mangle -I FORWARD -d 192.168.2.0/24 -j RETURN
#   iptables -t mangle -I FORWARD -d 192.168.2.0/24 -o $LANDEV -j MARK --set-mark 0x9
#
# watch it:
#tc -s qdisc show dev $LANDEV
#tc -s class show dev $LANDEV
#tc -s filter show dev $LANDEV
# delete it again:
#tc qdisc del dev $LANDEV root 2> /dev/null > /dev/null
########## end Example ####################################################

######### Target for this script (which you will probably want to alter)
# This script is written for an adsl line where a local
# web-server generates too much
# traffic. Therefore the rules used are aimed to prioritise LAN http/other traffic from
# behind the server above the webserver's upload to the internet.
# If you have a different scenario please adjust the iptables rules.
#
#  internet <------> server with busy webserver <--------> lan
#           <------------------ priority 1 ------------------
#           <--------------- priority 5
#           webserver (src tcp 80)
#
#### Further reading ################################
# This script is based on this howto: Better bandwidth
#   http://lartc.org/howto/lartc.cookbook.fullnat.intro.html
# part of the linux howto on bandwidth:
#   http://lartc.org/howto/index.html
# see also "man tc", see http://lartc.org/manpages/
# a much more deep-going script is here: http://digriz.org.uk/jdg-qos-script/
# Explanation of htb
#   http://lartc.org/howto/lartc.qdisc.classful.html#AEN1072
# htb home
#   http://luxik.cdi.cz/~devik/qos/htb/
# a good other similar script: http://www.linuxinfor.com/english/ADSL-Bandwidth-Management/implementation.html
# and another here: http://www.freenet.org.nz/python/pyshaper/pyshaper.conf.html
#### END Further reading ###########################

# Download rate limit in kbit/s. Used for the ingress filter all the way below (2mb line).
DOWNLINK=1950

# Upload ceiling in kbit/s.
# Adjust CEIL to 75% of your upstream bandwidth limit by now (1mb line).
# Tinkering with CEIL and the ceil-related values in the class table is crucial for success.
# With 80% of upload, my line becomes clogged. With 70% all goes well.
# NOTE(review): 950 is about 95% of a 1000 kbit/s uplink, not 75% - confirm the intended value.
CEIL=950

## On with the script:
## Set devices: EXTDEV is your internet/outside device, LANDEV the inside one.
EXTDEV=eth1
LANDEV=eth0

## "status": print the qdisc/class/filter counters of the external device and the
## mangle FORWARD/OUTPUT rule counters, then exit without changing anything.
# see http://www.docum.org/docum.org/faq/cache/33.html
if [ "$1" = "status" ]; then
  echo "[qdisc]"
  tc -s qdisc show dev "$EXTDEV"
  echo "[class]"
  tc -s class show dev "$EXTDEV"
  echo "[filter]"
  tc -s filter show dev "$EXTDEV"
  echo "[iptables]"
  iptables -t mangle -L FORWARD -v 2> /dev/null
  iptables -t mangle -L OUTPUT -v 2> /dev/null
  exit
fi

## Before either stopping or running the script, flush things first.
# Reset everything to a known state (cleared). Errors are silenced on purpose:
# deleting a qdisc that is not there is expected on a first run.
tc qdisc del dev "$EXTDEV" root > /dev/null 2>&1
tc qdisc del dev "$EXTDEV" ingress > /dev/null 2>&1
# -F without a chain name flushes every chain of the mangle table.
/sbin/iptables -F -t mangle

## "stop": everything has been cleared above, so just report and exit.
if [ "$1" = "stop" ]; then
  echo "Shaping removed on $EXTDEV."
  exit
fi

# Make TC CLASSES
# First we set up some qdiscs in which we will classify the traffic.
# We create a htb qdisc with 4 leaf classes (1:10 - 1:13) with ascending priority. Then we have classes that will
# always get their allocated rate, but can use the unused bandwidth that other classes don't need.
# Recall that classes with higher priority (i.e. with a lower prio number) will get excess
# bandwidth allocated first. Our connection is 2Mb down 1000kbits/s up Adsl.
# I use 950kbit/s as ceil rate just because it's the highest I can set it before
# latency starts to grow, due to buffer filling in whatever place between us and remote
# hosts. This parameter should be tuned experimentally, raising and lowering it while
# observing latency between some near hosts.
#
# If you get this error: "kernel: HTB: quantum of class 10012 is small" see:
#   http://www.docum.org/docum.org/faq/cache/31.html
# When a packet with size > quantum is sent, it will be sent and an error that the quantum is too small
# will be logged. But there is no pay back. The WRR scheduler is faster than the DRR scheduler. So make sure
# quantum is bigger than the default packet size. For 15 kbyte/s and default r2q, quantum is 1500 and this
# is exactly the maximum packet size. If you want to tune htb for rates smaller than 15 kbyte/s, you can
# manually set the r2q and/or quantum.
# Root HTB qdisc on the external device; unclassified traffic goes to class 1:13.
tc qdisc add dev "$EXTDEV" root handle 1: htb default 13

# Parent class capped at CEIL, then the four leaf classes that share it.
tc class add dev "$EXTDEV" parent 1: classid 1:1 htb rate "${CEIL}kbit" ceil "${CEIL}kbit"
tc class add dev "$EXTDEV" parent 1:1 classid 1:10 htb rate 150kbit ceil 300kbit prio 0
tc class add dev "$EXTDEV" parent 1:1 classid 1:11 htb rate 200kbit ceil "${CEIL}kbit" prio 1
tc class add dev "$EXTDEV" parent 1:1 classid 1:12 htb rate 50kbit ceil "${CEIL}kbit" prio 2
# This class is for the webserver upload. We set its ceil not at 100% but at 80% of CEIL
# (integer arithmetic: 8 * CEIL / 10).
tc class add dev "$EXTDEV" parent 1:1 classid 1:13 htb rate 100kbit ceil "$(( 8 * CEIL / 10 ))kbit" prio 3

# One SFQ per leaf class (see the perturb note below).
tc qdisc add dev "$EXTDEV" parent 1:10 handle 100: sfq perturb 10
tc qdisc add dev "$EXTDEV" parent 1:11 handle 110: sfq perturb 10
tc qdisc add dev "$EXTDEV" parent 1:12 handle 120: sfq perturb 10
tc qdisc add dev "$EXTDEV" parent 1:13 handle 130: sfq perturb 10

# add another class:
#tc class add dev $EXTDEV parent 1:1 classid 1:14 htb rate 100kbit ceil ${CEIL}kbit prio 4
#tc qdisc add dev $EXTDEV parent 1:14 handle 140: sfq perturb 10

######## what traffic goes where?
##
##          FORWARD to internet             |  OUTPUT to internet
## level 1  acks/small packets all icmp     |  dns icmp time small-packets
## level 2  rest traffic uploads & gets     |  gets etc downloads for server (dport 21|80)
## level 3  smtp upload                     |  all but http output (smtp)
## level 4                                  |  -src http (output webserver)

# http://lartc.org/howto/lartc.qdisc.classless.html#LARTC.SFQ
# perturb: Reconfigure hashing once this many seconds. If unset, hash will never be reconfigured.
# Not recommended. 10 seconds is probably a good value.
# A child sfq makes sure that within a full class the queue is fairly dispersed among
# different traffic connections.

# We have just created a htb tree with one level depth.
# Something like this:
#+---------+
#| root 1: |
#+---------+
#     |
#+---------------------------------------+
#| class 1:1                             |
#+---------------------------------------+
#  |      |      |      |      |      |
#+----+ +----+ +----+ +----+ +----+ +----+
#|1:10| |1:11| |1:12| |1:13| |1:14| |1:15|
#+----+ +----+ +----+ +----+ +----+ +----+
# (1:14 and 1:15 only illustrate the shape; this script creates 1:10 - 1:13.)

## Make a filter to direct traffic into classes
# Classifying packets
# We have created the qdisc setup but no packet classification has been made, so now all outgoing packets
# are going out in class 1:13 (because we used: tc qdisc add dev $EXTDEV root handle 1: htb default 13).
# Now we need to tell which packets go where. This is the most important part.
# Now we set the filters so we can classify the packets with iptables. I really prefer to do it with iptables,
# because they are very flexible and you have a packet count for each rule. Also, with the RETURN target, packets
# don't need to traverse all rules. We execute the following commands:
#
# Each filter sends packets carrying firewall mark N (handle N fw) to one leaf class.
tc filter add dev "$EXTDEV" parent 1:0 protocol ip prio 1 handle 1 fw classid 1:10
tc filter add dev "$EXTDEV" parent 1:0 protocol ip prio 2 handle 2 fw classid 1:11
tc filter add dev "$EXTDEV" parent 1:0 protocol ip prio 3 handle 3 fw classid 1:12
tc filter add dev "$EXTDEV" parent 1:0 protocol ip prio 4 handle 4 fw classid 1:13
# You could add a filter for class 1:14. It is left disabled because class 1:14 itself is
# only present as a commented-out example; enable both together.
#tc filter add dev $EXTDEV parent 1:0 protocol ip prio 4 handle 5 fw classid 1:14

# We have just told the kernel that packets that have a specific FWMARK value (handle x fw) go in the
# specified class (classid x:x).

# IPTABLES PART
# Next you will see how to mark packets with iptables.
# First you have to understand how packets traverse the filters with iptables:
#
#         +------------+                +---------+               +-------------+
# Packet -| PREROUTING |--- routing-----| FORWARD |-------+-------| POSTROUTING |- Packets
# input   +------------+    decision    +---------+       |       +-------------+    out
#                              |                          |
#                         +-------+                    +--------+
#                         | INPUT |---- Local process -| OUTPUT |
#                         +-------+                    +--------+
#
# this one is even better: http://l7-filter.sourceforge.net/PacketFlow.png

### iptables is already set up. So is natting.
# Now check that packets are flowing through 1:13 (the default class):
#tc -s class show dev $EXTDEV
# You can start marking packets by adding rules to the PREROUTING chain in the mangle table.

### This is about traffic coming from the internal workstations to the internet.
## Remember we are talking about limiting upload here, so traffic from your server to the internet.
# If you download http, for instance, the download from the internet to you is not the problem.
# What is competing for bandwidth is:
#  - internet hosts downloading/visiting your website
#      the bulk of this traffic has src-port 80, and dst port some high port
#      output from your webserver (output chain)
#  - your traffic to the internet.
#      the bulk of this are syns with dst port 80 (you browsing) (prerouting)
#      and udp with destination port 53 (you doing dns) (prerouting)
# So if you have a busy website, you better make sure src port 80 going out does not get too much prio and traffic,
# and that dst port 53 and perhaps 80 get high prio. And icmp proto, syns out, and small packets out.

###### test and use this
#iptables -t mangle -A POSTROUTING -p icmp -j MARK --set-mark 0x1
####iptables -t mangle -A POSTROUTING -p icmp -j RETURN
##### Now you should be able to see the packet count increasing when pinging from machines within the private
# network to some site on the Internet.
# Check packet count increasing in 1:10
#tc -s class show dev $EXTDEV
#############
## The original at
#   http://lartc.org/howto/lartc.cookbook.fullnat.intro.html
# uses prerouting for forwarded traffic and output for traffic out of the server.
# However, there are new chains in the mangle table (see man iptables): forward and input and postrouting.
# If you want to be efficient you use output only, for traffic from the server, and forward for forwarded
# traffic. If we want to do all outgoing traffic we can use postrouting. To keep things clear we
# won't use postrouting however.
# Remember we are doing egress filtering and are shaping outgoing traffic.

######## what traffic goes where?
##
##          FORWARD to internet             |  OUTPUT to internet
## level 1  acks/small packets all icmp     |  dns icmp time small-packets
## level 2  rest traffic uploads & gets     |  gets etc downloads for server (dport 21|80)
## level 3  smtp upload                     |  all but http output (smtp)
## level 4                                  |  -src http (output webserver)

############ FORWARDING: traffic from the internal network and server
# Each MARK rule below is followed by a -j RETURN so packets don't traverse all rules. For example, icmp
# packets won't match other rules below their RETURN. Keep that in mind.
# Now we can start adding more rules, let's do proper TOS handling.
# Pattern used throughout: a MARK rule (only for packets leaving via EXTDEV) followed by a
# RETURN with the same match, so a classified packet stops traversing the chain.
iptables -t mangle -A FORWARD -m tos --tos Minimize-Delay -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A FORWARD -m tos --tos Minimize-Delay -j RETURN
iptables -t mangle -A FORWARD -m tos --tos Minimize-Cost -o "$EXTDEV" -j MARK --set-mark 0x3
iptables -t mangle -A FORWARD -m tos --tos Minimize-Cost -j RETURN
iptables -t mangle -A FORWARD -m tos --tos Maximize-Throughput -o "$EXTDEV" -j MARK --set-mark 0x4
iptables -t mangle -A FORWARD -m tos --tos Maximize-Throughput -j RETURN

## level1
# Insert a rule (-I without a rule number puts it at the top of the chain) to make sure
# traffic going out on the LAN side does not have to go through all rules.
iptables -t mangle -I FORWARD -o "$LANDEV" -j RETURN

## level1
## Catches all those small TCP SYN, SYN-ack etc packets going out like this:
# The RETURN is inserted first so that the MARK, inserted after it, ends up above it.
iptables -t mangle -I FORWARD -p tcp -m length --length :64 -j RETURN
iptables -t mangle -I FORWARD -p tcp -m length --length :64 -o "$EXTDEV" -j MARK --set-mark 0x1
# length: This module matches the length of a packet against a specific value or range of values.
#   --length length[:length]. syns/syn-ack are 62 bytes, fin-ack/rst-ack/rst are 60
# Inserts! so they go on top!

## icmp traffic
iptables -t mangle -A FORWARD -p icmp -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A FORWARD -p icmp -j RETURN

# level 3: ease smtp outgoing a bit if it is used.
iptables -t mangle -A FORWARD -p tcp --dport 25 -o "$EXTDEV" -j MARK --set-mark 0x3
iptables -t mangle -A FORWARD -p tcp --dport 25 -j RETURN

# Rest of the upload, such as gets and ssh and so on.
# We terminate the chain with:
iptables -t mangle -A FORWARD -j MARK --set-mark 0x2
iptables -t mangle -A FORWARD -j RETURN

###### OUTPUT server
# I finish the OUTPUT chain with -j MARK --set-mark 0x3 so forward traffic has higher priority.
# TOS handling for traffic generated by the server itself: MARK for packets leaving via
# EXTDEV, then RETURN with the same match so the packet stops traversing the chain.
iptables -t mangle -A OUTPUT -m tos --tos Minimize-Delay -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A OUTPUT -m tos --tos Minimize-Delay -j RETURN
iptables -t mangle -A OUTPUT -m tos --tos Minimize-Cost -o "$EXTDEV" -j MARK --set-mark 0x3
iptables -t mangle -A OUTPUT -m tos --tos Minimize-Cost -j RETURN
iptables -t mangle -A OUTPUT -m tos --tos Maximize-Throughput -o "$EXTDEV" -j MARK --set-mark 0x4
iptables -t mangle -A OUTPUT -m tos --tos Maximize-Throughput -j RETURN

## level1
## icmp so the webserver can slow down clients with flowcontrol
# MARK does not end chain traversal, so it is paired with a RETURN like every other rule
# here; without it an icmp packet keeps going and a later MARK rule can overwrite 0x1.
iptables -t mangle -A OUTPUT -p icmp -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A OUTPUT -p icmp -j RETURN

# other protocols or ports? dns certainly, and time
iptables -t mangle -A OUTPUT -p udp --dport 53 -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A OUTPUT -p udp --dport 53 -j RETURN
iptables -t mangle -A OUTPUT -p tcp --dport 53 -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A OUTPUT -p tcp --dport 53 -j RETURN
iptables -t mangle -A OUTPUT -p udp --dport 123 -o "$EXTDEV" -j MARK --set-mark 0x1
iptables -t mangle -A OUTPUT -p udp --dport 123 -j RETURN

# Insert a rule (-I without a rule number puts it at the top of the chain) to make sure
# output on the LAN side does not have to go through all rules.
iptables -t mangle -I OUTPUT -o "$LANDEV" -j RETURN

# A good idea is to prioritize small packets that begin/control tcp connections (SYN, syn-ack, reset flag set):
#iptables -t mangle -I OUTPUT -p tcp -m length --length :64 -j RETURN
#iptables -t mangle -I OUTPUT -p tcp -m length --length :64 -o $EXTDEV -j MARK --set-mark 0x1
# Or also include dns lookups via the server: they are mostly up to 85 bytes udp.
# The RETURN is inserted first so that the MARK, inserted after it, ends up above it.
iptables -t mangle -I OUTPUT -m length --length :85 -j RETURN
iptables -t mangle -I OUTPUT -m length --length :85 -o "$EXTDEV" -j MARK --set-mark 0x1

# Now make sure all --sport 80 is NOT prioritized: this should be before small packets!
# Webserver output to the internet eats this line up: level 4.
# Inserted at the top of the chain (RETURN first, then MARK above it) so that source-port-80
# packets are classified before any other OUTPUT rule sees them.
iptables -t mangle -I OUTPUT -p tcp --sport 80 -j RETURN
iptables -t mangle -I OUTPUT -p tcp --sport 80 -o "$EXTDEV" -j MARK --set-mark 0x4

## level2
## downloads by/for the server such as anti-virus and updates: port 80 and 21
# Necessary for gets etc bigger than very small
iptables -t mangle -A OUTPUT -p tcp --dport 21 -o "$EXTDEV" -j MARK --set-mark 0x2
iptables -t mangle -A OUTPUT -p tcp --dport 21 -j RETURN
iptables -t mangle -A OUTPUT -p tcp --dport 80 -o "$EXTDEV" -j MARK --set-mark 0x2
iptables -t mangle -A OUTPUT -p tcp --dport 80 -j RETURN

# I finish the OUTPUT chain with -j MARK --set-mark 0x3 so forward traffic has higher priority.
iptables -t mangle -A OUTPUT -o "$EXTDEV" -j MARK --set-mark 0x3
iptables -t mangle -A OUTPUT -j RETURN

########## downlink #############
# from http://lartc.org/howto/lartc.qdisc.terminology.html
#
#                Userspace programs
#                     ^
#                     |
#     +---------------+-----------------------------------------+
#     |               Y                                         |
#     |    -------> IP Stack                                    |
#     |   |              |                                      |
#     |   |              Y                                      |
#     |   |              Y                                      |
#     |   ^              |                                      |
#     |   |  / ----------> Forwarding ->                        |
#     |   ^ /                           |                       |
#     |   |/                            Y                       |
#     |   |                             |                       |
#     |   ^                             Y          /-qdisc1-\   |
#     |   |                            Egress     /--qdisc2--\  |
#  --->->Ingress                       Classifier ---qdisc3---- | ->
#     |   Qdisc                                   \__qdisc4__/  |
#     |                                            \-qdiscN_/   |
#     |                                                         |
#     +----------------------------------------------------------+
# Thanks to Jamal Hadi Salim for this ASCII representation.
#
# The big block represents the kernel. The leftmost arrow represents traffic entering your machine from the
# network. It is then fed to the Ingress Qdisc which may apply Filters to a packet, and decide to drop it.
# This is called 'Policing'.
# This happens at a very early stage, before it has seen a lot of the kernel. It is therefore a very good
# place to drop traffic very early, without consuming a lot of CPU power.

########## downlink ############# (from wondershaper)
# slow downloads down to somewhat less than the real speed to prevent
# queuing at our ISP. Tune to see how high you can set it.
# ISPs tend to have *huge* queues to make sure big downloads are fast
#
# attach ingress policer:
tc qdisc add dev "$EXTDEV" handle ffff: ingress

# filter *everything* to it (0.0.0.0/0), drop everything that's
# coming in too fast (above DOWNLINK kbit/s):
tc filter add dev "$EXTDEV" parent ffff: protocol ip prio 50 u32 match ip src \
  0.0.0.0/0 police rate "${DOWNLINK}kbit" burst 10k drop flowid :1

# Leave a trace in syslog that the script ran to the end.
logger "htpwondershaper finished"