#!/usr/bin/env bash
# This script dynamically adjusts the number of allocated hugepages depending on which libvirt VMs are running.
# Based on Thomas Lindroth's shell script which sets up the host for a VM: http://sprunge.us/JUfS
# Install this script as /etc/libvirt/hooks/qemu

# --- CPU layout (edit to match the host) -------------------------------------
# Every logical CPU of the machine, as a CPU list and as the matching hex mask.
TOTAL_CORES='0-15'
TOTAL_CORES_MASK=FFFF   # bits 0-15 set, i.e. the same CPUs as TOTAL_CORES
# CPUs that stay available to the host while a VM is running.
HOST_CORES='0-1,8-9'
HOST_CORES_MASK=303     # hex mask with bits 0, 1, 8 and 9 set (same CPUs as HOST_CORES)
# CPUs reserved for the virtual machine(s).
VIRT_CORES='2,10,3,11,4,12,5,13,6,14,7,15'
# Alternative layout kept for reference:
#VIRT_CORES='2-3,6-7'

# --- Hugepage state at the time the hook starts --------------------------------
# Second field of the "Hugepagesize" line in /proc/meminfo, multiplied by 1024
# (the kernel reports the figure in kB, so the result is in bytes).
HUGEPAGES_SIZE=$(awk '/Hugepagesize/ { print $2 }' /proc/meminfo)
HUGEPAGES_SIZE=$(( $HUGEPAGES_SIZE * 1024 ))
# Current value of vm.nr_hugepages.
HUGEPAGES_ALLOCATED=$(sysctl -n vm.nr_hugepages)

# --- Hook arguments -------------------------------------------------------------
# $1 is used as the VM name; $2 and $3 are joined into "<operation>/<sub-operation>"
# (compared later against e.g. "prepare/begin" and "release/end").
VM_NAME="$1"
VM_ACTION="$2/$3"

#######################################
# Restrict the host-side systemd units (user.slice, system.slice, init.scope)
# to the host CPUs by setting their runtime AllowedCPUs property.
# An earlier cset-based approach (cset set / cset shield --kthread on) was
# replaced by these systemd properties.
# Globals:   HOST_CORES (read)
# Arguments: none
# Returns:   exit status of the last systemctl call
#######################################
shield_vm() {
    local unit
    for unit in user.slice system.slice init.scope; do
        systemctl set-property --runtime -- "$unit" "AllowedCPUs=$HOST_CORES"
    done
}

#######################################
# Undo shield_vm: write the all-CPU mask to the writeback workqueue and set
# AllowedCPUs to an empty value on user.slice, system.slice and init.scope.
# NOTE(review): the empty assignment presumably clears the CPU restriction;
# an explicit AllowedCPUs=$TOTAL_CORES was the previously tried alternative
# (as was "cset shield --reset").
# Globals:   TOTAL_CORES_MASK (read)
# Arguments: none
# Returns:   exit status of the last systemctl call
#######################################
unshield_vm() {
    local unit

    echo "$TOTAL_CORES_MASK" > /sys/bus/workqueue/devices/writeback/cpumask

    for unit in user.slice system.slice init.scope; do
        systemctl set-property --runtime -- "$unit" AllowedCPUs=
    done
}

# Manual invocation: when the first argument is "shield" or "unshield" (instead
# of a VM name), only apply or remove the CPU shielding and exit successfully.
case "$VM_NAME" in
    shield)
        shield_vm
        exit 0
        ;;
    unshield)
        unshield_vm
        exit 0
        ;;
esac

# Work from the directory containing this script so that the sibling helper
# ./vm-mem-requirements is found regardless of the caller's working directory.
# The path is quoted so that whitespace in it is not word-split, and the cd is
# checked so that the helper is never looked up in an unintended directory.
if ! cd -- "$(dirname -- "$0")"; then
    echo "libvirt-qemu hook: cannot change to the directory of $0" >&2
    exit 1
fi

# Memory requirement of the VM as printed by the helper.
# NOTE(review): presumably in bytes, since it is divided by the hugepage size
# in bytes below - confirm against vm-mem-requirements.
vm_mem_required=$(./vm-mem-requirements "$VM_NAME")

# Number of hugepages needed by the VM (integer division, rounds down).
# Both operands are validated first: an empty or non-numeric helper output, or
# a missing/zero hugepage size, would otherwise abort the arithmetic expansion
# and leave VM_HUGEPAGES_NEED unset for the handlers below.
if [[ $vm_mem_required =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ \
      && $HUGEPAGES_SIZE =~ ^[1-9][0-9]*$ ]]; then
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    VM_HUGEPAGES_NEED=$(( 10#${BASH_REMATCH[1]} / HUGEPAGES_SIZE ))
else
    echo "libvirt-qemu hook: cannot compute hugepages for VM '$VM_NAME'" \
         "(memory='$vm_mem_required', hugepage size='$HUGEPAGES_SIZE'); assuming 0" >&2
    VM_HUGEPAGES_NEED=0
fi

# "prepare/begin": grow the hugepage pool for the VM that is about to start
# and, if the pool was empty when the hook started, shield the VM CPUs and
# apply the low-jitter host tuning.
if [[ "$VM_ACTION" == 'prepare/begin' ]]; then
    # Flush dirty data, drop caches and trigger memory compaction before the
    # hugepage pool is enlarged.
    sync
    echo 3 > /proc/sys/vm/drop_caches
    echo 1 > /proc/sys/vm/compact_memory

    # New pool size = pages already allocated + pages this VM needs.
    VM_HUGEPAGES_TOTAL=$(( $HUGEPAGES_ALLOCATED + $VM_HUGEPAGES_NEED ))
    sysctl "vm.nr_hugepages=$VM_HUGEPAGES_TOTAL"

    # Host tuning is applied only when no hugepages were allocated beforehand
    # (presumably meaning this is the first VM to start).
    if [[ "$HUGEPAGES_ALLOCATED" == '0' ]]; then
        echo "libvirt-qemu cset: Reserving CPUs $VIRT_CORES for VM $VM_NAME" > /dev/kmsg 2>&1
        shield_vm

        # Reduce VM jitter:
        # https://www.kernel.org/doc/Documentation/kernel-per-CPU-kthreads.txt
        sysctl vm.stat_interval=120
        sysctl -w kernel.watchdog=0

        # The kernel's dirty-page writeback uses kthread workers which, per the
        # original author's note, introduce large arbitrary latencies during
        # host disk writes and are not moved by the CPU shielding. Restrict the
        # writeback workqueue to the host CPUs (HOST_CORES_MASK).
        echo "$HOST_CORES_MASK" > /sys/bus/workqueue/devices/writeback/cpumask

        # THP can allegedly result in jitter, so switch it off.
        echo never > /sys/kernel/mm/transparent_hugepage/enabled

        # Select the "performance" cpufreq governor on every CPU.
        echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

        echo 0 > /sys/bus/workqueue/devices/writeback/numa
        echo "libvirt-qemu cset: Successfully reserved CPUs $VIRT_CORES" > /dev/kmsg 2>&1
    fi
fi

# "release/end": shrink the hugepage pool by what the stopped VM used and, once
# the pool is back to zero, remove the CPU shielding and revert the host tuning.
if [[ "$VM_ACTION" == 'release/end' ]]; then
    # Remaining pool size, clamped so it never goes below zero.
    VM_HUGEPAGES_TOTAL=$(( $HUGEPAGES_ALLOCATED - $VM_HUGEPAGES_NEED ))
    if (( $VM_HUGEPAGES_TOTAL < 0 )); then
        VM_HUGEPAGES_TOTAL=0
    fi
    sysctl "vm.nr_hugepages=$VM_HUGEPAGES_TOTAL"

    # An empty pool is treated as "all VMs offline".
    if [[ "$VM_HUGEPAGES_TOTAL" == '0' ]]; then
        sysctl vm.stat_interval=1
        sysctl -w kernel.watchdog=1

        echo "libvirt-qemu cset: Releasing CPUs $VIRT_CORES from VM $VM_NAME" > /dev/kmsg 2>&1
        unshield_vm

        # Re-enable THP and switch every CPU to the "powersave" governor.
        echo always > /sys/kernel/mm/transparent_hugepage/enabled
        echo powersave | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

        # Let the writeback workqueue use all CPUs again (unshield_vm writes
        # the same mask; the write is repeated here) and set its numa
        # attribute back to 1.
        echo "$TOTAL_CORES_MASK" > /sys/bus/workqueue/devices/writeback/cpumask
        echo 1 > /sys/bus/workqueue/devices/writeback/numa

        echo "libvirt-qemu cset: Successfully released CPUs $VIRT_CORES" > /dev/kmsg 2>&1
    fi
fi