#!/usr/bin/env bash
# shellcheck disable=SC2154
set -o pipefail

readonly DEBUG=0

# Echo wrappers
# DEBUG only produces output when the DEBUG constant above is 1;
# in every other case it is a silent no-op.
if (( DEBUG == 1 )); then
  DEBUG(){ echo "DEBUG: $*";}
else
  DEBUG(){ :; }
fi

# INFO / WARN: print a prefixed message on stdout and carry on.
INFO(){ echo "INFO: $*";}
WARN(){ echo "WARN: $*";}
# ERRO: print a prefixed message and terminate the current (sub)shell with
# status 1. The message goes to stderr so that it is still visible when the
# caller runs inside a command substitution, where stdout is captured.
ERRO(){ echo "ERRO: $*" >&2; exit 1;}
# INFO_E: print a prefixed message on stdout and terminate with status 0.
INFO_E(){ echo "INFO: $*"; exit 0;}

# Helper functions
# Interpret $1 as a boolean switch.
# Returns 0 when $1 is, ignoring case, exactly one of yes / y / 1 / true,
# and 1 for anything else (including an empty or missing argument).
YN(){
  local -r answer=${1,,}
  # anchored on both ends: a value merely containing "y" or "1"
  # (e.g. "deny", "10") must not be treated as enabled
  [[ ${answer} =~ ^(yes|y|1|true)$ ]]
}

# Store the string $1 (plus a trailing newline) in the file $2, truncating it.
# Does nothing when either argument is empty.
write(){
  if [[ -n $1 && -n $2 ]]; then
    local -r payload=$1 target=$2
    echo "${payload}" > "${target}"
  fi
}

# Files with one byte data will use one page of RAM in tmpfs,
# i.e. 4KiB on x86_64 - to avoid that, short strings are not stored as
# file content: the string $1 is base64 encoded and the symbolic link $2 is
# created (or replaced) pointing at "/<encoded string>".
# Does nothing when either argument is empty.
write_l(){
  if [[ -z $1 || -z $2 ]]; then
    return
  fi
  local -r link=$2
  local encoded
  encoded=$(echo "$1" | base64)
  ln --symbolic --force --relative "/${encoded}" "${link}"
}

# Print the string stored in the symbolic link $1: the last path component of
# the resolved link target is base64 decoded and written to stdout.
# Does nothing when $1 is empty.
read_l(){
  [[ -z $1 ]] && return
  local -r file=$1
  # must be a separate statement: all arguments of a single `local` are
  # expanded before any of them is assigned, so "${file}" on the same line
  # would still refer to the caller's variable of that name
  local data
  data=$(basename "$(realpath "${file}")")
  echo "${data}" | base64 -d
}

# Read several counters from /proc/meminfo in one pass.
# Arguments: $1  - name of the caller's array receiving the results
#            $2… - wanted keys as spelled in /proc/meminfo (e.g. MemTotal)
# Result:    for every key found, its value converted from kB to bytes is
#            appended to the array; the n-th requested key lands in the n-th
#            new slot, independent of the order of lines in /proc/meminfo.
# Note:      the array is accessed through a nameref, so its name must not
#            clash with a local variable of this function.
get_mem_stat_multi(){
  # expects name of output array as $1, followed by n wanted keys
  local -ar fields=("${@:2}")
  local -n outref=$1
  # number of elements already present; new values go behind them
  local -ir base=${#outref[@]}
  local -i matches=0 idx
  local line
  while read -r line; do
    for idx in "${!fields[@]}"; do
      if [[ $line =~ ^${fields[idx]}:[[:space:]]+([[:digit:]]+)[[:space:]]+kB$ ]]; then
        outref[base + idx]=$(( BASH_REMATCH[1] * 1024 ))
        ((++matches))
      fi
    done
    # stop reading as soon as every wanted key has been seen
    ((matches == ${#fields[@]})) && break
  done < /proc/meminfo
}

# Print the value of the single /proc/meminfo key $1 as delivered by
# get_mem_stat_multi, without a trailing newline.
get_mem_stat(){
  local -a stat_bytes
  get_mem_stat_multi stat_bytes "$1"
  printf "%s" "${stat_bytes[0]}"
}

# Print the filesystem type holding $1.
# $1 may be an existing directory, or a path whose parent directory exists.
# When df reports "-" as type, a successful `btrfs subvolume show` on the
# path makes the result "btrfs".
# Errors are raised through ERRO; its output is redirected to stderr here
# because callers capture stdout, where the message would otherwise end up in
# the result instead of the log.
get_fs_type(){
  local path fstype
  if [[ -d $1 ]]; then
    path=$1
  elif [[ -d ${1%/*} ]]; then
    path=${1%/*}
  else
    ERRO "swapfc_path is invalid" >&2
  fi

  fstype=$(df --output=fstype "${path}" | tail -n 1)

  if [[ ${fstype} == "-" ]]; then
    if btrfs subvolume show "${path}" &>/dev/null; then
      fstype=btrfs
    else
      ERRO "swapfc_path is located on an unknown filesystem" >&2
    fi
  fi

  echo "${fstype}"
}
# Abort through ERRO unless the script is running with UID 0.
AMI_ROOT(){
  if [[ ${UID} != "0" ]]; then
    ERRO "Script must be run as root!"
  fi
}
# List all regular files named *.swap below the runtime system and
# generator unit directories, one path per line.
FIND_SWAP_UNITS(){
  find /run/systemd/system/ /run/systemd/generator/ -type f -name "*.swap"
}
# Print whatever follows "What=" on each matching line of the unit file $1.
GET_WHAT_FROM_SWAP_UNIT(){
  local -r unit_file=$1
  grep -oP 'What=\K.*' "${unit_file}"
}

# Print the usage summary: the invocation name followed by one line per
# supported sub-command.
help(){
  printf '%s\n' \
    "$0 start|stop|status" \
    "   start  - init daemon" \
    "   stop   - stop daemon" \
    "   status - show some swap status info"
}

# Pause for $1 seconds using the timeout of the `read` builtin instead of an
# external sleep command. Always returns 0.
snore(){
  # NOTE(review): IFS is presumably made local so that the unquoted
  # ${1:+...} expansion below is not affected by a caller's IFS - confirm
  local IFS
  # the descriptor is opened on the first call only; its number is kept in
  # the global _snore_fd and reused by every later call
  [[ -n ${_snore_fd:-} ]] || exec {_snore_fd}<> <(:)
  # -t "$1" is only passed when $1 is non-empty; "|| :" discards the exit
  # status of read
  read -r ${1:+-t "$1"} -u $_snore_fd || :
}

# Global variables
# NCPU is being referenced inside of swap-default.conf
# shellcheck disable=SC2034
NCPU=$(nproc)
PAGE_SIZE=$(getconf PAGESIZE)
RAM_SIZE=$(get_mem_stat MemTotal)

# systemd directory roots (runtime, administrator, vendor)
RUN_SYSD="/run/systemd"
ETC_SYSD="/etc/systemd"
VEN_SYSD="/usr/lib/systemd"

# configuration files
DEF_CONFIG="/usr/share/systemd-swap/swap-default.conf"
ETC_CONFIG="${ETC_SYSD}/swap.conf"

# runtime state of this script
WORK_DIR="${RUN_SYSD}/swap"
LOCK_STARTED="${WORK_DIR}/.started"

# zswap module tree in sysfs
ZSWAP_M="/sys/module/zswap"
ZSWAP_M_P="${ZSWAP_M}/parameters"

# none of the above may change from here on
readonly NCPU PAGE_SIZE RAM_SIZE \
  RUN_SYSD ETC_SYSD VEN_SYSD \
  DEF_CONFIG ETC_CONFIG \
  WORK_DIR LOCK_STARTED \
  ZSWAP_M ZSWAP_M_P

# first two dot-separated fields of the kernel release; the rest is dropped
# shellcheck disable=SC2034
IFS=. read -r KMAJOR KMINOR _ <<< "$(uname -r)"

# Swap unit generator
#
# Writes a swap unit for a device or file below ${RUN_SYSD}/system and links
# it into swap.target.wants (and into local-fs.target.wants when the swap is
# classified as file backed).
# Arguments: any of What=<path> Priority=<n> Options=<opts> Tag=<label>, in
#            any order; What= is mandatory, other arguments are ignored.
# Outputs:   the name of the generated unit on stdout
# Returns:   1 when What= is missing or cannot be resolved by realpath
gen_swap_unit(){
  local What='' Priority='' Options='' Tag='' Type
  local UNIT_NAME UNIT_PATH arg
  for arg in "$@"; do
    case ${arg} in
      # only the leading "Key=" is removed, the value is taken verbatim
      # even when it contains the key text itself
      What=*)     What=${arg#What=} ;;
      Priority=*) Priority=${arg#Priority=} ;;
      Options=*)  Options=${arg#Options=} ;;
      Tag=*)      Tag=${arg#Tag=} ;;
    esac
  done
  [[ -n ${What} ]] || return 1

  # an unresolvable path would otherwise produce a unit with an empty name
  What=$(realpath "${What}") || return 1
  # assume it's a file by default
  Type="File"
  if [[ -b ${What} ]]; then
    Type="Block/Partition"
    # loop devices are block devices backed by a file
    [[ ${What} =~ loop ]] && Type="File"
  fi

  # derive the unit name from the path: drop the leading "/", escape "-" as
  # \x2d, then turn the remaining "/" into "-"
  UNIT_NAME="${What/\/}"
  UNIT_NAME="${UNIT_NAME//-/\\x2d}"
  UNIT_NAME="${UNIT_NAME//\//-}"
  UNIT_PATH="${RUN_SYSD}/system/${UNIT_NAME}.swap"
  {
    echo '[Unit]'
    echo "Description=Swap ${Type}"
    echo 'Documentation=https://github.com/Nefelim4ag/systemd-swap'
    echo
    echo "# Generated by systemd-swap"
    echo "# Tag=${Tag}"
    echo
    echo '[Swap]'
    echo "What=${What}"
    echo "TimeoutSec=1h"
    [[ -z ${Priority} ]] || echo "Priority=${Priority}"
    [[ -z ${Options} ]] || echo "Options=${Options}"
  } > "${UNIT_PATH}"

  ln -srf "${UNIT_PATH}" "${RUN_SYSD}/system/swap.target.wants"

  [[ ${Type} == "File" ]] && ln -srf "${UNIT_PATH}" "${RUN_SYSD}/system/local-fs.target.wants"

  echo "${UNIT_NAME}.swap"
}

# Init
# nothing below works without root privileges
AMI_ROOT

# runtime state directory of this script plus the two *.wants directories
# that generated units are linked into
mkdir --parents \
  "${WORK_DIR}" \
  "${RUN_SYSD}/system/"{local-fs,swap}".target.wants"

(( $# == 0 )) && set -- status
case $1 in
  start)
    # refuse a second start while the marker of a previous one exists
    if [[ -f "${LOCK_STARTED}" ]]; then
      ERRO "$0 already started"
    fi
    touch "${LOCK_STARTED}"

    # the shipped defaults are mandatory
    # shellcheck source=swap-default.conf
    if ! . "${DEF_CONFIG}"; then
      ERRO "Error loading ${DEF_CONFIG}"
    fi

    # Source the administrator configuration and all *.conf fragments.
    # ${ETC_CONFIG} is loaded first (a failure is only a warning), then one
    # fragment per distinct file name in lexicographic order of the names;
    # a fragment that fails to load is fatal.
    load_config_fragments(){
      # config precedence follows systemd scheme:
      # etc > run > lib for all fragments > /etc/systemd/swap.conf
      if [[ -f $ETC_CONFIG ]]; then
        INFO "Load: ${ETC_CONFIG}"
        # shellcheck disable=SC1090
        . "${ETC_CONFIG}" || WARN "Could not load ${ETC_CONFIG}"
      fi
      # file name -> full path; directories are visited lib, run, etc, so a
      # later directory replaces an equally named fragment of an earlier one
      local -A fragment_paths
      for conf in {"${VEN_SYSD}","${RUN_SYSD}","${ETC_SYSD}"}/swap.conf.d/*.conf; do
        if [[ -r ${conf} && ! -d ${conf} ]]; then
          fragment_paths[${conf##*/}]="${conf}"
          DEBUG "found ${conf}"
        elif [[ -f ${conf} ]]; then
          WARN "Permission denied reading: ${conf}"
        fi
      done
      DEBUG "Selected configuration artifacts: ${fragment_paths[*]}"
      # sort lexicographically
      mapfile -t skeys < <(printf '%s\n' "${!fragment_paths[@]}" | sort)
      # a lone empty entry means that no fragment was found
      [[ -z ${skeys:-} ]] && return
      # and finally load it
      for fragment in "${skeys[@]}"; do
        INFO "Load: ${fragment_paths[$fragment]}"
        # shellcheck disable=SC1090
        . "${fragment_paths[$fragment]}" || ERRO "Error loading ${fragment_paths[$fragment]}"
      done
    }
    load_config_fragments

    systemd-notify "READY=1"

    # zram together with any of the other swap mechanisms only triggers a warning
    if YN "${zram_enabled}"; then
      if YN "${zswap_enabled}" || YN "${swapfc_enabled}" || YN "${swapd_auto_swapon}"; then
        WARN "Combining zram with zswap/swapfc/swapd_auto_swapon can lead to LRU inversion and is strongly recommended against"
      fi
    fi

    # Zswap: save the current module parameters, then apply the configured ones
    if YN "${zswap_enabled}"; then
      systemd-notify "STATUS=Setting up Zswap..."
      [[ -d ${ZSWAP_M} ]] || ERRO "Zswap - not supported on current kernel"

      # one symbolic link per parameter below ${WORK_DIR}/zswap keeps the old value
      INFO "Zswap: backup current configuration: start"
      mkdir -p "${WORK_DIR}/zswap"
      for file in "${ZSWAP_M_P}"/*; do
        read -r VAL < "${file}"
        write_l "${VAL}" "${WORK_DIR}/zswap/${file##*/}"
      done
      INFO "Zswap: backup current configuration: complete"

      INFO "Zswap: set new parameters: start"
      INFO "Zswap: Enable: ${zswap_enabled}, Comp: ${zswap_compressor},  Max pool %: ${zswap_max_pool_percent}, Zpool: ${zswap_zpool}"
      write "${zswap_enabled}" "${ZSWAP_M_P}/enabled"
      write "${zswap_compressor}" "${ZSWAP_M_P}/compressor"
      write "${zswap_max_pool_percent}" "${ZSWAP_M_P}/max_pool_percent"
      write "${zswap_zpool}" "${ZSWAP_M_P}/zpool"
      INFO "Zswap: set new parameters: complete"
    fi

    # Zram: make sure the module is available, then set up zram_count
    # devices sharing zram_size and activate each one through a swap unit
    if YN "${zram_enabled}"; then
      systemd-notify "STATUS=Setting up Zram..."
      INFO "Zram: check availability"
      if [[ ! -d /sys/module/zram ]]; then
        INFO "Zram: not part of kernel, trying to find zram module"
        modprobe -n zram || ERRO "Zram: can't find zram module!"
        # workaround for some zram initialization problems
        for (( i = 0; i < 10; ++i )); do
          [[ -d /sys/module/zram ]] && break
          modprobe zram
          snore 1
        done
        # report what actually happened instead of assuming success
        if [[ -d /sys/module/zram ]]; then
          INFO "Zram: module successfully loaded"
        else
          WARN "Zram: module could not be loaded"
        fi
      else
        # NOTE(review): this variable only exists in the process running
        # "start"; the [[ -v ZRAM_ALREADY_SET ]] test in the stop branch runs
        # in a separate invocation and cannot see it
        INFO "Zram: module already loaded" && readonly ZRAM_ALREADY_SET=true
      fi

      systemctl daemon-reload
      # spread total capacity over all devices
      # (guarded: a count of 0 would be a division by zero)
      if (( zram_count > 0 )); then
        zram_size=$(( zram_size / zram_count ))
      fi
      for (( i = 0; i < zram_count; ++i )); do
        # forget the device of the previous round, otherwise a round that
        # yields no device would pick the old one up again below
        zram_dev=""
        INFO "Zram: trying to initialize free device"
        # zramctl is an external program -> return path to first free device
        OUTPUT=$(zramctl -f -a "${zram_alg}" -t "${zram_streams}" -s "${zram_size}" 2>&1)
        case "${OUTPUT}" in
          *"failed to reset: Device or resource busy"*)
            snore 1
          ;;
          *"zramctl: no free zram device found"*)
            WARN "Zram: zramctl can't find free device"
            INFO "Zram: using workaround hook for hot add"
            [[ ! -f /sys/class/zram-control/hot_add ]] && \
              ERRO "Zram: this kernel does not support hot adding zram devices, please use a 4.2+ kernel or see 'modinfo zram´ and create a modprobe rule"
            # reading hot_add yields the number of the device that was added
            read -r NEW_ZRAM < /sys/class/zram-control/hot_add
            INFO "Zram: success: new device /dev/zram${NEW_ZRAM}"
          ;;
          /dev/zram*)
            [[ -b ${OUTPUT} ]] || continue
            zram_dev="${OUTPUT}"
          ;;
        esac
        if [[ -b ${zram_dev} ]]; then
          INFO "Zram: initialized: ${zram_dev}"
          # only start a unit that was really generated for a formatted device
          if mkswap "${zram_dev}" &> /dev/null && \
             UNIT_NAME=$(gen_swap_unit What="${zram_dev}" Options=discard Priority="${zram_prio}" Tag=zram); then
            systemctl daemon-reload
            systemctl start "${UNIT_NAME}"
          else
            WARN "Zram: can't set up swap on ${zram_dev}"
          fi
        else
          WARN "Can't get free Zram device"
        fi
      done
      systemd-notify "STATUS=Zram setup finished"
    fi

    # swapD: take over swap devices found on the system
    if YN "${swapd_auto_swapon}"; then
      systemd-notify "STATUS=Activating swap units..."
      INFO "swapD: pickup devices from systemd-gpt-auto-generator"
      # units mentioning systemd-gpt-auto-generator: swap off and delete
      FIND_SWAP_UNITS | while read -r UNIT_PATH; do
        grep -q systemd-gpt-auto-generator "${UNIT_PATH}" || continue
        DEV=$(GET_WHAT_FROM_SWAP_UNIT "${UNIT_PATH}")
        swapoff "${DEV}"
        rm -vf "${UNIT_PATH}"
      done

      INFO "swapD: searching swap devices"
      mkdir -p "${WORK_DIR}/swapd/"
      for device in $(blkid -t TYPE=swap -o device | grep -vE '(zram|loop)'); do
        # a device that is already in use is dropped from consideration
        for used_device in $(swapon --show=NAME --noheadings); do
          if [[ ${device} == "${used_device}" ]]; then
            unset device
          fi
        done
        if [[ -b ${device} ]]; then
          UNIT_NAME=$(gen_swap_unit What="${device}" Options=discard Priority="${swapd_prio}" Tag="swapd")
          systemctl daemon-reload
          # each device that could be started lowers the next priority by one
          if systemctl start "${UNIT_NAME}"; then
            INFO "swapD: enabled device: ${device}"
            swapd_prio=$(( swapd_prio - 1 ))
          fi
        fi
      done
      systemd-notify "STATUS=Swap unit activation finished"
    fi

    if YN "${swapfc_enabled}"; then
      # swapfc_frequency drives the polling loop: anything but a plain
      # unsigned integer is fatal, a value outside 1..86400 falls back to 1
      if [[ ! ${swapfc_frequency} =~ ^[0-9]+$ ]]; then
        ERRO "swapfc_frequency is not set to a valid integer value: ${swapfc_frequency}"
      elif (( swapfc_frequency < 1 || swapfc_frequency > 86400 )); then
        WARN "swapfc_frequency must be in range of 1..86400: ${swapfc_frequency} - set to 1"
        swapfc_frequency=1
      fi

      # current sleep interval of the main loop, starts at the configured one
      polling_rate=${swapfc_frequency}

      # Double polling_rate unless the doubled value would exceed 86400 or
      # 1000 times swapfc_frequency; a change is reported through WARN.
      double_polling_rate() {
        local -r doubled=$(( polling_rate * 2 ))
        if (( doubled <= 86400 && doubled <= swapfc_frequency * 1000 )); then
          polling_rate=${doubled}
          WARN "swapFC: polling rate doubled to ${polling_rate}s"
        fi
      }

      # Put polling_rate back to swapfc_frequency when it is above it;
      # a change is reported through INFO.
      reset_polling_rate() {
        (( polling_rate > swapfc_frequency )) || return 0
        polling_rate=${swapfc_frequency}
        INFO "swapFC: polling rate reset to ${polling_rate}s"
      }

      # Print the free share of swap as an integer percentage and store the
      # used amount (SwapTotal - SwapFree) in SWAP_USED.
      get_free_swap_perc(){
        local -a swap_stats
        get_mem_stat_multi swap_stats SwapTotal SwapFree
        local -ir total=${swap_stats[0]} free=${swap_stats[1]}
        SWAP_USED=$(( total - free ))
        # the divisor is total + 1, so it can never be zero
        printf '%s\n' "$(( free * 100 / (total + 1) ))"
      }

      # Print MemFree as an integer percentage of MemTotal.
      get_free_ram_perc(){
        local -a ram_stats
        get_mem_stat_multi ram_stats MemTotal MemFree
        local -ir total=${ram_stats[0]} free=${ram_stats[1]}
        printf '%s\n' "$(( free * 100 / total ))"
      }

      # Print the size $1 as converted by numfmt from IEC notation to a
      # number without unit suffix.
      to_bytes(){
        local -r human_size=$1
        numfmt --to=none --from=iec "${human_size}"
      }

      systemd-notify "STATUS=Monitoring memory status..."

      # get_fs_type runs in a subshell here: when it bails out through ERRO
      # only that subshell ends, so its exit status has to be checked
      FSTYPE=$(get_fs_type "${swapfc_path}") \
        || ERRO "swapFC: cannot determine the filesystem type of ${swapfc_path}"
      # must exists before stat()
      mkdir -p "${swapfc_path%/*}"
      if [[ ${FSTYPE} == btrfs ]]; then
        # output and a failure of the subvolume creation are ignored
        btrfs subvolume create "${swapfc_path}" &>/dev/null
      else
        mkdir -p "${swapfc_path}"
      fi

      # without a usable chunk size neither the free space check nor the
      # allocation of swap files can work
      CHUNK_SIZE=$(to_bytes "${swapfc_chunk_size}") \
        || ERRO "swapfc_chunk_size is not a valid size: ${swapfc_chunk_size}"
      BLOCK_SIZE=$(stat -f -c %s "${swapfc_path}")

      if [[ ${FSTYPE} == btrfs ]]; then
        # if btrfs supports regular swap files(kernel version 5+), force disable COW to avoid data corruption
        # if it doesn't, use the old swap through loop workaround
        if (( KMAJOR >= 5 )); then
          swapfc_nocow=true
        else
          swapfc_force_use_loop=true
        fi
      fi

      # Tell whether the filesystem holding $1 is too full for another chunk.
      # Returns 0 when the free space, after setting one chunk aside, is
      # smaller than CHUNK_SIZE, and 1 when there is enough room.
      # Sets FREE_BLOCKS and FREE_BYTES as a side effect.
      check_ENOSPC(){
        local -r target=$1
        # check free space for avoiding problems on swap io + ENOSPC
        FREE_BLOCKS=$(stat -f -c %f "${target}")
        # also try leaving some free space: one chunk is subtracted up front
        FREE_BYTES=$(( FREE_BLOCKS * BLOCK_SIZE - CHUNK_SIZE ))
        # the status of the comparison is the verdict
        (( FREE_BYTES < CHUNK_SIZE ))
      }

      # Create the zero-filled file $2 with mode 0600 and a size of $1 bytes
      # rounded down to whole MiB. On btrfs with swapfc_nocow enabled the
      # file gets the +C attribute before any data is written.
      # Prints the path to use for swap: the file itself, or - when
      # swapfc_force_use_loop is enabled - the loop device attached to it.
      prepare_swapfile(){
        local -r chunk_size=$1 file=$2
        local -ir mib_count=$(( chunk_size / 1048576 ))

        if touch "${file}"; then
          chmod 0600 "${file}"
        fi

        if [[ $FSTYPE == btrfs ]] && YN "${swapfc_nocow}"; then
          chattr +C "${file}"
        fi
        dd if=/dev/zero of="${file}" bs=1M count="${mib_count}" conv=fdatasync status=none

        # Attach $1 to the first free loop device, print the device and
        # unlink the backing file.
        losetup_w(){
          local -r file="$1"
          local direct_io=off
          YN "${swapfc_directio}" && direct_io=on
          losetup -f --show --direct-io="${direct_io}" "${file}"
          # loop uses a file descriptor - if the file still exists,
          # but does not have a path like O_TMPFILE
          # when loop detaches a file, the file will be deleted.
          rm "${file}"
        }

        if YN "${swapfc_force_use_loop}"; then
          RET=$(losetup_w "${file}")
        else
          RET="${file}"
        fi
        echo "${RET}"
      }

      # the number of chunks is limited to 1..32, anything else becomes 1
      (( 1 <= swapfc_max_count && swapfc_max_count <= 32 )) || {
        WARN "swapfc_max_count must be in range 1..32, reset to 1"
        swapfc_max_count=1
      }

      # Allocate and activate one more swap chunk; $1 is the log prefix that
      # is printed together with the new chunk number.
      # When check_ENOSPC reports a full filesystem nothing is allocated and
      # the polling interval is doubled instead.
      create_swapfile(){
        local -a unit_args

        if check_ENOSPC "${swapfc_path}"; then
          WARN "swapFC: ENOSPC"
          # prevent spamming the journal
          double_polling_rate
          systemd-notify "STATUS=Not enough space for allocating chunk"
          return
        fi

        # in case we have adjusted the polling rate, reset it
        reset_polling_rate
        systemd-notify "STATUS=Allocating swap file..."
        (( ++allocated ))
        INFO "$1" "${allocated}"

        swapfile=$(prepare_swapfile "${CHUNK_SIZE}" "${swapfc_path}/${allocated}")
        mkswap -L "SWAP_${FSTYPE}_${allocated}" "${swapfile}" &> /dev/null

        # the discard option is left out when swapfc_force_preallocated is enabled
        unit_args=( What="${swapfile}" Priority="${swapfc_priority}" )
        YN "${swapfc_force_preallocated}" || unit_args+=( Options=discard )
        unit_args+=( Tag="swapfc_${allocated}" )
        UNIT_NAME=$(gen_swap_unit "${unit_args[@]}")

        # every further chunk gets a priority one lower than the previous
        (( --swapfc_priority ))
        systemctl daemon-reload
        systemctl start "${UNIT_NAME}"
        # a block device here is the loop device handed out by prepare_swapfile
        if [[ -b ${swapfile} ]]; then
          losetup -d "${swapfile}"
        fi
        systemd-notify "STATUS=Monitoring memory status..."
      }

      # Deactivate and delete the most recently allocated chunk: every swap
      # unit mentioning swapfc_<allocated> is stopped and removed together
      # with its backing file, then the chunk counter is decremented.
      destroy_swapfile(){
        systemd-notify "STATUS=Deallocating swap file..."
        INFO "swapFC: free swap: ${curr_free_swap_perc} > ${swapfc_remove_free_swap_perc} - freeup chunk: ${allocated}"
        FIND_SWAP_UNITS | while read -r UNIT_PATH; do
          grep -q "swapfc_${allocated}" "${UNIT_PATH}" || continue
          DEV=$(GET_WHAT_FROM_SWAP_UNIT "${UNIT_PATH}")
          # plain swapoff is the fallback when the unit cannot be stopped
          systemctl stop "${UNIT_PATH##*/}" || swapoff "${DEV}"
          rm -vf "${UNIT_PATH}"
          if [[ -f ${DEV} ]]; then
            rm -f "${DEV}"
          fi
        done
        (( --allocated ))
        systemd-notify "STATUS=Monitoring memory status..."
      }

      # the main loop below runs for as long as this lock file exists
      mkdir -p "${WORK_DIR}/swapfc/"
      touch "${WORK_DIR}/swapfc/.lock"

      # number of chunks currently allocated
      declare -i allocated=0
      # allocate the configured minimum right away
      i=0
      while (( i < swapfc_min_count )); do
        create_swapfile "swapFC: allocate chunk: "
        (( ++i ))
      done
      if (( allocated == 0 )); then
        INFO "swapFC: on-demand swap activation at >$(( RAM_SIZE * (100 - swapfc_free_ram_perc) / 104857600 )) MiB memory usage"
      fi

      # MAINLOOP
      # One decision per polling interval:
      #  - no chunk yet: allocate the first one when free RAM drops below
      #    swapfc_free_ram_perc
      #  - otherwise allocate another chunk when free swap drops below
      #    swapfc_free_swap_perc and swapfc_max_count is not reached
      #  - otherwise release the newest chunk when more than 2 and more than
      #    swapfc_min_count chunks exist and free swap exceeds
      #    swapfc_remove_free_swap_perc
      while [[ -f ${WORK_DIR}/swapfc/.lock ]] && snore "${polling_rate}"; do
        if (( allocated == 0 )); then
          curr_free_ram_perc=$(get_free_ram_perc)
          if (( curr_free_ram_perc < swapfc_free_ram_perc )); then
            create_swapfile "swapFC: free ram: ${curr_free_ram_perc} < ${swapfc_free_ram_perc} - allocate chunk: "
          fi
        else
          curr_free_swap_perc=$(get_free_swap_perc)
          if (( curr_free_swap_perc < swapfc_free_swap_perc )) && (( allocated < swapfc_max_count )); then
            create_swapfile "swapFC: free swap: ${curr_free_swap_perc} < ${swapfc_free_swap_perc} - allocate chunk: "
          elif (( allocated > 2 )) && (( allocated > swapfc_min_count )) \
            && (( curr_free_swap_perc > swapfc_remove_free_swap_perc )); then
            destroy_swapfile
          fi
        fi
      done
    fi
  ;;
  stop)
    # Undo what "start" set up: swap off and delete the generated units
    # (swapD, then swapFC, then zram), restore the saved zswap parameters
    # and remove the start marker.
    [[ ! -f "${LOCK_STARTED}" ]] && INFO_E "$0 already stopped"
    systemd-notify "STOPPING=1"
    readonly swap_units=$(FIND_SWAP_UNITS)
    echo "$swap_units" | while read -r UNIT_PATH; do
      # skips the empty line echo prints when no unit was found as well as
      # units that have been removed in the meantime
      [[ -f ${UNIT_PATH} ]] || continue
      if grep -q swapd "${UNIT_PATH}"; then
        DEV=$(GET_WHAT_FROM_SWAP_UNIT "${UNIT_PATH}")
        UNIT_NAME=${UNIT_PATH##*/}
        swapoff "${DEV}"
        rm -vf "${UNIT_PATH}"
      fi
    done

    # removing the lock file ends the swapFC main loop of the "start" process
    [[ -f ${WORK_DIR}/swapfc/.lock ]] && rm -f "${WORK_DIR}/swapfc/.lock"

    echo "$swap_units" | while read -r UNIT_PATH; do
      [[ -f ${UNIT_PATH} ]] || continue
      if grep -q swapfc "${UNIT_PATH}"; then
        DEV=$(GET_WHAT_FROM_SWAP_UNIT "${UNIT_PATH}")
        UNIT_NAME=${UNIT_PATH##*/}
        swapoff "${DEV}"
        rm -vf "${UNIT_PATH}"
        # file backed chunks are deleted together with their unit
        [[ -f ${DEV} ]] && rm -f "${DEV}"
      fi
    done

    echo "$swap_units" | while read -r UNIT_PATH; do
      [[ -f ${UNIT_PATH} ]] || continue
      if grep -q zram "${UNIT_PATH}"; then
        DEV=$(GET_WHAT_FROM_SWAP_UNIT "${UNIT_PATH}")
        UNIT_NAME=${UNIT_PATH##*/}
        INFO "Zram: swapoff ${DEV}"
        swapoff "${DEV}"
        rm -vf "${UNIT_PATH}"
        zramctl -r "${DEV}"
      fi
    done
    # NOTE(review): ZRAM_ALREADY_SET is only ever assigned in the start
    # branch, i.e. in another process - as written this test cannot succeed
    # here and the module removal is always attempted; confirm the intent
    [[ -v ZRAM_ALREADY_SET ]] || modprobe -r zram

    if [[ -d ${WORK_DIR}/zswap ]]; then
      INFO "Zswap: restore configuration: start"
      # every entry of the backup directory is named after the module
      # parameter it holds the previous value of
      for file in "${WORK_DIR}/zswap"/*; do
        read_l "${file}" > "${ZSWAP_M_P}/${file##*/}"
      done
      INFO "Zswap: restore configuration: complete"
    fi

    wait

    rm -f "${LOCK_STARTED}"
  ;;
  status)
    # Print one section per swap mechanism that is present.
    # SWAP_USED (bytes) is SwapTotal - SwapFree as read from /proc/meminfo.
    declare -a swap_stats
    get_mem_stat_multi swap_stats SwapTotal SwapFree
    SWAP_USED=$(( swap_stats[0] - swap_stats[1] ))
    unset swap_stats

    # Zswap section: shown whenever the zswap module directory exists
    if [[ -d /sys/module/zswap ]]; then
      echo Zswap:
      # pool size and stored pages come from the zswap debug directory
      read -r USED_BYTES < /sys/kernel/debug/zswap/pool_total_size
      USED_PAGES=$(( USED_BYTES / PAGE_SIZE ))
      read -r STORED_PAGES < /sys/kernel/debug/zswap/stored_pages
      STORED_BYTES=$(( STORED_PAGES * PAGE_SIZE ))
      # pages occupied by the pool as a percentage of the pages stored in it;
      # stays 0 while nothing is stored
      RATIO=0
      (( STORED_PAGES > 0 )) && RATIO=$(( USED_PAGES * 100 / STORED_PAGES ))
      # "path:value" lines of all module parameters; the sixth "/"-separated
      # field is the "name:value" part, printed as ". name value"
      {
        grep . /sys/module/zswap/parameters/* | \
          cut -d'/' -f6 | \
          sed -e 's/:/ /g' -e 's/^/. /g'
      } | column -t
      # same for the debug counters, prefixed with ". .", followed by the
      # derived values
      {
        grep . /sys/kernel/debug/zswap/* | \
          cut -d'/' -f6 | \
          sed -e 's/:/ /g' -e 's/^/. . /g' | column -t
        echo . . compress_ratio ${RATIO}%
        # share of the used swap that is held by zswap; skipped when no swap
        # is in use to avoid a division by zero
        (( SWAP_USED > 0 )) && \
          echo . . zswap_store/swap_store ${STORED_BYTES}/${SWAP_USED} $(( STORED_BYTES * 100 / SWAP_USED ))%
      } | column -t
    fi

    # ZRam section: shown when zramctl lists at least one [SWAP] device;
    # prints the header line and the [SWAP] lines with the last column removed
    if zramctl | grep -q '\[SWAP\]'; then
      echo ZRam:
      zramctl | \
        grep -e '^NAME\|\[SWAP\]' | \
        sed -e 's/\(MOUNTPOINT\|\[SWAP\]\)$//g' -e 's/^/.\t/g' | \
        column -t | \
        uniq
    fi

    # swapD section: shown when its work directory exists; lists the swapon
    # lines that contain none of zram, file, loop
    if [[ -d ${WORK_DIR}/swapd ]]; then
      echo swapD:
      swapon --raw | \
        grep -v 'zram\|file\|loop' | \
        sed 's/^/.\t/g' | \
        column -t
    fi

    # swapFC section: shown when its work directory exists; lists the swapon
    # header and the lines containing file or loop
    if [[ -d ${WORK_DIR}/swapfc ]]; then
      echo swapFC:
      swapon --raw | \
        grep -e 'NAME\|file\|loop' | \
        sed 's/^/.\t/g' | \
        column -t
    fi
  ;;
  *)
    help
  ;;
esac

exit 0
# vim: set ft=sh ts=2 sw=2 et:
