#!/usr/bin/env bash

# Probe parameters. The {{ ... }} placeholders are expected to be filled in
# before this script is executed.
PROBE_TYPE="{{ .ProbeType }}"
PROBE_PORT="{{ .Port }}"
PROBE_PROTOCOL="{{ .Protocol }}"

# Exit code layout used by this script:
#   1-123 : left to curl
#   124   : probe failure decided by this script
#   125   : usage error
#   126+  : standard bash codes
# The script's own codes count downward from 125 so that curl can introduce new
# error codes while the script's codes remain distinctive.
PROBE_ERR_CODE=124
USAGE_ERR_CODE=125

# Values of PROBE_TYPE that the script recognizes.
READINESS_TYPE='readiness'
STARTUP_TYPE='startup'

# Endpoint that the health check requests.
RGW_URL="${PROBE_PROTOCOL}://0.0.0.0:${PROBE_PORT}"

# Issue a single HTTP request with curl.
# Arguments: $1 - URL to request
# Outputs:   the HTTP response code on stdout; the response body on stderr
# Returns:   curl's exit status
check() {
  local target="$1"
  local -a curl_opts=(
    --insecure                      # skip TLS certificate verification (only relevant for https)
    --silent                        # suppress progress output
    --output /dev/stderr            # send the response body to stderr (handy for debugging)
    --write-out '%{response_code}'  # print the HTTP response code to stdout
  )
  curl "${curl_opts[@]}" "$target"
}

# Run the health check once. check() prints the HTTP response code on stdout and
# returns curl's exit status, so capture both.
http_response="$(check "$RGW_URL")"
retcode=$?

if [[ "$retcode" -ne 0 ]]; then
  # curl itself failed, so no usable HTTP response was obtained; fail the probe
  # with curl's own exit code regardless of probe type.
  # - startup probe: always fail here. once the startup probe has passed, all
  #   subsequent probes can rely on the health check having succeeded at least
  #   once without errors.
  # - readiness probe: curl was working during the startup probe, so a curl
  #   error now most likely means a new problem with the RGW.
  echo "RGW health check failed with error code: $retcode. the RGW likely cannot be reached by clients" >&2
  exit "$retcode"
fi

# HTTP response codes that get special treatment below.
RGW_RATE_LIMITING_RESPONSE=503
RGW_MISCONFIGURATION_RESPONSE=500

if [[ "$http_response" -ge 200 ]] && [[ "$http_response" -lt 400 ]]; then
  # 200-399 are successful responses. same behavior as Kubernetes' HTTP probe
  exit 0

elif [[ "$http_response" -eq "$RGW_RATE_LIMITING_RESPONSE" ]]; then
  # S3's '503: slow down' code is not an error but an indication that RGW is throttling client
  # traffic. failing the readiness check here would only cause an increase in client connections on
  # other RGWs and likely cause those to fail also in a cascade. i.e., a special healthy response.
  # the message is a diagnostic, so it goes to stderr like every other message in this script.
  echo "INFO: RGW is rate limiting" >&2
  exit 0

elif [[ "$http_response" -eq "$RGW_MISCONFIGURATION_RESPONSE" ]]; then
  # can't specifically determine if the RGW is running or not. most likely a misconfiguration.
  case "$PROBE_TYPE" in
  "$STARTUP_TYPE")
    # fail until we can accurately get a valid healthy response when runtime starts.
    echo 'FAIL: HTTP code 500 suggests an RGW misconfiguration.' >&2
    exit "$PROBE_ERR_CODE"
    ;;
  "$READINESS_TYPE")
    # config likely modified at runtime which could result in all RGWs failing this check.
    # occasional client failures are still better than total failure, so ignore this
    echo 'WARN: HTTP code 500 suggests an RGW misconfiguration' >&2
    exit 0
    ;;
  *)
    # the probe type is expected to be one of the known values, so this path should never be
    # taken, but keep it to be safe
    echo "ERROR: probe type is unknown: $PROBE_TYPE" >&2
    exit "$USAGE_ERR_CODE"
    ;;
  esac

else
  # anything else is a failing response. same behavior as Kubernetes' HTTP probe
  echo "FAIL: received an HTTP error code: $http_response" >&2
  exit "$PROBE_ERR_CODE"

fi