OpenShiftノードのローリングyum updateを行う

Red HatでOpenShiftのサポートをしているid:nekopです。OpenShift 全部俺 Advent Calendar 2018 - Qiitaの18日目のエントリです。昨日の変化形です。

OpenShiftではAnsibleを使ってOpenShiftノードのローリングin-placeアップグレードを行うことができますが、これにはOSのアップグレードは含まれていません。OSのコンポーネントにもバグ修正、セキュリティ修正やパフォーマンス向上などのアップデートを適用しなければいけませんが、そのためのAnsibleのplaybookはないので自力でどうにかする必要があります。

というわけで昨日と同じくAnsible ad-hocコマンドを利用してdrain, yum update, reboot, uncordonを行う最低限のスクリプトを記述してみましょう。こんな感じでしょうか。

#!/bin/bash

##
## openshift-rolling-yum-update
##

# Abort on the first failing command. pipefail extends this to the pipelines
# below: without it only awk's exit status counts, so a failing `oc get node`
# would silently produce an empty node list and the script would do nothing.
set -e
set -o pipefail

# Node names (first column of `oc get node`) for each role label, one per line.
MASTERS=$(oc get node --no-headers -l "node-role.kubernetes.io/master=true" | awk '{print $1}')
INFRAS=$(oc get node --no-headers -l "node-role.kubernetes.io/infra=true" | awk '{print $1}')
NODES=$(oc get node --no-headers -l "node-role.kubernetes.io/compute=true" | awk '{print $1}')

# Read by stop_yum_update_start; "true" enables its master-only steps.
MASTER=false

# Update the OS packages of the given nodes in place, one node at a time.
#
# For each node: drain it, stop the node service, the running containers and
# docker, run `yum update -y`, reboot, wait until Ansible can reach the host
# again, then uncordon it.
#
# Globals:
#   MASTER (read) - when "true", /etc/origin/node/pods is renamed to
#                   pods.stop before the node service is stopped and renamed
#                   back after the reboot, followed by a 60 second wait.
#                   (Presumably this directory holds the master's static pod
#                   definitions - confirm against the cluster layout.)
# Arguments:
#   $@ - node names; each is used both as the `oc adm` node argument and as
#        the Ansible host pattern. No arguments means nothing is done.
# Returns:
#   The script runs with `set -e`, so the first failing step aborts it;
#   only a failure of the reboot command itself is ignored.
function stop_yum_update_start {
  local n  # keep the loop variable out of the global scope
  for n in "$@"; do
    oc adm drain "$n" --delete-local-data --ignore-daemonsets
    if [[ "$MASTER" == "true" ]]; then
      ansible "$n" -m shell -a "mv /etc/origin/node/pods /etc/origin/node/pods.stop"
    fi
    ansible "$n" -m shell -a "systemctl stop atomic-openshift-node"
    # -r: skip `docker stop` when no container is running; without it xargs
    # runs `docker stop` with no arguments, which fails and aborts the rollout.
    ansible "$n" -m shell -a "docker ps -q | xargs -r docker stop --time 600"
    ansible "$n" -m shell -a "systemctl stop docker"
    ansible "$n" -m shell -a "yum update -y"
    # The reboot command's status is deliberately ignored (presumably because
    # the connection drops while the host goes down).
    ansible "$n" -m shell -a "reboot" ||:
    # NOTE(review): this only polls until ping succeeds; if the host is still
    # reachable right after the reboot command, the loop can end before the
    # host has actually gone down.
    until ansible "$n" -m ping > /dev/null; do echo "$n not ready yet"; sleep 5; done
    if [[ "$MASTER" == "true" ]]; then
      ansible "$n" -m shell -a "mv /etc/origin/node/pods.stop /etc/origin/node/pods"
      sleep 60  # Wait for master coming up before doing any other operation
    fi
    oc adm uncordon "$n"
  done
}

# Masters first. MASTER=true makes stop_yum_update_start run its master-only
# steps (moving /etc/origin/node/pods aside and restoring it after the reboot).
# The node lists are intentionally unquoted so that each name becomes its own
# argument.
MASTER=true
stop_yum_update_start $MASTERS

# Infra and compute nodes do not get the master-only handling.
MASTER=false
stop_yum_update_start $INFRAS

MASTER=false
stop_yum_update_start $NODES