From: Todd Malsbary Date: Fri, 3 Dec 2021 00:17:57 +0000 (-0800) Subject: Flux install of compute cluster X-Git-Url: https://gerrit.akraino.org/r/gitweb?a=commitdiff_plain;h=43dbb5a2353606d1801f08dcf3e2fd06399843fb;p=icn.git Flux install of compute cluster This includes SOPS encryption of BMC and cluster secrets. NOTE: Keys are included in this commit. These are only intended to be used in this ICN virtual environment for testing and demonstration purposes. Signed-off-by: Todd Malsbary Change-Id: I2c7b76ec5f2d125afb007d5a4b90a23924f8d305 --- diff --git a/Makefile b/Makefile index 1ccfae2..a259336 100644 --- a/Makefile +++ b/Makefile @@ -148,7 +148,6 @@ vm_verifier: jump_server \ vm_clean_all vm_cluster: - ./deploy/site/vm/vm.sh build ./deploy/site/vm/vm.sh deploy ./deploy/site/vm/vm.sh wait ./deploy/kata/kata.sh test diff --git a/Vagrantfile b/Vagrantfile index 82b72bc..377b215 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -1,5 +1,8 @@ # -*- mode: ruby -*- # vi: set ft=ruby : +require 'ipaddr' +require 'uri' +require 'yaml' # IMPORTANT To bring up the machines, use the "--no-parallel" option # to vagrant up. This is to workaround dependencies between the jump @@ -7,11 +10,24 @@ # machines will fail to come up until the baremetal network (created # by vagrant from the jump machine definition) is up. -vars = { - :site => 'vm', - :baremetal_cidr => '192.168.151.0/24', - :num_machines => 2 -} +site = ENV['ICN_SITE'] || 'vm' + +# Calculate the baremetal network address from the bmcAddress (aka +# IPMI address) specified in the machine pool values. IPMI in the +# virtual environment is emulated by virtualbmc listening on the host. +baremetal_cidr = nil +Dir.glob("deploy/site/#{site}/*.yaml") do |file| + YAML.load_stream(File.read(file)) do |document| + values = document.fetch('spec', {}).fetch('values', {}) + next if values['machineName'].nil? || values['bootMACAddress'].nil? + bmc_host = URI.parse(values['bmcAddress']).host + baremetal_cidr = "#{IPAddr.new(bmc_host).mask(24)}/24" + end +end +if baremetal_cidr.nil? 
+ puts "Missing bmcAddress value in site definition, can't determine baremetal network address" + exit 1 +end $post_up_message = < "#{vars[:site]}-provisioning", + :libvirt__network_name => "#{site}-provisioning", :type => 'dhcp' # IPMI control of machines is provided by vbmc on the host @@ -96,61 +112,65 @@ Vagrant.configure("2") do |config| m.vm.post_up_message = $post_up_message end - # The machine pool used by cluster creation - (1..vars[:num_machines]).each do |i| - config.vm.define "machine-#{i}" do |m| - m.vm.hostname = "machine-#{i}" - m.vm.provider :libvirt do |libvirt| - libvirt.graphics_ip = '0.0.0.0' - libvirt.default_prefix = "#{vars[:site]}-" - libvirt.cpu_mode = 'host-passthrough' - libvirt.cpus = 8 - libvirt.memory = 16384 - libvirt.nested = true - # The image will be provisioned by ICN so just create an empty - # disk for the machine - libvirt.storage :file, :size => 50, :type => 'raw', :cache => 'none' - # Management attach is false so that vagrant will not interfere - # with these machines: the jump server will manage them - # completely - libvirt.mgmt_attach = false - end - # The provisioning network must be listed first for PXE boot to - # the metal3/ironic provided image - m.vm.network :private_network, - :libvirt__network_name => "#{vars[:site]}-provisioning", - :type => 'dhcp' - m.vm.network :private_network, - :libvirt__network_name => "#{vars[:site]}-baremetal", - :type => 'dhcp' - - # IPMI control - m.trigger.after [:up] do |trigger| - trigger.name = 'Adding machine to IPMI network' - trigger.run = {inline: "./tools/vagrant/add_machine_to_vbmc.sh #{i} #{vars[:site]} machine-#{i}"} - end - m.trigger.after [:destroy] do |trigger| - trigger.name = 'Removing machine from IPMI network' - trigger.run = {inline: "./tools/vagrant/remove_machine_from_vbmc.sh #{i} #{vars[:site]} machine-#{i}"} - end + # Look for any HelmReleases in the site directory with machineName in + # the values dictionary. This will provide the values needed to + # create the machine pool. + legacy_machine_args = "" + Dir.glob("deploy/site/#{site}/*.yaml") do |file| + YAML.load_stream(File.read(file)) do |document| + values = document.fetch('spec', {}).fetch('values', {}) + next if values['machineName'].nil? || values['bootMACAddress'].nil? 
+ machine_name = values['machineName'] + boot_mac_address = values['bootMACAddress'] + bmc_port = URI.parse(values['bmcAddress']).port + config.vm.define machine_name do |m| + m.vm.hostname = machine_name + m.vm.provider :libvirt do |libvirt| + libvirt.graphics_ip = '0.0.0.0' + libvirt.default_prefix = "#{site}-" + libvirt.cpu_mode = 'host-passthrough' + libvirt.cpus = 8 + libvirt.memory = 16384 + libvirt.nested = true + # The image will be provisioned by ICN so just create an empty + # disk for the machine + libvirt.storage :file, :size => 50, :type => 'raw', :cache => 'none' + # Management attach is false so that vagrant will not interfere + # with these machines: the jump server will manage them + # completely + libvirt.mgmt_attach = false + end + # The provisioning network must be listed first for PXE boot to + # the metal3/ironic provided image + m.vm.network :private_network, + :libvirt__network_name => "#{site}-provisioning", + :mac => boot_mac_address, + :type => 'dhcp' + m.vm.network :private_network, + :libvirt__network_name => "#{site}-baremetal", + :type => 'dhcp' + + # IPMI control + m.trigger.after [:up] do |trigger| + trigger.name = 'Adding machine to IPMI network' + trigger.run = {inline: "./tools/vagrant/add_machine_to_vbmc.sh #{site} #{machine_name} #{bmc_port}"} + end + m.trigger.after [:destroy] do |trigger| + trigger.name = 'Removing machine from IPMI network' + trigger.run = {inline: "./tools/vagrant/remove_machine_from_vbmc.sh #{site} #{machine_name} #{bmc_port}"} + end - # Create configuration for ICN provisioning - m.trigger.after [:up] do |trigger| - if i == vars[:num_machines] then + # Create configuration for ICN provisioning + legacy_machine_args = "#{legacy_machine_args} #{machine_name} #{bmc_port}" + m.trigger.after [:up] do |trigger| trigger.info = 'Creating nodes.json.sample describing the machines' - trigger.run = {inline: "./tools/vagrant/create_nodes_json_sample.sh #{vars[:num_machines]} #{vars[:site]} machine-"} + trigger.run = {inline: "./tools/vagrant/create_nodes_json_sample.sh #{site} #{legacy_machine_args}"} end - end - m.trigger.after [:up] do |trigger| - if i == vars[:num_machines] then + m.trigger.after [:up] do |trigger| trigger.info = 'Creating Provisioning resource describing the cluster' - trigger.run = {inline: "./tools/vagrant/create_provisioning_cr.sh #{vars[:num_machines]} #{vars[:site]} machine-"} + trigger.run = {inline: "./tools/vagrant/create_provisioning_cr.sh #{site} #{legacy_machine_args}"} end end - m.trigger.after [:up] do |trigger| - trigger.name = 'Creating machine resource' - trigger.run = {inline: "./tools/vagrant/create_machine_resource.sh #{i} #{vars[:site]} machine-#{i}"} - end end end end diff --git a/deploy/site/common.sh b/deploy/site/common.sh new file mode 100644 index 0000000..b20bae5 --- /dev/null +++ b/deploy/site/common.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +function _gpg_key_fp { + gpg --with-colons --list-secret-keys $1 | awk -F: '/fpr/ {print $10;exit}' +} + +function create_gpg_key { + local -r key_name=$1 + + # Create an rsa4096 key that does not expire + gpg --batch --full-generate-key <${site_dir}/sops.pub.asc + + # Add .sops.yaml so users won't have to worry about specifying the + # proper key for the target cluster or namespace + echo "Creating ${site_dir}/.sops.yaml SOPS configuration file" + cat < ${site_dir}/.sops.yaml +creation_rules: + - path_regex: .*.yaml + encrypted_regex: ^(bmcPassword|hashedPassword)$ + pgp: ${key_fp} +EOF + + sops --encrypt --in-place 
--config=${site_dir}/.sops.yaml ${site_yaml} +} + +function sops_decrypt_site { + local -r site_yaml=$1 + + local -r site_dir=$(dirname ${site_yaml}) + sops --decrypt --in-place --config=${site_dir}/.sops.yaml ${site_yaml} +} + +function flux_create_site { + local -r url=$1 + local -r branch=$2 + local -r path=$3 + local -r key_name=$4 + + local -r source_name="$(basename ${url})-${branch}" + local -r kustomization_name="${source_name}-site-$(basename ${path})" + local -r key_fp=$(gpg --with-colons --list-secret-keys ${key_name} | awk -F: '/fpr/ {print $10;exit}') + local -r secret_name="${key_name}-sops-gpg" + + flux create source git ${source_name} --url=${url} --branch=${branch} + gpg --export-secret-keys --armor "$(_gpg_key_fp ${key_name})" | + kubectl -n flux-system create secret generic ${secret_name} --from-file=sops.asc=/dev/stdin --dry-run=client -o yaml | + kubectl apply -f - + flux create kustomization ${kustomization_name} --path=${path} --source=GitRepository/${source_name} --prune=true \ + --decryption-provider=sops --decryption-secret=${secret_name} +} diff --git a/deploy/site/site.sh b/deploy/site/site.sh new file mode 100755 index 0000000..d0c970a --- /dev/null +++ b/deploy/site/site.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +SCRIPTDIR="$(readlink -f $(dirname ${BASH_SOURCE[0]}))" +LIBDIR="$(dirname $(dirname ${SCRIPTDIR}))/env/lib" + +source $LIBDIR/common.sh +source $SCRIPTDIR/common.sh + +case $1 in + "create-gpg-key") create_gpg_key $2 ;; + "sops-encrypt-site") sops_encrypt_site $2 $3 ;; + "sops-decrypt-site") sops_decrypt_site $2 ;; + "flux-create-site") flux_create_site $2 $3 $4 $5;; + *) cat <${BUILDDIR}/cluster-e2etest-values.yaml -} + sed -i -e 's!sshAuthorizedKey: .*!sshAuthorizedKey: '"${SSH_AUTHORIZED_KEY}"'!' 
${SCRIPTDIR}/site.yaml
+
+    # Encrypt the site YAML
+    create_gpg_key ${FLUX_SOPS_KEY_NAME}
+    sops_encrypt_site ${SCRIPTDIR}/site.yaml ${FLUX_SOPS_KEY_NAME}
 
-function release_name {
-    local -r values_path=$1
-    name=$(basename ${values_path})
-    echo ${name%-values.yaml}
+    # ONLY FOR TEST ENVIRONMENT: save the private key used
+    export_gpg_private_key ${FLUX_SOPS_KEY_NAME} >${SCRIPTDIR}/sops.asc
 }
 
 function deploy {
-    for values in ${BUILDDIR}/machine-*-values.yaml; do
-        helm -n metal3 install $(release_name ${values}) ${SCRIPTDIR}/../../machine --create-namespace -f ${values}
-    done
-    helm -n metal3 install cluster-e2etest ${SCRIPTDIR}/../../cluster --create-namespace -f ${BUILDDIR}/cluster-e2etest-values.yaml
+    gpg --import ${SCRIPTDIR}/sops.asc
+    flux_create_site https://gerrit.akraino.org/r/icn master deploy/site/vm ${FLUX_SOPS_KEY_NAME}
 }
 
 function clean {
-    helm -n metal3 uninstall cluster-e2etest
-    for values in ${BUILDDIR}/machine-*-values.yaml; do
-        helm -n metal3 uninstall $(release_name ${values})
-    done
+    kubectl -n flux-system delete kustomization icn-master-site-vm
 }
 
 function is_cluster_ready {
@@ -48,6 +61,28 @@ function is_control_plane_ready {
     [[ $(kubectl --kubeconfig=${BUILDDIR}/e2etest-admin.conf get nodes -l node-role.kubernetes.io/control-plane -o jsonpath='{range .items[*]}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' | grep -c True) == ${replicas} ]]
 }
 
+function insert_control_plane_network_identity_into_ssh_config {
+    # This enables logging into the control plane machines from this
+    # machine without specifying the identity file on the command line
+
+    # Create ssh config if it doesn't exist
+    mkdir -p ${HOME}/.ssh && chmod 700 ${HOME}/.ssh
+    touch ${HOME}/.ssh/config
+    chmod 600 ${HOME}/.ssh/config
+    # Add the entry for the control plane network, host value in ssh
+    # config is a wildcard
+    endpoint=$(helm -n metal3 get values -a cluster-e2etest | awk '/controlPlaneEndpoint:/ {print $2}')
+    prefix=$(helm -n metal3 get values -a cluster-e2etest | awk '/controlPlanePrefix:/ {print $2}')
+    host=$(ipcalc ${endpoint}/${prefix} | awk '/Network:/ {sub(/\.0.*/,".*"); print $2}')
+    if [[ $(grep -c "Host ${host}" ${HOME}/.ssh/config) != 0 ]]; then
+        sed -i -e '/Host '"${host}"'/,+1 d' ${HOME}/.ssh/config
+    fi
+    cat <<EOF >>${HOME}/.ssh/config
+Host ${host}
+  IdentityFile ${SCRIPTDIR}/id_rsa
+EOF
+}
+
 function wait_for_all_ready {
     WAIT_FOR_INTERVAL=60s
     WAIT_FOR_TRIES=30
@@ -55,10 +90,11 @@ function wait_for_all_ready {
     clusterctl -n metal3 get kubeconfig e2etest >${BUILDDIR}/e2etest-admin.conf
     chmod 600 ${BUILDDIR}/e2etest-admin.conf
     wait_for is_control_plane_ready
+    insert_control_plane_network_identity_into_ssh_config
 }
 
 case $1 in
-    "build") build ;;
+    "build-source") build_source ;;
     "clean") clean ;;
     "deploy") deploy ;;
     "wait") wait_for_all_ready ;;
@@ -66,7 +102,7 @@ case $1 in
 Usage: $(basename $0) COMMAND
 
 Commands:
-  build - Build the site deployment values
+  build-source - Build the in-tree site values
   clean - Remove the site
   deploy - Deploy the site
   wait - Wait for the site to be ready
diff --git a/env/lib/common.sh b/env/lib/common.sh
index 2009c58..f09ca6f 100755
--- a/env/lib/common.sh
+++ b/env/lib/common.sh
@@ -46,6 +46,9 @@ CAPM3_VERSION="v0.5.1"
 #The flux version to use
 FLUX_VERSION="0.20.0"
 
+#The sops version to use
+SOPS_VERSION="v3.7.1"
+
 #refered from onap
 function call_api {
     #Runs curl with passed flags and provides
@@ -248,6 +251,13 @@ function install_emcoctl {
     sudo install -o root -g root -m 0755 ${EMCOPATH}/bin/emcoctl/emcoctl 
/usr/local/bin/emcoctl
 }
 
+function install_sops {
+    curl -L https://github.com/mozilla/sops/releases/download/${SOPS_VERSION}/sops-${SOPS_VERSION}.linux -o sops
+    sudo install -o root -g root -m 0755 sops /usr/local/bin/sops
+    rm sops
+    sops --version
+}
+
 function fetch_image {
     if [[ "${BM_IMAGE_URL}" && "${BM_IMAGE}" ]]; then
         mkdir -p "${IRONIC_DATA_DIR}/html/images"
diff --git a/env/metal3/01_install_package.sh b/env/metal3/01_install_package.sh
index d0409b3..8b8c36f 100755
--- a/env/metal3/01_install_package.sh
+++ b/env/metal3/01_install_package.sh
@@ -20,6 +20,7 @@ function install_essential_packages {
         curl \
         dnsmasq \
         figlet \
+        ipcalc \
         nmap \
         patch \
         psmisc \
@@ -64,6 +65,7 @@ install() {
     install_kustomize
     install_clusterctl
     install_flux_cli
+    install_sops
     install_emcoctl
 }
 
diff --git a/tools/vagrant/add_machine_to_vbmc.sh b/tools/vagrant/add_machine_to_vbmc.sh
index 5676a7b..86b3cd4 100755
--- a/tools/vagrant/add_machine_to_vbmc.sh
+++ b/tools/vagrant/add_machine_to_vbmc.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 set -eu -o pipefail
 
-index=$1
-site=$2
-name=$3
+site=$1
+name=$2
+port=$3
 
-vbmc --no-daemon add ${site}-${name} --port $((6230+index-1)) --libvirt-uri "qemu:///system?&no_verify=1&no_tty=1"
+vbmc --no-daemon add ${site}-${name} --port ${port} --libvirt-uri "qemu:///system?&no_verify=1&no_tty=1"
 vbmc --no-daemon start ${site}-${name}
diff --git a/tools/vagrant/create_machine_resource.sh b/tools/vagrant/create_machine_resource.sh
deleted file mode 100755
index e1afdc2..0000000
--- a/tools/vagrant/create_machine_resource.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-set -eu -o pipefail
-
-index=$1
-site=$2
-name=$3
-
-ipmi_host=$(virsh -c qemu:///system net-dumpxml ${site}-baremetal | xmlstarlet sel -t -v "//network/ip/@address")
-ipmi_port=$((6230+index-1))
-boot_mac=$(virsh -c qemu:///system dumpxml "${site}-${name}" | xmlstarlet sel -t -v "//interface[source/@network='${site}-provisioning']/mac/@address")
-
-mkdir -p build/site/${site}
-cat <<EOF >build/site/${site}/machine-${index}-values.yaml
-machineName: machine-${index}
-bootMACAddress: ${boot_mac}
-bmcAddress: ipmi://${ipmi_host}:${ipmi_port}
-bmcUsername: admin
-bmcPassword: password
-EOF
diff --git a/tools/vagrant/create_nodes_json_sample.sh b/tools/vagrant/create_nodes_json_sample.sh
index e1a9948..4729edb 100755
--- a/tools/vagrant/create_nodes_json_sample.sh
+++ b/tools/vagrant/create_nodes_json_sample.sh
@@ -1,9 +1,7 @@
 #!/bin/bash
 set -eu -o pipefail
 
-num_machines=$1
-site=$2
-name_prefix=$3
+site=$1; shift
 
 nodes_json_path="deploy/metal3/scripts/nodes.json.sample"
 
 ipmi_host=$(virsh -c qemu:///system net-dumpxml ${site}-baremetal | xmlstarlet sel -t -v "//network/ip/@address")
@@ -12,12 +10,13 @@ cat <<EOF >${nodes_json_path}
 {
   "nodes": [
 EOF
-for ((i=1;i<=num_machines;++i)); do
-    name="${name_prefix}${i}"
-    ipmi_port=$((6230+i-1))
+
+while (("$#")); do
+    name=$1; shift
+    ipmi_port=$1; shift
     baremetal_mac=$(virsh -c qemu:///system dumpxml "${site}-${name}" | xmlstarlet sel -t -v "//interface[source/@network='${site}-baremetal']/mac/@address")
     provisioning_mac=$(virsh -c qemu:///system dumpxml "${site}-${name}" | xmlstarlet sel -t -v "//interface[source/@network='${site}-provisioning']/mac/@address")
-    if ((i>${nodes_json_path}
 {
       "name": "${name}",
diff --git a/tools/vagrant/create_provisioning_cr.sh b/tools/vagrant/create_provisioning_cr.sh
index 2d07344..917245d 100755
--- a/tools/vagrant/create_provisioning_cr.sh
+++ b/tools/vagrant/create_provisioning_cr.sh
@@ -1,13 +1,12 @@
 #!/bin/bash
 set -eu -o pipefail
 
-num_machines=$1 
-site=$2
-name_prefix=$3
+site=$1; shift
 
 provisioning_cr_path="cmd/bpa-operator/e2etest/test_bmh_provisioning_cr.yaml"
 
-name="${name_prefix}1"
+name=$1; shift
+ipmi_port=$1; shift
 provisioning_mac=$(virsh -c qemu:///system dumpxml "${site}-${name}" | xmlstarlet sel -t -v "//interface[source/@network='${site}-provisioning']/mac/@address")
 cat <<EOF >${provisioning_cr_path}
 apiVersion: bpa.akraino.org/v1alpha1
@@ -22,12 +21,13 @@ spec:
     - ${name}:
         mac-address: ${provisioning_mac}
 EOF
-if ((num_machines>1)); then
+if (("$#")); then
     cat <<EOF >>${provisioning_cr_path}
   workers:
 EOF
-    for ((i=2;i<=num_machines;++i)); do
-        name="${name_prefix}${i}"
+    while (("$#")); do
+        name=$1; shift
+        ipmi_port=$1; shift
         provisioning_mac=$(virsh -c qemu:///system dumpxml "${site}-${name}" | xmlstarlet sel -t -v "//interface[source/@network='${site}-provisioning']/mac/@address")
         cat <<EOF >>${provisioning_cr_path}
     - ${name}:
diff --git a/tools/vagrant/remove_machine_from_vbmc.sh b/tools/vagrant/remove_machine_from_vbmc.sh
index 4ff8018..4efc15b 100755
--- a/tools/vagrant/remove_machine_from_vbmc.sh
+++ b/tools/vagrant/remove_machine_from_vbmc.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 set -eu -o pipefail
 
-index=$1
-site=$2
-name=$3
+site=$1
+name=$2
+port=$3
 
 vbmc --no-daemon delete ${site}-${name} || true
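
The new Vagrantfile no longer takes a machine count; it builds the machine pool from any HelmRelease-style document under deploy/site/<site>/ whose spec.values carries machineName, bootMACAddress and bmcAddress (the bmcAddress host and port point at the virtualbmc endpoint on the libvirt host). A minimal values document of that shape is sketched below as a bash heredoc, in the spirit of the removed create_machine_resource.sh; the HelmRelease wrapper fields, file name, MAC and IPMI endpoint are illustrative rather than taken from this change, and bmcPassword is one of the fields the generated .sops.yaml rule encrypts.

#!/usr/bin/env bash
# Illustrative only: the Vagrantfile reads just spec.values; the chart and
# interval fields a real HelmRelease needs are omitted here.
cat <<EOF >deploy/site/vm/machine-1.yaml
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: machine-1
spec:
  values:
    machineName: machine-1
    bootMACAddress: 52:54:00:aa:bb:01
    bmcAddress: ipmi://192.168.151.1:6230
    bmcUsername: admin
    bmcPassword: password
EOF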
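
Taken together, the helpers in deploy/site/common.sh, deploy/site/site.sh and deploy/site/vm/vm.sh give roughly the following encrypt-and-deploy flow for the vm site; icn-site-vm below is a placeholder key name, since the actual FLUX_SOPS_KEY_NAME value is set in deploy/site/vm/vm.sh and is not visible in this diff.

# Generate the GPG key SOPS encrypts to, then encrypt the site secrets in
# place; per the generated .sops.yaml only bmcPassword and hashedPassword
# values are rewritten, the rest of site.yaml stays readable.
./deploy/site/site.sh create-gpg-key icn-site-vm
./deploy/site/site.sh sops-encrypt-site deploy/site/vm/site.yaml icn-site-vm

# Point Flux at the site: flux_create_site creates the GitRepository, stores
# the private key in the <key-name>-sops-gpg secret, and creates a
# Kustomization with --decryption-provider=sops so the secrets are only
# decrypted in-cluster.
./deploy/site/site.sh flux-create-site https://gerrit.akraino.org/r/icn master deploy/site/vm icn-site-vm

# To change a secret later, decrypt in place, edit, and re-encrypt before
# committing.
./deploy/site/site.sh sops-decrypt-site deploy/site/vm/site.yaml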