Logged into 192.41.233.196 / sc-stor-dmd01
Installed strace
sudo yum install strace -y
Logged into 192.41.233.197 / sc-stor-nvm01
Starting a dd write of a 2 GB file:
# dd if=/dev/urandom of=foo bs=8M count=256 conv=sync
256+0 records in
256+0 records out
2147483648 bytes (2.1 GB) copied, 9.86971 s, 218 MB/s
That's single-stream write performance.
Dropped the in-memory caches and ran a read test:
[root@sc-stor-nvm01 xcache-osiris]# dd if=foo of=/dev/null bs=8M conv=sync
256+0 records in
256+0 records out
2147483648 bytes (2.1 GB) copied, 15.5068 s, 138 MB/s
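(The cache drop itself wasn't captured above; presumably it was the standard sequence:)
sync
echo 3 > /proc/sys/vm/drop_caches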
We run the following on each node, taken from https://kubernetes.io/docs/setup/independent/install-kubeadm/
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kube*
EOF
# Set SELinux in permissive mode (effectively disabling it)
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
systemctl enable kubelet && systemctl start kubelet
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
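If those bridge sysctls don't take effect, the br_netfilter module may not be loaded yet; the usual fix (not needed or recorded here) is:
modprobe br_netfilter
sysctl --system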
Opened ports:
[root@sc-stor-dmd01 ~]# diff iptables iptables-orig
1c1
< # Generated by iptables-save v1.4.21 on Tue Oct 30 11:06:49 2018
---
> # Generated by iptables-save v1.4.21 on Tue Oct 30 11:27:41 2018
5c5
< :OUTPUT ACCEPT [86207775:122886300559]
---
> :OUTPUT ACCEPT [129669:168407785]
13,14d12
< -A INPUT -p tcp -m multiport --dports 1094 -j ACCEPT -m comment --comment ignore
< -A INPUT -s 192.168.0.0/16 -j ACCEPT -m comment --comment ignore
73a72
> -A f2b-ssh -s 177.56.128.221/32 -j REJECT --reject-with icmp-port-unreachable
80c79
< # Completed on Tue Oct 30 11:06:49 2018
---
> # Completed on Tue Oct 30 11:27:41 2018
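For reference, those rules were added with the usual save/edit/restore cycle (the same pattern shows up in the shell history at the end), roughly:
iptables-save > iptables
vi iptables            # add the ACCEPT rules for port 1094 and 192.168.0.0/16
iptables-restore < iptables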
Installed Docker via yum install docker
=========================================================================================================
 Package                                  Arch    Version                               Repository  Size
=========================================================================================================
Installing:
 docker                                   x86_64  2:1.13.1-75.git8633870.el7.centos     extras      16 M
Installing for dependencies:
 PyYAML                                   x86_64  3.10-11.el7                           base        153 k
 atomic-registries                        x86_64  1:1.22.1-25.git5a342e3.el7.centos     extras       35 k
 container-selinux                        noarch  2:2.68-1.el7                          extras       36 k
 container-storage-setup                  noarch  0.11.0-2.git5eaf76c.el7               extras       35 k
 docker-client                            x86_64  2:1.13.1-75.git8633870.el7.centos     extras      3.8 M
 docker-common                            x86_64  2:1.13.1-75.git8633870.el7.centos     extras       93 k
 oci-register-machine                     x86_64  1:0-6.git2b44233.el7                  extras      1.1 M
 oci-systemd-hook                         x86_64  1:0.1.17-2.git83283a0.el7             extras       33 k
 oci-umount                               x86_64  2:2.3.3-3.gite3c9055.el7              extras       32 k
 python2-pytoml                           noarch  0.1.18-1.el7                          epel         20 k
 skopeo-containers                        x86_64  1:0.1.31-1.dev.gitae64ff7.el7.centos  extras       17 k
 subscription-manager-rhsm-certificates   x86_64  1.20.11-1.el7.centos                  base        195 k

Transaction Summary
=========================================================================================================
Install  1 Package (+12 Dependent packages)
Start Docker:
systemctl enable docker
systemctl start docker
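Worth a quick sanity check that Docker's cgroup driver matches what kubelet expects (cgroupfs vs systemd mismatches are a common kubeadm failure mode; this check isn't in the original log):
docker info 2>/dev/null | grep -i 'cgroup driver'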
Run kubeadm init:
kubeadm init --pod-network-cidr=192.168.0.0/16
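To run kubectl as root against the new cluster, point it at the admin kubeconfig (this is what the later checks use as well):
export KUBECONFIG=/etc/kubernetes/admin.conf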
Check:
[root@sc-stor-dmd01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
sc-stor-dmd01.osris.org NotReady master 2m59s v1.12.2
Now we need to add a networking provider (Calico, in this case):
kubectl apply -f https://docs.projectcalico.org/v3.1/getting-started/kubernetes/installation/hosted/rbac-kdd.yaml
kubectl apply -f https://docs.projectcalico.org/v3.1/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml
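The node flips to Ready once the Calico and DNS pods come up; that can be watched with (not captured in the original log):
kubectl get pods -n kube-system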
Now we're ready:
[root@sc-stor-dmd01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
sc-stor-dmd01.osris.org Ready master 4m7s v1.12.2
On the second node, we need to install Kubernetes and Docker, and configure the firewall in the same way.
As before, we add the firewall rules for XRootD and Kubernetes traffic, with an iptables-save, edit, and iptables-restore:
-A INPUT -p tcp -m multiport --dports 1094 -j ACCEPT -m comment --comment ignore
-A INPUT -s 192.168.0.0/16 -j ACCEPT -m comment --comment ignore
Then install Kubernetes:
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
exclude=kube*
EOF
# Set SELinux in permissive mode (effectively disabling it)
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
systemctl enable kubelet && systemctl start kubelet
Disable swap:
swapoff -a
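To keep swap off across reboots, the swap entry in /etc/fstab should also be commented out (not captured in the log):
sed -i '/ swap / s/^/#/' /etc/fstab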
Join the cluster:
kubeadm join 192.41.233.196:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>
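The token and CA cert hash come from the kubeadm init output on the master; if that output is lost, the full join command can be regenerated there with:
kubeadm token create --print-join-command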
Check status:
[root@sc-stor-dmd01 ~]# export KUBECONFIG=/etc/kubernetes/admin.conf
[root@sc-stor-dmd01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
sc-stor-dmd01.osris.org Ready master 19m v1.12.2
sc-stor-nvm01.osris.org Ready <none> 38s v1.12.2
Remove the master taint so pods can be scheduled on all nodes:
[root@sc-stor-dmd01 ~]# kubectl taint nodes --all node-role.kubernetes.io/master-
node/sc-stor-dmd01.osris.org untainted
error: taint "node-role.kubernetes.io/master:" not found
(The error is expected; the worker node never had the master taint to remove.)
Edited the iptables rules again to add the SLATE API server IP on both boxes:
-A INPUT -s 128.135.158.222/32 -p tcp -m multiport --dports 6443 -m comment --comment ignore -j ACCEPT
Install the SLATE client on the 'head' node:
[root@sc-stor-dmd01 ~]# curl -O http://jenkins.slateci.io/artifacts/slate-linux.tar.gz
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1367k 100 1367k 0 0 3956k 0 --:--:-- --:--:-- --:--:-- 3951k
[root@sc-stor-dmd01 ~]# tar -xvzf slate-linux.tar.gz
slate
[root@sc-stor-dmd01 ~]# mv slate /usr/local/bin
[root@sc-stor-dmd01 ~]#
Went to www-dev.slateci.io, got a token, and ran the script:
#!/bin/sh
mkdir -p -m 0700 "$HOME/.slate"
if [ "$?" -ne 0 ] ; then
echo "Not able to create $HOME/.slate" 1>&2
exit 1
fi
echo "<token>" > "$HOME/.slate/token"
if [ "$?" -ne 0 ] ; then
echo "Not able to write token data to $HOME/.slate/token" 1>&2
exit 1
fi
chmod 600 "$HOME/.slate/token"
echo 'https://api-dev.slateci.io:18080' > ~/.slate/endpoint
echo "SLATE access token successfully stored"
Then:
export KUBECONFIG=/etc/kubernetes/admin.conf
slate cluster create --kubeconfig /etc/kubernetes/admin.conf um-sc18 --vo slate-dev
Some of the output (not complete; I ran into a bug and re-ran it):
Extracting kubeconfig from /etc/kubernetes/admin.conf...
Checking for privilege level/deployment controller status...
Controller is deployed
Checking for federation ClusterRole...
ClusterRole is defined
SLATE should be granted access using a ServiceAccount created with a Cluster
object by the nrp-controller. Do you want to create such a ServiceAccount
automatically now? [y]/n: y
Please enter the name you would like to give the ServiceAccount and core
SLATE namespace. The default is 'slate-system':
Creating Cluster 'slate-system'...
Locating ServiceAccount credentials...
Extracting CA data...
Determining server address...
Extracting ServiceAccount token...
Done generating config with limited privileges
Sending config to SLATE server...
Successfully created cluster um-sc18 with ID Cluster_<ID>
Once that's done we allow the atlas-xcache VO:
# slate cluster allow-vo um-sc18 atlas-xcache
Successfully granted VO atlas-xcache access to cluster um-sc18
Run parted, then:
mklabel GPT
mkpart primary 2048s 100%
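Spelled out, the parted session looks roughly like this (a sketch; /dev/nvme0n1 is assumed, matching the partition mounted below):
parted /dev/nvme0n1
(parted) mklabel GPT
(parted) mkpart primary 2048s 100%
(parted) quit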
Then (from the shell history): adjusted iptables again, unmounted the old rbd-backed /xcache-osiris (killing the xcache process that was holding it), made an XFS filesystem on the new NVMe partition, and mounted it at /xcache-osiris:
984 mount
985 iptables-save > iptables2
986 vi iptables2
987 iptables-restore iptables2
988 mount
989 umount /dev/rbd0
990 docker ps
991 umount /dev/rbd0
992 lsof | grep xcache
993 kill 61608
994 lsof | grep xcache
995 kill -9 61608
996 umount /xcache-osiris/
997 mount /dev/nvme0n1p1 /xcache-osiris/
998 mkfs.xfs /dev/nvme0n1p1
999 mount /dev/nvme0n1p1 /xcache-osiris/
1000 history
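If the XCache mount should survive a reboot, an /etc/fstab entry along these lines would also be needed (an assumption; not part of the captured history):
/dev/nvme0n1p1  /xcache-osiris  xfs  defaults  0  0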