author    Suren A. Chilingaryan <csa@suren.me>  2018-03-22 04:37:46 +0100
committer Suren A. Chilingaryan <csa@suren.me>  2018-03-22 04:37:46 +0100
commit    0b0b9954c2d0602b1e9d0a387d2a195a790f8084 (patch)
tree      f0a3a26edd280b8fd84945944bd153f411b31fea
parent    e2c7b1305ca8495065dcf40fd2092d7c698dd6ea (diff)
Various fixes and provide ADEI admin container...
-rw-r--r--  docs/databases.txt                                     |  14
-rw-r--r--  docs/info.txt                                          |  31
-rw-r--r--  docs/kickstart.txt                                     |  13
-rw-r--r--  docs/status.txt                                        | 119
-rw-r--r--  group_vars/OSEv3.yml                                   |   5
-rwxr-xr-x  kickstart/ipmi.sh                                      |  13
-rw-r--r--  opts.sh                                                |   4
-rw-r--r--  playbooks/ands-gluster-ganesha.yml                     |   6
-rw-r--r--  playbooks/maintain.yml                                 |   6
-rw-r--r--  playbooks/openshift-setup-apps.yml                     |   5
-rw-r--r--  playbooks/openshift-setup-project.yml                  |   5
-rw-r--r--  playbooks/openshift-setup-projects.yml                 |   5
-rw-r--r--  playbooks/openshift-setup-security.yml                 |   5
-rw-r--r--  playbooks/openshift-setup-storage.yml                  |   5
-rw-r--r--  playbooks/openshift-setup-users.yml                    |   5
-rw-r--r--  playbooks/openshift-setup-vpn.yml                      |   5
-rw-r--r--  playbooks/openshift-setup.yml                          |   6
-rw-r--r--  playbooks/software.yml                                 |  12
-rw-r--r--  roles/ands_common/tasks/main.yml                       |   7
-rw-r--r--  roles/ands_common/tasks/software.yml                   |  18
-rw-r--r--  roles/ands_facts/tasks/main.yml                        |   5
-rw-r--r--  roles/ands_facts/tasks/node.yml                        |   4
-rw-r--r--  roles/ands_facts/tasks/nodes.yml                       |   1
-rw-r--r--  roles/ands_kaas/tasks/do_apps.yml                      |   2
-rw-r--r--  roles/ands_kaas/tasks/do_storage.yml                   |   4
-rw-r--r--  roles/ands_kaas/tasks/project.yml                      |   2
-rw-r--r--  roles/ands_kaas/tasks/template.yml                     |   4
-rw-r--r--  roles/ands_network/tasks/nm_configure.yml              |   3
-rw-r--r--  roles/ands_storage/tasks/main.yml                      |   8
-rw-r--r--  roles/openshift_resource/tasks/template.yml            |   4
-rw-r--r--  roles/role_includer/tasks/main.yml                     |   5
-rwxr-xr-x  setup.sh                                               |   3
-rw-r--r--  setup/projects/adei/templates/01-secret.yml.j2         |   2
-rw-r--r--  setup/projects/adei/templates/01-webdav-secret.yml.j2  |  17
-rw-r--r--  setup/projects/adei/templates/60-adei.yml.j2           |   5
-rw-r--r--  setup/projects/adei/vars/apps.yml                      |   2
-rw-r--r--  setup/projects/adei/vars/globals.yml                   |  69
-rw-r--r--  setup/projects/adei/vars/mysql.yml                     |   8
-rw-r--r--  setup/projects/adei/vars/mysql_galera.yml              |   1
-rw-r--r--  setup/projects/adei/vars/volumes.yml                   |   8
40 files changed, 332 insertions(+), 114 deletions(-)
diff --git a/docs/databases.txt b/docs/databases.txt
index 331313b..7f8468e 100644
--- a/docs/databases.txt
+++ b/docs/databases.txt
@@ -8,7 +8,7 @@
Gluster/Block MyISAM (no logs) 5 MB/s slow, but OK 200% ~ 50% No problems on reboot, but requires manual work if node crashes to detach volume.
Galera INNODB 3.5 MB/s fast 3 x 200% - Should be perfect, but I am not sure about automatic recovery...
Galera/Hostnet INNODB 4.6 MB/s fast 3 x 200% -
- MySQL Slaves INNODB 5-8 MB/s fast 2 x 250% - Available data is HA, but caching is not. We can easily turn the slave to master.
+ MySQL Slaves INNODB 5-6 MB/s fast 2 x 250% - Available data is HA, but caching is not. We can easily turn the slave to master.
 DRBD MyISAM (no logs) 4-6 exp. ? I expect it to be a faster option, but it does not fit the OpenShift concept that well.
@@ -150,5 +150,15 @@ Master/Slave replication
slave side. Network is not a problem, it is able to get logs from the master, but it is significantly
slower in applying it. The main performance killer is disk sync operations triggered by 'sync_binlog',
 INNODB log flushing, etc. Disabling it brings performance to a reasonable level. Still,
- the master is caching at about 6-8 MB/s and slave at 4-5 MB/s only.
+ the master is caching at about 6-8 MB/s and the slave at 4-5 MB/s only (sometimes dropping below 2 MB/s).
+
+ - The trouble, I think, is that the slave performs a lot of disk writes to 'mysql-relay-bin.*' and
+   'mysql-bin.*'. Taken together, these add up to ~18 MB/s. The solution is to disable binary logging
+   on the slave side. We need the relay log to perform replication, but the binary log on the slave is
+   only needed if another slave chain-replicates from it. However, it is better to disable just the
+   logging of data replicated from the master by turning off 'log_slave_updates'. Then, if the slave
+   is converted to master, it will automatically start logging.
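+
+   In this commit the corresponding knobs are exposed via the container environment (see the
+   setup/projects/adei/vars/mysql.yml hunk below):
+     - { name: "MYSQL_LOG_SLAVE_UPDATES", value: "0" }
+     - { name: "MYSQL_SYNC_BINLOG", value: "0" }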
+
+
+
\ No newline at end of file
diff --git a/docs/info.txt b/docs/info.txt
deleted file mode 100644
index ea00f58..0000000
--- a/docs/info.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-oc -n adei patch dc/mysql --type=json --patch '[{"op": "remove", "path": "/spec/template/spec/nodeSelector"}]'
-oc process -f mysql.yml | oc -n adei replace dc/mysql -f -
-oc -n adei delete --force --grace-period=0 pod mysql-1-m4wcq
-We use rpcbind from the host.
-we need isciinitiators, rpcbind is used for host but check with telnet. The mother volumes are provisioned 100GiB large. So we can't allocate more.
-
-We can use rpcbind (and other services) from the host. Host networking.
-oc -n adei delete --force --grace-period=0 pod mysql-1-m4wcq
-| grep -oP '^GBID:\s*\K.*'
-
-Top level (nodeSelector restarPolciy SecurityContext)
- dnsPolicy: ClusterFirstWithHostNet
- dnsPolicy: ClusterFirst
- hostNetwork: true
-oc -n kaas adm policy add-scc-to-user hostnetwork -z default
-Check (in users list)
-oc get scc hostnetwork -o yaml
-firewall-cmd --add-port=5002/tcp
-
- OnDelete: This is the default update strategy for backward-compatibility. With OnDelete update strategy, after you update a DaemonSet template, new DaemonSet pods will only be created when you manually delete old DaemonSet pods. This is the same behavior of DaemonSet in Kubernetes version 1.5 or before.
- RollingUpdate: With RollingUpdate update strategy, after you update a DaemonSet template, old DaemonSet pods will be killed, and new DaemonSet pods will be created automatically, in a controlled fashion.
-
-Caveat: Updating DaemonSet created from Kubernetes version 1.5 or before
-.spec.updateStrategy.rollingUpdate.maxUnavailable (default to 1) and .spec.minReadySeconds
-
-
-
- “Default”: The Pod inherits the name resolution configuration from the node that the pods run on. See related discussion for more details.
- “ClusterFirst”: Any DNS query that does not match the configured cluster domain suffix, such as “www.kubernetes.io”, is forwarded to the upstream nameserver inherited from the node. Cluster administrators may have extra stub-domain and upstream DNS servers configured. See related discussion for details on how DNS queries are handled in those cases.
- “ClusterFirstWithHostNet”: For Pods running with hostNetwork, you should explicitly set its DNS policy “ClusterFirstWithHostNet”.
- “None”: A new option value introduced in Kubernetes v1.9. This Alpha feature allows a Pod to ignore DNS settings from the Kubernetes environment. All DNS settings are supposed to be provided using the dnsConfig field in the Pod Spec. See DNS config subsection below.
diff --git a/docs/kickstart.txt b/docs/kickstart.txt
new file mode 100644
index 0000000..fb2b5da
--- /dev/null
+++ b/docs/kickstart.txt
@@ -0,0 +1,13 @@
+Troubleshooting
+===============
+ - On re-installation, leftovers from LVM/Device Mapper may cause the CentOS installer
   to crash (even if 'clearpart' is specified). After the first crash, the following may help:
+ * Clean partition tables with
+ dd if=/dev/zero of=/dev/<device> bs=512 count=1024
+ * Destroy rogue LVM VGs
         vgremove <vgname>
+ * Destroy rogue device mapper devices
+ dmsetup info -C
+ dmsetup remove <name>
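     * A more thorough wipe (assumption: the whole device is expendable) clears all filesystem,
       RAID, and LVM signatures in one go
         wipefs -a /dev/<device>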
+
+ \ No newline at end of file
diff --git a/docs/status.txt b/docs/status.txt
new file mode 100644
index 0000000..681c953
--- /dev/null
+++ b/docs/status.txt
@@ -0,0 +1,119 @@
+ The OpenShift cluster is up and running. I don't plan to re-install it unless new severe problems turn up for Bora/KatrinDB.
+It seems the system is fine for ADEI. You can take a look at http://adei-katrin.kaas.kit.edu
+
+So, what we have:
+ - Automatic kickstart of the servers. This is normally done over DHCP. Since I don't have direct control over DHCP,
+   I made a simple system to kickstart over IPMI (see the sketch after this list). Scripts instruct servers to boot
+   from Virtual CD and fetch the kickstart from the web server (ufo.kit.edu actually). The kickstart is generated by
+   a PHP script based on the server name and/or DHCP address.
+ - Ansible-based playbooks to configure the complete cluster. Kickstart produces minimal systems with the ssh server
+   up and running. From there, I have the scripts to build the complete cluster, including databases and the ADEI
+   installation. There are also playbooks for maintenance tasks like adding new nodes, etc.
+ - Upgrades do not always (or actually rarely) run smoothly. To test new configurations before applying them
+   to the production system, I also support provisioning of a staging cluster in vagrant-controlled virtual machines.
+   This is currently running on ipepdvcompute3.
+ - Replicated GlusterFS storage and some security provisions to prevent containers in one project from destroying
+   data belonging to another. A selected subset of the data can be made available over NFS to external hosts, but
+   I'd prefer not to overuse this possibility.
+ - To simplify creating containers with complex storage requirements (like ADEI), there are also Ansible scripts
+   to generate OpenShift templates based on the configured storage and the provided container specification.
+ - To ensure data integrity, the database engines do a lot of locking, syncing, and small writes. This does not
+   play well with network file systems like GlusterFS. It is possible to tune database parameters a bit and run
+   databases with a small intensity of writes, but this is unsuitable for large and write-intensive workloads. The
+   alternative is to store data directly on local storage and use the database's own replication engine to ensure
+   high availability. I have prepared containers to quickly bootstrap two options: Master-Master replication with
+   Galera, and standard MySQL Master-Slave replication. Master/Slave replication is asynchronous and because of
+   that significantly faster, so I use it as a good compromise. It will take about 2 weeks to re-cache all Katrin
+   data. Quite long, but it is even longer with the other options. If the master server crashes, users will still
+   have access to all the historical archives and will be able to proxy data requests to the source database (i.e.
+   BORA will also work). And there is no need to re-cache everything, as the slave can easily be converted to master.
+   Master/Master replication is about 50% slower, but can still be used for smaller databases if uninterrupted
+   writes are also crucial.
+ - Distributed ADEI. All setups are now completely independent (they use different databases, can be stopped and
+   started independently, etc.). Each setup consists of 3 main components:
+ 1) Frontends: There are 3 frontends: production, debugging, and logs. They are accessible individually,
+ like:
+ http://adei-katrin.kaas.kit.edu
+ http://adei-katrin-debug.kaas.kit.edu
+ http://adei-katrin-logs.kaas.kit.edu
+ * The production frontend can be scaled to run several replicas. This is not required for performance,
+ but if 2 copies are running, there will be no service interruption if one of the servers crashes. Otherwise,
+ there is an outage of a dozen minutes while OpenShift detects that the node is gone for good.
+ 2) Maintenance: There are cron-style containers performing various maintenance tasks. In particular, they
+ analyze the current data-source configuration and schedule the caching.
+ 3) Cachers: The caching is performed by 3 groups of containers. One is responsible for current data,
+ the second for archives, and the third for logs. Each group can be scaled independently, i.e. in the beginning
+ I run multiple archive-caching replicas to get the data in; then the focus shifts to getting current data
+ faster. It also may differ significantly between setups. Katrin will run multiple caching replicas, but less
+ important or small data sources will get only one.
+ * This architecture also allows removing the 1-minute update latency. I am not sure we can be very fast with
+ the large Katrin setup on the current minimalistic cluster, but technically the updates can be as fast as the
+ hardware allows.
+ - There is an OpenShift template to instantiate all these containers in one go by providing a few parameters. The
+   only requirement is to have a 'setup' configuration in my repository. I also included in the ADEI sources a bunch
+   of scripts to start all known setups with preconfigured parameters and to perform various maintenance tasks, like:
+       ./provision.sh -s katrin      - to launch the katrin setup on the cluster
+       ./stop-caching.sh -s katrin   - to temporarily stop caching
+       ./start-caching.sh -s katrin  - to restart caching with the pre-configured number of replicas
+       ...
+ - Load-balancing and high-availability using 2 ping-pong IPs. By default the katrin1 & katrin2 IPs are assigned to
+   the two masters of the cluster. To balance load, kaas.kit.edu is resolved in round-robin fashion to one of them.
+   If one master fails, its IP migrates to the remaining master and no service interruption occurs. Both masters
+   run OpenVPN tunnels to the Katrin network. The remaining node routes through one of the masters. This configuration
+   is also highly available and should not suffer if one of the masters crashes.
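+
+   A minimal sketch of the IPMI-driven kickstart boot mentioned above (hypothetical $bmc/$user/$pass
+   variables; the real logic lives in kickstart/ipmi.sh):
+       ipmitool -H $bmc -U $user -P $pass chassis bootdev cdrom   # boot from the Virtual CD once
+       ipmitool -H $bmc -U $user -P $pass chassis power cycle     # reboot into the installer
+       # the installer then fetches the generated kickstart from the web server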
+
+What we are still missing:
+ - The Katrin database. Marco has prepared containers using the prototype I ran last year. Hopefully, Jan can
+   run it on the new system with a minimal number of problems.
+ - BORA still needs to be moved.
+ - Then, I will decommission the old Katrin server.
+
+Fault-tolerance and high-availability
+=====================================
+ - I have tested a bit for fault tolerance and recoverability. Both GlusterFS and OpenShift work fine if a single
+   node fails. All data is available and new data can be written without problems. There is also no service
+   interruption, as ADEI runs two frontends and also includes a backup MySQL server. Only caching may stop if
+   the master MySQL server is hit.
+ - If a node recovers, it will be re-integrated automatically. We may only need to manually convert the MySQL slave
+   to master. Adding replacement nodes also works quite easily using the provided provisioning playbooks, but the
+   Gluster bricks need to be migrated manually. I also provide some scripts to simplify this task.
+ - The situation is worse if the cluster is completely turned off and on again. The storage survives quite well, but
+   it is necessary to check that all volumes are fully healthy (sometimes a volume loses some bricks and needs to be
+   restarted to reconnect them; see the check below). Also, some pods running before the reboot may fail to start.
+   Overall, it is better to avoid this. If a reboot is needed for some reason, the best approach is a rolling reboot,
+   restarting one node after another to keep the cluster always alive.
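+
+   A quick volume health check after a restart (standard GlusterFS commands; <volume> is a placeholder):
+       gluster volume status <volume>        # all bricks should be shown as online
+       gluster volume heal <volume> info     # entries pending heal should drain to zero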
+
+Performance
+===========
+ I'd say it is more-or-less on par with the old system (which is expected). The computing capabilities are
+ considerably faster (still, there is a significant load on the master servers to run the cluster), but network and
+ storage have more-or-less the same speed.
+ - In fact we have only a single storage node. The second is used for replication, and a third node is required
+   for arbitration in the split-brain case. I would be able to use the third node for storage as well, but I need
+   at least a 4th node in the cluster to do it. The new drives are slightly faster, but the added replication slows
+   down performance considerably.
+ - The containers can't use the InfiniBand network efficiently. Bridging is used to allow fast networking
+   in containers. Unfortunately, IPoIB is a high-level network layer and does not provide the Ethernet L2 support
+   required to create the bridge. Consequently, there is a lot of packet re-building going on and network
+   performance is capped at about 3 Gbit/s for containers. It is not really a big problem now, as the host systems
+   are not limited, so the storage is able to use the full bandwidth.
+
+Upgrades
+========
+ We may need to scale the cluster slightly if it is to be used beyond Katrin's needs or if Katrin's load increases
+ significantly. Having 1-2 more nodes should help the storage system. It may also be worth adding a 40Gbit Ethernet
+ switch. The Mellanox cards work in both Ethernet and InfiniBand modes. Their switch actually does as well, but they
+ want 5 kEUR for the license to enable this feature. I guess for this money we should be able to buy a new piece
+ of hardware.
+
+ Having more storage nodes, we can also prototype new data management solutions without disturbing Katrin
+ tasks. One idea would be to run Apache Cassandra and try to re-use the data broker developed by the university
+ group to get ADEI data into their cluster. Then we can add an analysis framework on top of ADEI using
+ Jupyter notebooks with Cassandra. Furthermore, there is an alpha version of NVIDIA GPU support for
+ OpenShift, so we could also try to integrate some computing workload and potentially run WAVe inside as well.
+ I'd not do it on the production system, but if we get new nodes we may first try to set up a second OpenShift
+ cluster for testing (a pair of storage nodes + a GPU node) and later re-integrate it with the main one.
+
+ As running without shutdowns is pretty crucial, another question is whether we can put the servers somewhere
+ at SCC with reliable power supplies, air conditioning, etc. I guess we can't expect to run without shutdowns
+ in our server room.
diff --git a/group_vars/OSEv3.yml b/group_vars/OSEv3.yml
index 7bf2fb1..5000804 100644
--- a/group_vars/OSEv3.yml
+++ b/group_vars/OSEv3.yml
@@ -4,6 +4,9 @@ openshift_master_cluster_method: "native"
openshift_release: "v3.7.2"
#openshift_image_tag: "v3.7.2"
+# Still not available
+openshift_metrics_image_version: "v3.7.1"
+
#containerized: true
containerized: false
os_firewall_use_firewalld: true
@@ -21,7 +24,7 @@ os_firewall_use_firewalld: true
#openshift_pkg_version=-3.7.0
#openshift_cockpit_deployer_version=latest
#openshift_metrics_image_prefix=docker.io/openshift/origin-
-#openshift_metrics_image_version=v3.7
+#openshift_metrics_image_version=v3.7.1
#openshift_logging_image_prefix=docker.io/openshift/origin-
#openshift_logging_image_version=v3.7.0
#openshift_service_catalog_image_prefix=docker.io/openshift/origin-
diff --git a/kickstart/ipmi.sh b/kickstart/ipmi.sh
index 4ccd749..4571fb0 100755
--- a/kickstart/ipmi.sh
+++ b/kickstart/ipmi.sh
@@ -65,6 +65,17 @@ function reboot {
sleep $sleep
}
+function bios {
+ host=$1
+
+ ipmi $host power off
+ sleep 10
+ ipmi $host chassis bootdev bios
+ sleep $sleep
+ ipmi $host power on
+}
+
+
function status {
host=$1
@@ -117,6 +128,8 @@ elif [[ "$1" =~ reboot ]]; then
action="reboot"
elif [[ "$1" =~ boot ]]; then
action="boot"
+elif [[ "$1" =~ bios ]]; then
+ action="bios"
elif [[ "$1" =~ status ]]; then
action="status"
elif [[ "$1" =~ wait ]]; then
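The new mode is then invoked like the existing actions (sketch; host selection follows the script's existing argument handling):

    ./kickstart/ipmi.sh bios <host>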
diff --git a/opts.sh b/opts.sh
index 10a2f07..5e77848 100644
--- a/opts.sh
+++ b/opts.sh
@@ -54,6 +54,10 @@ Actions:
 check - check current setup and report if any maintenance should be performed
setup <type> - executes specific configuration task from ands-openshift
Tasks: users, ssh, storage, heketi
+
+ Host system management
+ software - Install additionally configured software
+
Custom actions
playbook.yml - execute the specified playbook (after ands_facts)
role - generates temporary playbook and executes the role
diff --git a/playbooks/ands-gluster-ganesha.yml b/playbooks/ands-gluster-ganesha.yml
index a347c4f..cbdf72c 100644
--- a/playbooks/ands-gluster-ganesha.yml
+++ b/playbooks/ands-gluster-ganesha.yml
@@ -1,8 +1,4 @@
-- name: Common setup procedures
- hosts: ands
- roles:
- - role: ands_facts
-
+- import_playbook: maintain.yml
- name: Configure GlusterFS cluster
hosts: masters, new_masters
diff --git a/playbooks/maintain.yml b/playbooks/maintain.yml
index 80061ec..0a8cfe4 100644
--- a/playbooks/maintain.yml
+++ b/playbooks/maintain.yml
@@ -4,3 +4,9 @@
- role: ands_facts
- { role: ands_network, action: maintain }
+# We need all mount points ready
+- name: "Run mount -a"
+ hosts: ands
+ tasks:
+ - command: mount -a
+ changed_when: false
diff --git a/playbooks/openshift-setup-apps.yml b/playbooks/openshift-setup-apps.yml
index 0719f5d..6cc8a0c 100644
--- a/playbooks/openshift-setup-apps.yml
+++ b/playbooks/openshift-setup-apps.yml
@@ -1,7 +1,4 @@
-- name: Analyze Ands configuration
- hosts: nodes
- roles:
- - { role: ands_facts }
+- import_playbook: maintain.yml
- name: "Configure apps for {{ kaas_single_project }}"
hosts: masters
diff --git a/playbooks/openshift-setup-project.yml b/playbooks/openshift-setup-project.yml
index b36301b..8a8c49a 100644
--- a/playbooks/openshift-setup-project.yml
+++ b/playbooks/openshift-setup-project.yml
@@ -1,7 +1,4 @@
-- name: Analyze Ands configuration
- hosts: nodes
- roles:
- - { role: ands_facts }
+- import_playbook: maintain.yml
- name: Configure per-node {{ kaas_single_project }} project storage
hosts: ands_storage_servers
diff --git a/playbooks/openshift-setup-projects.yml b/playbooks/openshift-setup-projects.yml
index 16b9e66..aac5eb0 100644
--- a/playbooks/openshift-setup-projects.yml
+++ b/playbooks/openshift-setup-projects.yml
@@ -1,7 +1,4 @@
-- name: Analyze Ands configuration
- hosts: nodes
- roles:
- - { role: ands_facts }
+- import_playbook: maintain.yml
- name: Configure users & user projects
hosts: masters
diff --git a/playbooks/openshift-setup-security.yml b/playbooks/openshift-setup-security.yml
index ba96354..af7b9e9 100644
--- a/playbooks/openshift-setup-security.yml
+++ b/playbooks/openshift-setup-security.yml
@@ -1,7 +1,4 @@
-- name: Analyze Ands configuration
- hosts: nodes
- roles:
- - { role: ands_facts }
+- import_playbook: maintain.yml
- name: Configure security
hosts: masters
diff --git a/playbooks/openshift-setup-storage.yml b/playbooks/openshift-setup-storage.yml
index 64099bc..7bc1b22 100644
--- a/playbooks/openshift-setup-storage.yml
+++ b/playbooks/openshift-setup-storage.yml
@@ -1,8 +1,5 @@
---
-- name: Analyze Ands configuration
- hosts: nodes
- roles:
- - { role: ands_facts }
+- import_playbook: maintain.yml
- name: Configure GlusterFS storage
hosts: nodes
diff --git a/playbooks/openshift-setup-users.yml b/playbooks/openshift-setup-users.yml
index 998dd59..03057d9 100644
--- a/playbooks/openshift-setup-users.yml
+++ b/playbooks/openshift-setup-users.yml
@@ -1,7 +1,4 @@
-- name: Analyze Ands configuration
- hosts: nodes
- roles:
- - { role: ands_facts }
+- import_playbook: maintain.yml
- name: Configure users
hosts: masters
diff --git a/playbooks/openshift-setup-vpn.yml b/playbooks/openshift-setup-vpn.yml
index c6db977..ccac69e 100644
--- a/playbooks/openshift-setup-vpn.yml
+++ b/playbooks/openshift-setup-vpn.yml
@@ -1,7 +1,4 @@
-- name: Analyze Ands configuration
- hosts: ands
- roles:
- - role: ands_facts
+- import_playbook: maintain.yml
- name: OpenVPN service
hosts: nodes
diff --git a/playbooks/openshift-setup.yml b/playbooks/openshift-setup.yml
index d5675e4..4af10cb 100644
--- a/playbooks/openshift-setup.yml
+++ b/playbooks/openshift-setup.yml
@@ -1,7 +1,9 @@
-- name: Analyze Ands configuration
+- import_playbook: maintain.yml
+
+- name: Configure Firewall
hosts: ands
roles:
- - role: ands_facts
+ - { role: ands_network, action: firewall }
- name: Various OpenShift resources
hosts: nodes
diff --git a/playbooks/software.yml b/playbooks/software.yml
new file mode 100644
index 0000000..884be34
--- /dev/null
+++ b/playbooks/software.yml
@@ -0,0 +1,12 @@
+#- name: Determine Ands facts
+# hosts: ands
+# roles:
+# - role: ands_facts
+
+- name: Add missing software
+ hosts: ands
+ roles:
+ - role_includer
+ vars:
+ role_includer_name: ands_common
+ role_includer_tasks: software.yml
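This playbook is also wired into setup.sh (see the setup.sh hunk below), so the usual entry point is the wrapper:

    ./setup.sh software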
diff --git a/roles/ands_common/tasks/main.yml b/roles/ands_common/tasks/main.yml
index e9196ad..4027b7a 100644
--- a/roles/ands_common/tasks/main.yml
+++ b/roles/ands_common/tasks/main.yml
@@ -16,7 +16,6 @@
package: name={{item}} state=present
register: result
with_items:
- - mc
- bzr
- git
- yamllint
@@ -29,7 +28,6 @@
- PyYAML
- python-rhsm-certificates
- glusterfs-fuse
- - telnet
- yum-plugin-versionlock
# We always update on first install and if requested
@@ -42,6 +40,5 @@
# with_items:
# - nodejs
-- name: Ensure all extra packages are installed
- package: name={{item}} state=present
- with_items: "{{ extra_packages | default([]) }}"
+- name: Install additional software
+ include_tasks: software.yml
diff --git a/roles/ands_common/tasks/software.yml b/roles/ands_common/tasks/software.yml
new file mode 100644
index 0000000..ea37b51
--- /dev/null
+++ b/roles/ands_common/tasks/software.yml
@@ -0,0 +1,18 @@
+- name: Install various administrative tools
+ package: name={{item}} state=present
+ with_items:
+ - mc
+ - telnet
+ - lsof
+ - strace
+
+# We can also install something conditionally, e.g.
+#- name: Install various administrative tools
+#  package: name={{item}} state=present
+#  when: "'ands_storage_servers' in group_names"
+# with_items:
+
+
+- name: Ensure all extra packages are installed
+ package: name={{item}} state=present
+ with_items: "{{ extra_packages | default([]) }}"
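The extra_packages hook lets the inventory extend this list per group; a hypothetical group_vars sketch (file location assumed):

    # group_vars/ands.yml
    extra_packages:
      - htop
      - iotop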
diff --git a/roles/ands_facts/tasks/main.yml b/roles/ands_facts/tasks/main.yml
index 54c800a..5f1a5d7 100644
--- a/roles/ands_facts/tasks/main.yml
+++ b/roles/ands_facts/tasks/main.yml
@@ -1,9 +1,4 @@
---
-# We need all mount points ready
-- name: "Run mount -a"
- command: mount -a
- changed_when: false
-
# Here we set 'openshift_hostname', 'openshift_ip' and other variables
- name: "Configuring network facts"
include_tasks: "network.yml"
diff --git a/roles/ands_facts/tasks/node.yml b/roles/ands_facts/tasks/node.yml
index e30442c..59d2d80 100644
--- a/roles/ands_facts/tasks/node.yml
+++ b/roles/ands_facts/tasks/node.yml
@@ -1,5 +1,5 @@
-- name: "Associating public host names with ids"
+- name: "Associating public host names with ids (host_id: {{ host_id }})"
set_fact: "ands_host_{{ host_id }}_public_hostname={{ host.value['ands_openshift_public_hostname'] }}"
-- name: "Associating openshift fqdn with ids"
+- name: "Associating openshift fqdn with ids (host_id: {{ host_id }})"
set_fact: "ands_host_{{ host_id }}_openshift_fqdn={{ host.value['ands_openshift_fqdn'] }}"
diff --git a/roles/ands_facts/tasks/nodes.yml b/roles/ands_facts/tasks/nodes.yml
index ebe8091..d9f61df 100644
--- a/roles/ands_facts/tasks/nodes.yml
+++ b/roles/ands_facts/tasks/nodes.yml
@@ -3,6 +3,7 @@
run_once: true
delegate_to: "{{ groups['masters'][0] }}"
with_dict: "{{ hostvars }}"
+ when: host.key in groups['ands']
vars:
host_id: "{{ host.value['ands_host_id'] }}"
host_name: "{{ host.value['ansible_hostname'] }}"
diff --git a/roles/ands_kaas/tasks/do_apps.yml b/roles/ands_kaas/tasks/do_apps.yml
index 39283b4..0e49981 100644
--- a/roles/ands_kaas/tasks/do_apps.yml
+++ b/roles/ands_kaas/tasks/do_apps.yml
@@ -1,7 +1,7 @@
- name: "Process KaaS apps"
include_tasks: "template.yml"
run_once: true
- with_dict: "{{ kaas_project_apps }}"
+ with_dict: "{{ kaas_project_apps | default({}) }}"
loop_control:
loop_var: appitem
when:
diff --git a/roles/ands_kaas/tasks/do_storage.yml b/roles/ands_kaas/tasks/do_storage.yml
index e79db56..8a6a880 100644
--- a/roles/ands_kaas/tasks/do_storage.yml
+++ b/roles/ands_kaas/tasks/do_storage.yml
@@ -18,7 +18,7 @@
volume: "{{osv.value}}"
when:
- ( mntpath | length ) > 0
- - (kaas_storage_types is not defined) or ((osv.type | default("host")) in kaas_storage_types)
+ - (kaas_storage_types is not defined) or (voltype in kaas_storage_types)
- name: Check if static configuration exists
local_action: stat path="{{ kaas_project_path }}/files/"
@@ -50,4 +50,4 @@
hostpath: "{{ hostvars[inventory_hostname][hvar] }}/{{ file.path }}"
when:
- file.osv in kaas_project_volumes
- - (kaas_storage_types is not defined) or ((osv.type | default("host")) in kaas_storage_types)
+ - (kaas_storage_types is not defined) or (voltype in kaas_storage_types)
diff --git a/roles/ands_kaas/tasks/project.yml b/roles/ands_kaas/tasks/project.yml
index 26bd0cc..879f34a 100644
--- a/roles/ands_kaas/tasks/project.yml
+++ b/roles/ands_kaas/tasks/project.yml
@@ -59,7 +59,7 @@
kaas_project_volumes: "{{ kaas_project_config.volumes | default(kaas_project_config.extra_volumes | default({}) | combine(kaas_openshift_volumes)) }}"
kaas_project_local_volumes: "{{ kaas_project_config.local_volumes | default({}) }}"
kaas_project_pods: "{{ kaas_project_config.pods | default({}) }}"
- kaas_project_apps: "{{ kaas_project_config.apps | default([]) }}"
+ kaas_project_apps: "{{ kaas_project_config.apps | default({}) }}"
kaas_project_gids: "{{ kaas_project_config.gids | default(kaas_openshift_gids) }}"
kaas_project_uids: "{{ kaas_project_config.uids | default(kaas_openshift_uids) }}"
kaas_blockvol_info: "{{ block_info }}"
diff --git a/roles/ands_kaas/tasks/template.yml b/roles/ands_kaas/tasks/template.yml
index 87e45a6..841c80e 100644
--- a/roles/ands_kaas/tasks/template.yml
+++ b/roles/ands_kaas/tasks/template.yml
@@ -20,8 +20,8 @@
template: "{{ dest_name }}"
template_path: "{{ kaas_template_path }}"
project: "{{ kaas_project }}"
- recreate: "{{ result | changed | ternary (delete | ternary(true, false), false) }}"
- replace: "{{ result | changed | ternary (delete | ternary(false, true), false) }}"
+ recreate: "{{ result | changed | ternary (delete | default(true) | ternary(true, false), false) }}"
+ replace: "{{ result | changed | ternary (delete | default(true) | ternary(false, true), false) }}"
# alternatively load template
# TODO
diff --git a/roles/ands_network/tasks/nm_configure.yml b/roles/ands_network/tasks/nm_configure.yml
index 57e40ca..5484bb2 100644
--- a/roles/ands_network/tasks/nm_configure.yml
+++ b/roles/ands_network/tasks/nm_configure.yml
@@ -47,6 +47,3 @@
iface: "{{ ands_public_interface }}"
cidr: "{{ ands_openshift_public_cidr }}"
alias: true
-
-- name: Configure firewall
- include_tasks: firewall.yml
diff --git a/roles/ands_storage/tasks/main.yml b/roles/ands_storage/tasks/main.yml
index 8e9d44b..7146da0 100644
--- a/roles/ands_storage/tasks/main.yml
+++ b/roles/ands_storage/tasks/main.yml
@@ -43,14 +43,18 @@
lvol: vg="{{ ands_data_vg }}" lv="{{ ands_data_lv }}" size="{{ ands_data_volume_size }}"
- name: Ensure Ands Data Volume is formatted and resize if necessary
- filesystem: fstype="xfs" resizefs="yes" dev="/dev/{{ ands_data_vg }}/{{ ands_data_lv }}"
+ filesystem: fstype="xfs" dev="/dev/{{ ands_data_vg }}/{{ ands_data_lv }}"
- name: Mount Ands Data Volume
mount: name="{{ ands_data_path }}" src="/dev/{{ ands_data_vg }}/{{ ands_data_lv }}" fstype="{{ ands_data_fs }}" opts="defaults" state="mounted"
+# The system complains if we try to resize an unmounted file system, so resize only after mounting
+- name: Resize Ands Data Volume if necessary
+ filesystem: fstype="xfs" resizefs="yes" dev="/dev/{{ ands_data_vg }}/{{ ands_data_lv }}"
+
- name: Provision Ands local storage domains
include_tasks: hostmount.yml
with_items: "{{ ands_local_storage_domains | default([]) }}"
- when: domain.servers | intersect(group_names) | length > 0
+ when: domain.servers in group_names
loop_control:
loop_var: domain
diff --git a/roles/openshift_resource/tasks/template.yml b/roles/openshift_resource/tasks/template.yml
index 3469464..f43b0f2 100644
--- a/roles/openshift_resource/tasks/template.yml
+++ b/roles/openshift_resource/tasks/template.yml
@@ -22,8 +22,8 @@
when: ((recreate|default(false)) or (results | changed)) and (results.results[item|int].rc == 0)
 # Replace often complains about various immutable fields it can't change. We ignore this.
- - name: "{{ template }}: Populate resources to {{project}} ({{ replace | ternary('replace', 'create') }})"
- shell: "oc process -n {{project}} -f '{{ template_path }}/{{template}}' {{ template_args | default('') }} | oc {{ replace | ternary('replace', 'create') }} -n {{project}} -f - {{ create_args | default('') }}"
+ - name: "{{ template }}: Populate resources to {{project}} ({{ replace | default(false) | ternary('replace', 'create') }})"
+ shell: "oc process -n {{project}} -f '{{ template_path }}/{{template}}' {{ template_args | default('') }} | oc {{ replace | default(false) | ternary('replace', 'create') }} -n {{project}} -f - {{ create_args | default('') }}"
register: status
failed_when: (status.rc != 0) and not (replace | default(false))
when:
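The rendered pipeline is the usual oc template idiom (sketch with placeholder names):

    oc process -n <project> -f <template_path>/<template> | oc replace -n <project> -f -

With 'replace' left at its default of false, a plain 'oc create' is used and a non-zero exit is fatal; with replace enabled, complaints about immutable fields are tolerated.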
diff --git a/roles/role_includer/tasks/main.yml b/roles/role_includer/tasks/main.yml
new file mode 100644
index 0000000..1044236
--- /dev/null
+++ b/roles/role_includer/tasks/main.yml
@@ -0,0 +1,5 @@
+# This does not work in pristine Ansible 2.4.3; it needs a patch back-ported from upstream (released in February 2018)
+- name: "Processing tasks in '{{ role_includer_tasks }}' from role '{{ role_includer_name }}'"
+ include_role:
+ name: "{{ role_includer_name }}"
+ tasks_from: "{{ role_includer_tasks | default('main.yml') }}"
diff --git a/setup.sh b/setup.sh
index 57e002c..ec862d6 100755
--- a/setup.sh
+++ b/setup.sh
@@ -117,6 +117,9 @@ case "$action" in
maintain)
apply playbooks/maintain.yml "$@" || exit
;;
+ software)
+ apply playbooks/software.yml "$@" || exit
+ ;;
setup)
subrole=$2
shift
diff --git a/setup/projects/adei/templates/01-secret.yml.j2 b/setup/projects/adei/templates/01-secret.yml.j2
index 44d5914..d792e4c 100644
--- a/setup/projects/adei/templates/01-secret.yml.j2
+++ b/setup/projects/adei/templates/01-secret.yml.j2
@@ -1,7 +1,7 @@
apiVersion: v1
kind: Template
metadata:
- name: adei-build
+ name: adei-secret
labels:
app: adei
annotations:
diff --git a/setup/projects/adei/templates/01-webdav-secret.yml.j2 b/setup/projects/adei/templates/01-webdav-secret.yml.j2
new file mode 100644
index 0000000..f09b1ec
--- /dev/null
+++ b/setup/projects/adei/templates/01-webdav-secret.yml.j2
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Template
+metadata:
+ name: adei-webdav-secret
+ labels:
+ app: adei
+ annotations:
+ descriptions: "ADEI Repository Secrets"
+objects:
+- apiVersion: v1
+ kind: Secret
+ metadata:
+ annotations:
+ template.openshift.io/expose-repo: '{.data[''repo'']}'
+ name: webdav
+ stringData:
+ repo: "https+webdav://adei:{{ kaas_project_config.adei_password }}@darksoft.org/openshift"
diff --git a/setup/projects/adei/templates/60-adei.yml.j2 b/setup/projects/adei/templates/60-adei.yml.j2
index 7eafd33..b3e6755 100644
--- a/setup/projects/adei/templates/60-adei.yml.j2
+++ b/setup/projects/adei/templates/60-adei.yml.j2
@@ -24,7 +24,7 @@ objects:
{% set pull_policy = "Always" %}
{% set restart_policy = "Always" %}
{% else %}
-{% set pod_type = "cacher" %}
+{% set pod_type = cfg.type | default("cacher") %}
{% set pull_policy = "Always" %}
{% set restart_policy = "Always" %}
{% endif %}
@@ -247,3 +247,6 @@ parameters:
description: "Schedule of cleaning tasks"
- name: "adei_revision"
value: "last:1"
+ - name: "maintainer"
+ value: "Suren A. Chilingaryan <csa@suren.me>"
+ \ No newline at end of file
diff --git a/setup/projects/adei/vars/apps.yml b/setup/projects/adei/vars/apps.yml
index 20cdefe..1c2aad3 100644
--- a/setup/projects/adei/vars/apps.yml
+++ b/setup/projects/adei/vars/apps.yml
@@ -1,5 +1,5 @@
apps:
- mysql: { provision: true, instantiate: false }
+ mysql: { provision: true, instantiate: true }
galera: { provision: true, instantiate: false }
# simple_mysql: { provision: false, instantiate: false }
phpmyadmin: { provision: true, instantiate: true }
diff --git a/setup/projects/adei/vars/globals.yml b/setup/projects/adei/vars/globals.yml
index 8435926..fef5a5b 100644
--- a/setup/projects/adei/vars/globals.yml
+++ b/setup/projects/adei/vars/globals.yml
@@ -24,6 +24,13 @@ adei_pod_env:
value: "${adei_revision}"
- name: "ADEI_PATH"
value: "/adei/src"
+ - name: "ADEI_CACHE_ENGINE"
+ value: "INNODB"
+ - name: "ADEI_REPOSITORY"
+ valueFrom:
+ secretKeyRef:
+ name: webdav
+ key: repo
adei_prod_env:
- name: "MYSQL_SERVER"
@@ -81,6 +88,28 @@ adei_update_env:
- name: "ADEI_CONTINUOUS_CACHING"
value: "${continuous_caching}"
+adei_admin_env:
+ - name: "ADEI_ADMIN"
+ value: "1"
+ - name: "MYSQL_SERVER"
+ value: "mysql-master.adei.svc.cluster.local"
+ - name: "ADEI_SETUP"
+ value: "${setup}"
+ - name: "ADEI_URL"
+ value: "http://adei-${setup}-debug.adei.svc.cluster.local/adei"
+ - name: "ADEI_SCHEDULER"
+ value: "${sched_parallel}"
+ - name: "ADEI_PARALLEL"
+ value: "${cache_parallel}"
+ - name: "BZR_EMAIL"
+ value: "${maintainer}"
+ - name: "ENV"
+ value: "~/.bashrc"
+ - name: "HOME"
+ value: "/data/home"
+ - name: "SHELL"
+ value: "/bin/bash"
+
adei_cache_env:
- name: "MYSQL_SERVER"
value: "mysql-master.adei.svc.cluster.local"
@@ -119,9 +148,6 @@ adei_log_cache_env:
adei_pod_vols:
- - name: adei-etc
- persistentVolumeClaim:
- claimName: adei-etc
- name: adei-src
persistentVolumeClaim:
claimName: adei-src
@@ -138,15 +164,10 @@ adei_pod_vols:
persistentVolumeClaim:
claimName: adei-log
-adei_prod_mounts:
- - name: adei-src
- subPath: prod
- mountPath: /adei/src
-
-adei_dbg_mounts:
- - name: adei-src
- subPath: dbg
- mountPath: /adei/src
+adei_admin_vols:
+ - name: adei-data
+ persistentVolumeClaim:
+ claimName: adei-data
adei_pod_mounts:
- name: adei-cfg
@@ -162,6 +183,20 @@ adei_pod_mounts:
subPath: "${setup}/apache2"
mountPath: /var/log/apache2
+adei_prod_mounts:
+ - name: adei-src
+ subPath: prod
+ mountPath: /adei/src
+
+adei_dbg_mounts:
+ - name: adei-src
+ subPath: dbg
+ mountPath: /adei/src
+
+adei_admin_mounts:
+ - name: adei-data
+ mountPath: /data
+
adei_frontends:
frontend:
name: "adei-${setup}"
@@ -246,6 +281,16 @@ adei_frontends:
mounts: "{{ adei_prod_mounts | union(adei_pod_mounts) }}"
groups: [ "adei" ]
enabled: true
+ admin:
+ name: "adei-${setup}-admin"
+ type: admin
+ replicas: 0
+ cmd: [ "/docker-entrypoint.sh", "/adei/src/scripts/system/adminer.sh" ]
+ env: "{{ adei_pod_env | union(adei_admin_env) }}"
+ vols: "{{ adei_pod_vols | union(adei_admin_vols) }}"
+ mounts: "{{ adei_dbg_mounts | union(adei_pod_mounts) | union(adei_admin_mounts) }}"
+ groups: [ "adei" ]
+ enabled: true
# Extra options:
# start_tolerance: 30
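Since the admin pod is declared with 0 replicas, it is presumably started on demand, e.g. for the katrin setup (assuming the template produces a DeploymentConfig named after the pod):

    oc -n adei scale dc/adei-katrin-admin --replicas=1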
diff --git a/setup/projects/adei/vars/mysql.yml b/setup/projects/adei/vars/mysql.yml
index 3349598..cf72c90 100644
--- a/setup/projects/adei/vars/mysql.yml
+++ b/setup/projects/adei/vars/mysql.yml
@@ -27,6 +27,8 @@ mysql:
- { name: "MYSQL_PMA_PASSWORD", value: "secret@adei/pma-password" }
- { name: "MYSQL_MAX_CONNECTIONS", value: "500" }
- { name: "MYSQL_SYNC_BINLOG", value: "0" }
+ - { name: "MYSQL_BINLOG_SYNC_DELAY", value: "25000" }
+ - { name: "MYSQL_BINLOG_NODELAY_COUNT", value: "32" }
- { name: "MYSQL_FLUSH_LOG_TYPE", value: "2" }
- { name: "MYSQL_FLUSH_LOG_TIMEOUT", value: "300" }
- { name: "MYSQL_BINLOG_FORMAT", value: "MIXED" }
@@ -75,10 +77,14 @@ mysql:
- { name: "MYSQL_MASTER_PASSWORD", value: "secret@adei/service-password" }
- { name: "MYSQL_PMA_PASSWORD", value: "secret@adei/pma-password" }
- { name: "MYSQL_MAX_CONNECTIONS", value: "500" }
+ - { name: "MYSQL_LOG_BIN", value: "1" }
- { name: "MYSQL_SYNC_BINLOG", value: "0" }
+ - { name: "MYSQL_LOG_SLAVE_UPDATES", value: "0" }
+ - { name: "MYSQL_BINLOG_SYNC_DELAY", value: "25000" }
+ - { name: "MYSQL_BINLOG_NODELAY_COUNT", value: "32" }
- { name: "MYSQL_FLUSH_LOG_TYPE", value: "2" }
- { name: "MYSQL_FLUSH_LOG_TIMEOUT", value: "300" }
- - { name: "MYSQL_SLAVE_WORKERS", value: "8" }
+ - { name: "MYSQL_SLAVE_WORKERS", value: "16" }
- { name: "MYSQL_SLAVE_SKIP_ERRORS", value: "1007,1008,1050,1051,1054,1060,1061,1068,1094,1146,1304,1359,1476,1537" }
- { name: "MYSQL_BINLOG_FORMAT", value: "MIXED" }
mappings:
diff --git a/setup/projects/adei/vars/mysql_galera.yml b/setup/projects/adei/vars/mysql_galera.yml
index e986268..a927e5c 100644
--- a/setup/projects/adei/vars/mysql_galera.yml
+++ b/setup/projects/adei/vars/mysql_galera.yml
@@ -71,5 +71,4 @@ galera:
- { name: "POD_NAMESPACE", value: "fieldref@metadata.namespace" }
- { name: "MYSQL_GALERA_CLUSTER", value: "galera-ss" }
mappings:
- - { name: "adei_init", mount: "/var/lib/init" }
- { name: "adei_host", path: "galera", mount: "/var/lib/mysql/data" }
diff --git a/setup/projects/adei/vars/volumes.yml b/setup/projects/adei/vars/volumes.yml
index fdceaae..15795b3 100644
--- a/setup/projects/adei/vars/volumes.yml
+++ b/setup/projects/adei/vars/volumes.yml
@@ -4,11 +4,11 @@ gids:
volumes:
adei_host: { volume: "hostraid", path: "/adei", write: true } # mysql
- adei_init: { volume: "openshift", path: "/adei/init"} # mysql
- adei_etc: { volume: "openshift", path: "/adei/etc"} # mysql (maybe)
+ adei_data: { volume: "datastore", path: "/adei", write: true } # temporary home for administrator pods
+ adei_init: { volume: "openshift", path: "/adei/init"} # simple mysql (or obsolete)
adei_src: { volume: "openshift", path: "/adei/src", write: true } # prod & debug (init creates setup links)
adei_cfg: { volume: "openshift", path: "/adei/cfg", write: true } # per-setup configs (ADEI/wiki modifies setup)
- adei_sys: { volume: "openshift", path: "/adei/sys" } # per-setup cron-jon overrides
+ adei_sys: { volume: "openshift", path: "/adei/sys", write: true } # per-setup cron-job overrides (it seems the logger complains intensively if we mount the same volume both read-only and read-write)
adei_tmp: { volume: "temporary", path: "/adei/tmp", write: true } # per-setup temporary files
adei_log: { volume: "temporary", path: "/adei/log", write: true } # per-replica (should be fine) temporary files
# adei_db: { volume: "databases", path: "/adei", write: true } # mysql
@@ -26,6 +26,8 @@ files:
- { osv: "adei_src", path: "/dbg", state: "directory", group: "adei", mode: "02775" }
- { osv: "adei_log", path: "/", state: "directory", group: "adei", mode: "02775" }
- { osv: "adei_tmp", path: "/", state: "directory", group: "adei", mode: "02775" }
+ - { osv: "adei_data",path: "/", state: "directory", group: "adei", mode: "02775" }
+ - { osv: "adei_data",path: "/home", state: "directory", group: "adei", mode: "02775" }
- { osv: "adei_host",path: "mysql", state: "directory", group: "adei_db", mode: "02775" }
- { osv: "adei_host",path: "galera", state: "directory", group: "adei_db", mode: "02775" }
- { osv: "adei_host",path: "mysql_master", state: "directory", group: "adei_db", mode: "02775" }