Commit 52c8246a authored by Hugo Hörnquist

Broke out into own module

[global]
fsid = 65e079b0-3d97-4446-a3e0-0b625c489b99
mon_initial_members = vogon-0, vogon-1, vogon-2
mon_host = 10.44.1.98,10.44.1.99,10.44.1.100
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
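# gres.conf: File= enumerates the device nodes backing each GPU, while
# Count= declares how many units of a resource (here the Xeon Phi MICs)
# a node provides.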
NodeName=n1593 Name=gpu Type=k40 File=/dev/nvidia[0-1]
NodeName=n1594 Name=mic Type=31s1 Count=3
NodeName=n[1595,1596] Name=mic Type=7120 Count=1
#NodeName=n1597 Name=gpu Type=k20x File=/dev/nvidia0
#NodeName=n1598 Name=gpu Type=k20x File=/dev/nvidia[0-1]
NodeName=n1599 Name=gpu Type=k20x File=/dev/nvidia[0-2]
# Connect to home via infiniband
10.44.1.220 home
10.44.4.1 analysator-system.lysator.liu.se analysator-system
10.41.0.1 analysator-system-eth
10.41.0.185 ne1585
10.41.0.186 ne1586
10.41.0.187 ne1587
10.41.0.188 ne1588
10.41.0.193 ne1593
10.41.0.194 ne1594
10.41.0.195 ne1595
10.41.0.196 ne1596
10.41.0.197 ne1597
10.41.0.198 ne1598
10.41.0.199 ne1599
10.41.0.200 ne1600
10.41.0.201 ne1601
10.41.0.202 ne1602
10.44.4.185 n1585
10.44.4.186 n1586
10.44.4.187 n1587
10.44.4.188 n1588
10.44.4.193 n1593
10.44.4.194 n1594
10.44.4.195 n1595
10.44.4.196 n1596
10.44.4.197 n1597
10.44.4.198 n1598
10.44.4.199 n1599
10.44.4.200 n1600
10.44.4.201 n1601
10.44.4.202 n1602
# slurm.conf file generated by configurator easy.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=analysator-system
#
#MailProg=/bin/mail
MpiDefault=none
#MpiParams=ports=#-#
ProctrackType=proctrack/linuxproc
ReturnToService=2
SlurmctldPidFile=/var/run/slurmctld.pid
#SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
#SlurmdPort=6818
SlurmdSpoolDir=/opt/slurm/spool
SlurmUser=slurm
#SlurmdUser=root
StateSaveLocation=/opt/slurm
SwitchType=switch/none
TaskPlugin=task/affinity
#
#
# TIMERS
#KillWait=30
#MinJobAge=300
#SlurmctldTimeout=120
#SlurmdTimeout=300
#
#
# SCHEDULING
FastSchedule=1
SchedulerType=sched/backfill
SelectType=select/cons_res
SelectTypeParameters=CR_Core
#
#
# LOGGING AND ACCOUNTING
AccountingStorageType=accounting_storage/filetxt
ClusterName=analysator
JobCompType=jobcomp/script
JobCompLoc=/etc/slurm/slurm_jobcomp_logger
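# jobcomp/script runs the executable named by JobCompLoc once for every
# completed job, with the job's record fields (id, state, name, user,
# node list, ...) exported as environment variables for it to read.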
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/linux
#SlurmctldDebug=info
#SlurmctldLogFile=
#SlurmdDebug=info
#SlurmdLogFile=
#
GresTypes=gpu,mic
#
# COMPUTE NODES
NodeName=n1599 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64222 Gres=gpu:k20x:3 # K20X x 3
NodeName=n1596 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64222 Gres=mic:7120:1 # Xeon Phi SE10/7120
NodeName=n1595 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64222 Gres=mic:7120:1 # Xeon Phi SE10/7120
NodeName=n1593 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64222 Gres=gpu:k40:2 # K40 x 2
NodeName=n1594 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=64222 Gres=mic:31s1:3 # Xeon Phi 31S1
NodeName=n1585 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=31968 # CPU-only
NodeName=n1586 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=31968 # CPU-only
NodeName=n1587 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=31968 # CPU-only
NodeName=n1588 CPUs=16 Boards=1 SocketsPerBoard=2 CoresPerSocket=8 ThreadsPerCore=1 RealMemory=31968 # CPU-only
# GPU/MIC Gres settings
#NodeName=n1593 Weight=10 Feature=vtune,test-sjd,test-sjr Gres=gpu:k40:2
#NodeName=n1594 Weight=10 Feature=vtune,test-sjd,test-sjr Gres=mic:31s1:3
#NodeName=n[1595,1596] Weight=10 Feature=vtune,test-sjd,test-sjr Gres=mic:7120:1
##NodeName=n1597 Weight=10 Feature=vtune,test-sjd,test-sjr Gres=gpu:k20x:1
##NodeName=n1598 Weight=10 Feature=vtune,test-sjd,test-sjr Gres=gpu:k20x:2
#NodeName=n1599 Weight=10 Feature=vtune,test-sjd,test-sjr Gres=gpu:k20x:3
PartitionName=compute Nodes=n1599,n1596,n1595,n1585,n1586,n1587,n1588,n1593,n1594 Default=YES MaxTime=INFINITE State=UP
#!/usr/bin/env python3
# Slurm job completion logger, run by the jobcomp/script plugin
# (JobCompLoc above).  Slurm exports the finished job's fields as
# environment variables; we log them as one JSON object per line into
# a per-day file.
import datetime
import json
import os


def try_int(v):
    """Convert purely numeric strings to int; leave everything else as-is."""
    if v.isdigit():
        return int(v)
    return v


# Fields listed here sort first, in this order; all other fields follow
# alphabetically.
extra_order_fields = [
    'jobid',
    'jobstate',
    'jobname',
    'username',
    'start',
    'end',
    'nodecnt',
    'procs',
    'nodes',
]
extra_order = dict(zip(extra_order_fields, range(len(extra_order_fields))))

job_info = {k.lower(): try_int(v) for k, v in os.environ.items()}
job_info = dict(sorted(job_info.items(),
                       key=lambda x: (extra_order.get(x[0], 1000), x[0])))

now = datetime.datetime.utcnow()
with open(now.strftime('/var/log/slurm/accounting/%Y-%m-%d'), 'a') as f:
    f.write(json.dumps({'timestamp': now.isoformat(timespec='microseconds') + 'Z',
                        **job_info}) + '\n')
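# A resulting log line might look like this (hypothetical values):
# {"timestamp": "2019-05-01T12:34:56.789012Z", "jobid": 1234,
#  "jobstate": "COMPLETED", "jobname": "bench", "username": "hx", ...}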
allow bootp;
allow booting;
next-server 10.41.0.1;
filename "pxelinux.0";
default-lease-time 600;
max-lease-time 600;
subnet 10.41.0.0 netmask 255.255.255.0 {
option routers 10.41.0.1;
option subnet-mask 255.255.255.0;
option domain-search "lysator.liu.se";
option domain-name-servers 130.236.254.225;
range 10.41.0.10 10.41.0.60;
}
host n37 {
hardware ethernet 00:25:b3:24:43:50;
fixed-address 10.41.0.137;
option host-name "n37";
}
host n1593 {
hardware ethernet c8:cb:b8:cb:dc:d6;
fixed-address 10.41.0.193;
option host-name "n1593";
}
host n1594 {
hardware ethernet 9c:b6:54:01:ed:b6;
fixed-address 10.41.0.194;
option host-name "n1594";
}
host n1595 {
hardware ethernet 28:92:4a:38:e2:b2;
fixed-address 10.41.0.195;
option host-name "n1595";
}
host n1596 {
hardware ethernet c8:cb:b8:d0:dc:e2;
fixed-address 10.41.0.196;
option host-name "n1596";
}
host n1597 {
hardware ethernet c8:cb:b8:d0:df:6a;
fixed-address 10.41.0.197;
option host-name "n1597";
}
host n1598 {
hardware ethernet c8:cb:b8:d0:df:86;
fixed-address 10.41.0.198;
option host-name "n1598";
}
host n1599 {
hardware ethernet c8:cb:b8:d0:e5:1a;
fixed-address 10.41.0.199;
option host-name "n1599";
}
host n1600 {
hardware ethernet c8:cb:b8:d0:df:5e;
fixed-address 10.41.0.200;
option host-name "n1600";
}
host n1601 {
hardware ethernet e8:39:35:be:9f:60;
fixed-address 10.41.0.201;
option host-name "n1601";
}
host n1602 {
hardware ethernet 98:4b:e1:6c:30:fc;
fixed-address 10.41.0.202;
option host-name "n1602";
}
host n1585 {
hardware ethernet c8:cb:b8:d0:e7:aa;
fixed-address 10.41.0.185;
option host-name "n1585";
}
host n1586 {
hardware ethernet c8:cb:b8:cf:59:04;
fixed-address 10.41.0.186;
option host-name "n1586";
}
host n1587 {
hardware ethernet c8:cb:b8:d0:e5:8e;
fixed-address 10.41.0.187;
option host-name "n1587";
}
host n1588 {
hardware ethernet c8:cb:b8:d0:e5:4a;
fixed-address 10.41.0.188;
option host-name "n1588";
}
#version=DEVEL
# System authorization information
auth --enableshadow --passalgo=sha512
# Use network installation
url --url="http://ftp.lysator.liu.se/centos/7/os/x86_64/"
# Use text install
text
# Run the Setup Agent on first boot
firstboot --enable
ignoredisk --only-use=sda
# Keyboard layouts
keyboard --vckeymap=se --xlayouts='se'
# System language
lang en_US.UTF-8
# Reboot after installation
reboot
# Network information
network --bootproto=dhcp --device=eno1 --ipv6=auto --activate
network --bootproto=dhcp --device=eno2 --onboot=off --ipv6=auto
network --bootproto=dhcp --device=eno3d1 --onboot=off --ipv6=auto
network --hostname=localhost.localdomain
# Root password
rootpw --iscrypted $6$9ywFAgrR8M6Fe7cL$LIFw7kxQ4S8.C0jLpdBDSJBCO5ZZMCtWv6Y88xjSnRtdEmRO8unYQw1G.qDw9k2HS4Z.YbXXoj8oMos3ZzjKz.
# System services
services --enabled="chronyd"
# System timezone
timezone Europe/Stockholm --isUtc
# System bootloader configuration
bootloader --append=" crashkernel=auto" --location=mbr --boot-drive=sda
# Partition clearing information
clearpart --all --initlabel --drives=sda
# Disk partitioning information
part /boot --fstype="xfs" --ondisk=sda --size=1024
part pv.232 --fstype="lvmpv" --ondisk=sda --size=474631
volgroup centos_n1599 --pesize=4096 pv.232
logvol / --fstype="xfs" --size=442368 --name=root --vgname=centos_n1599
logvol swap --fstype="swap" --size=32256 --name=swap --vgname=centos_n1599
%packages
@^minimal
@core
chrony
kexec-tools
%end
%addon com_redhat_kdump --enable --reserve-mb='auto'
%end
%post --log=/root/ks-post.log
mkdir -m 700 /root/.ssh
echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDEjpMM8PThEq6ZWksTHjH+zklicY2m/la7VS1FTXOUz2G0Z6eRI22icgSgVNKowuPsfEp0f1YKwmcdiMOOZLp4zvqdbIJEWwZRcXqbHhpQMgub+Wfu8i91CY/CCnniQen1LQzqDed3X/wOTjkfEiNMAUZo1pQnuaOYY/RmwPlmLsZVaTgNsqF8g6deVBQ463T4x3S5MajUgfEwIeLhv7k2aCFayrqRMhPl1ek5Tyw3FEngXi15ah+Jyt/pNK5vE/cH+jskEpd8vw6zYSJUCKvdnqfb6jzKQYRuqXXMYazntvBuPfl4iES+uc9XNEXMVFNO1wu547lE70lCspwlmuEl root@analysator-system.lysator.liu.se" > /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys
yes | rpm -ivh https://yum.puppetlabs.com/puppet5/puppet5-release-el-7.noarch.rpm
yum update -y
yum install -y puppet
%end
%anaconda
pwpolicy root --minlen=6 --minquality=1 --notstrict --nochanges --notempty
pwpolicy user --minlen=6 --minquality=1 --notstrict --nochanges --emptyok
pwpolicy luks --minlen=6 --minquality=1 --notstrict --nochanges --notempty
%end
default menu.c32
prompt 0
timeout 300
ONTIMEOUT local
menu title ###### ANALYSATOR PXE INSTALLER ######
label 1
menu label ^1) hx's automagic CentOS installer
kernel vmlinuz
append initrd=initrd.img method=http://ftp.lysator.liu.se/centos/7/os/x86_64/ devfs=nomount ip=dhcp inst.geoloc=0 ks=http://10.41.0.1/ks.cfg
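# ks= points the installer at the kickstart file (ks.cfg, above) served
# from analysator-system-eth (10.41.0.1).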
label local
menu label ^2) Boot from HDD
localboot 0
# default: off
# description: The tftp server serves files using the trivial file transfer \
# protocol. The tftp protocol is often used to boot diskless \
# workstations, download configuration files to network-aware printers, \
# and to start the installation process for some operating systems.
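# The -s flag below makes in.tftpd serve files relative to
# /var/lib/tftpboot, where the pxelinux.0 named in the DHCP config is
# expected to live.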
service tftp
{
socket_type = dgram
protocol = udp
wait = yes
user = root
server = /usr/sbin/in.tftpd
server_args = -s /var/lib/tftpboot
disable = no
per_source = 11
cps = 100 2
flags = IPv4
}
class analysator::common {
include ::analysator::hosts
}
class analysator::desktop {
yum::group {
[
'X Window System',
'Fonts',
'GNOME Desktop',
'Xfce',
'MATE Desktop',
'KDE Plasma Workspaces',
]:
ensure => present,
timeout => 1200,
}
include analysator::packages::basic
package {
[
'ghc-xmonad',
'ghc-xmonad-contrib',
'ghc-xmonad-contrib-devel',
'ghc-xmonad-devel',
]:
ensure => latest,
}
}
class analysator::hosts {
require ::lysnetwork::hosts
require ::lyslagring::hosts
concat::fragment { '/etc/hosts/02-analysator':
target => '/etc/hosts',
source => 'puppet:///modules/analysator/hosts',
}
}
class analysator::login {
include analysator::packages::build_node
$eth_iface='eno2'
network::interface { $eth_iface:
ipaddress => '130.236.254.181',
netmask => '255.255.255.0',
}
}
class analysator::munge {
package { 'munge':
ensure => installed,
}
file { '/etc/munge/munge.key':
ensure => file,
content => 'supersecretreallygoodprivatekey!',
owner => 'munge',
group => 'munge',
mode => '0400',
}
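# The key must be byte-identical on every node in the cluster; munge
# rejects credentials signed with a different key.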
service { 'munge':
ensure => running,
enable => true,
require => [ Package['munge'], File['/etc/munge/munge.key'] ],
subscribe => [ Package['munge'], File['/etc/munge/munge.key'] ],
}
}
class analysator::node::network
(
$login = false,
$public_ip = undef,
) {
require ::lysnetwork::iptables
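# Reuse the last octet of the node's eno1 address so the ethernet
# (10.41.0.x) and infiniband (10.44.4.x) addresses stay in sync per
# node.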
$last_octet = split($facts['networking']['interfaces']['eno1']['bindings'][0]['address'], '\.')[3]
$ib_iface = 'ib0'
$eth_iface = 'eno1'
if(!$login) {
ipoib::interface { $ib_iface:
ipaddress => "10.44.4.${last_octet}",
netmask => '255.255.0.0',
gateway => '10.44.4.1',
domain => 'lysator.liu.se',
dns1 => '130.236.254.4',
dns2 => '130.236.254.225',
}
}
}
class analysator::node (
$login = false,
){
include ::stdlib
include ::analysator::common
require ::analysator::slurm
include ::analysator::munge
if(!$login) {
service { 'slurmd':
ensure => running,
enable => true,
subscribe => File['/etc/slurm/slurm.conf'],
}
}
ssh_authorized_key { 'root@analysator-system':
ensure => present,
user => 'root',
type => 'ssh-rsa',
key => 'AAAAB3NzaC1yc2EAAAADAQABAAABAQDEjpMM8PThEq6ZWksTHjH+zklicY2m/la7VS1FTXOUz2G0Z6eRI22icgSgVNKowuPsfEp0f1YKwmcdiMOOZLp4zvqdbIJEWwZRcXqbHhpQMgub+Wfu8i91CY/CCnniQen1LQzqDed3X/wOTjkfEiNMAUZo1pQnuaOYY/RmwPlmLsZVaTgNsqF8g6deVBQ463T4x3S5MajUgfEwIeLhv7k2aCFayrqRMhPl1ek5Tyw3FEngXi15ah+Jyt/pNK5vE/cH+jskEpd8vw6zYSJUCKvdnqfb6jzKQYRuqXXMYazntvBuPfl4iES+uc9XNEXMVFNO1wu547lE70lCspwlmuEl',
}
file_line { 'soft memlimit':
path => '/etc/security/limits.conf',
line => '* soft memlock unlimited',
}
file_line { 'hard memlimit':
path => '/etc/security/limits.conf',
line => '* hard memlock unlimited',
}
package { 'environment-modules':
ensure => absent,
}
package { ['Lmod', 'python2-pip']:
ensure => installed,
}
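# Lmod takes over as the module system; environment-modules is removed
# above since both provide the 'module' command.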
file { '/sw':
ensure => directory,
}
-> file_line { 'mount analysator-sw':
path => '/etc/fstab',
line => 'home:/ceph-home/analysator-sw /sw nfs defaults,bg 0 0',
}
~> exec { '/usr/bin/mount /sw':
refreshonly => true,
}
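# Chaining: '->' orders the fstab entry after the /sw directory exists,
# and '~>' notifies the exec, so the otherwise refreshonly mount runs
# whenever the fstab line is added or changed.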
file { '/etc/profile.d/01-analysator-modulepath.sh':
ensure => file,
content => 'export MODULEPATH=/sw/easybuild/modules/all:$MODULEPATH',
}
package { ['dkms', 'kernel-devel', 'elfutils' ]:
ensure => installed,
}
include analysator::packages::compute_node
include ::analysator::storage
}
# Should be installed everywhere
class analysator::packages::basic
{
package {
[
'emacs', # We are not animals
'ctags-etags',
'ctags',
]:
ensure => installed;
}
}
# Packages needed for running compute workloads
class analysator::packages::compute_node
{
require analysator::packages::basic
package {
[
'flatpak-builder', # FIXME: move to build_node?
'gcc-c++',
'opencl-headers',
'opencl-filesystem',
'ocl-icd',
'perf',
'ruby',
]:
ensure => installed,
}
package {
[
'autofs', # auto.home will override the ceph /home mount
]:
ensure => purged,
}
}
# Packages needed for building, but not needed when running
# (In practice we only have build_nodes at the moment)
class analysator::packages::build_node
{
require analysator::packages::compute_node
package {
[
'autoconf',
'automake',
'bison',
'bzip2-devel',
'cmake',
'clang',
'flex',
'ImageMagick-devel',
'libtool',
'libjpeg-turbo-devel',
'libtiff-devel',
'python3',
'ruby-devel',
'ucx-static', # purpose unknown, but it's installed on almost all nodes
'qt5-qtbase-devel', # Mame requirement /zino
'qt-devel', # Old Mame requirement, useful but will not cry if it's removed /zino
'SDL2-devel', # Mame requirement /zino
'SDL2_ttf-devel', # Mame requirement /zino
'libXi-devel', # Mame requirement /zino
'alsa-lib-devel', # Mame requirement /zino
'fontconfig-devel', # Mame requirement /zino
'libXinerama-devel', # Mame requirement /zino
'libsqlite3x-devel', # For SQlite module in Pike /zino
]:
ensure => installed;
}
}
# Packages needed for running compute on Nvidia GPUs
#
# NOTE: The early installations on n[1593,1599] were done manually by hx
# via
# http://us.download.nvidia.com/tesla/410.79/nvidia-diag-driver-local-repo-rhel7-410.79-1.0-1.x86_64.rpm
# No attempt has been made to fully clean them up.
class analysator::packages::gpu_node
{
require analysator::repos::cuda
package {
[
'cuda',
]:
ensure => installed,
}
}
# Packages that make life easier on the system node
class analysator::packages::system
{
require analysator::packages::basic
package {
[
'alien', # Handy package conversion tool
]:
ensure => installed,
}
}
class analysator::prometheus {
class {'::prometheus::server':
version => '2.0.0',
scrape_configs => [
{ 'job_name' => 'prometheus_analysator',