Last-Modified: 2023-03-22T12:42:45+00:00
SPDX-License-Identifier: CC0-1.0
SPDX-FileCopyrightText: 2023 Bruno Victal

# Shepherd Heartbeat service

Issue a restart for any service that fails a “heartbeat” check.

## Implementation plan

* Make a “clone” of mcron-service-type and name it heartbeat-service-type.

Rationale: Avoid cluttering the mcron-service-type schedule and keep a logical separation between cron and this service, which we will treat as “Value Added Services for Guix”.

## Considerations

Root privileges should be dropped when unnecessary.

\\\\\

TODO: modularise the mympd-heartbeat-service prototype into something more generic.

\\\\\

action: on failure do ...? with presets: syslog output, email, program-file, ...

### Case 1. HTTP Heartbeat

             BEGIN
               ↓
⌌-----------------------------⌍
|⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅|
| ⋅ ⌌---------------------⌍ ⋅ |  false
|⋅ ⋅|  service running?   |⋅ ⋅|---------⌍
| ⋅ ⌎---------------------⌏ ⋅ |         |
|⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅|         |
⌎-----------------------------⌏         |
               ↓                        |
⌌-----------------------------⌍  error  |
|       Heartbeat Check       |---------|
⌎-----------------------------⌏         |
               ↓                        |
⌌-----------------------------⌍         |
|⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅|         |
| ⋅ ⌌---------------------⌍ ⋅ |         |
|⋅ ⋅|  service restart?   |⋅ ⋅|         |
| ⋅ ⌎---------------------⌏ ⋅ |         |
|⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ ⋅|         |
⌎-----------------------------⌏         |
               ↓                        |
              END ←---------------------⌏

Figure 1: Illustrative Diagram

(Notes: Shaded areas mean root privilege, since it is required for interacting with shepherd.)

Implemented as two program-files, for privilege separation.

'mympd-heartbeat' example implementation:

configuration record-type, accounts, job and service-type:
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
;; Configuration for the myMPD heartbeat service.
;;   user, group -- unprivileged account the worker switches to (strings).
;;   host        -- host part interpolated into "http://~a/browse/" by the
;;                  worker; it has no default and must be supplied.
;;   provision   -- shepherd provision symbol of the supervised service.
(define-record-type* <mympd-heartbeat-configuration>
  mympd-heartbeat-configuration
  make-mympd-heartbeat-configuration
  mympd-heartbeat-configuration?
  (user      mympd-heartbeat-configuration-user
             (default "heartbeat-mympd"))            ; string
  (group     mympd-heartbeat-configuration-group
             (default "heartbeat-mympd"))            ; string
  (host      mympd-heartbeat-configuration-host)
  (provision mympd-heartbeat-configuration-provision ; symbol
             (default 'mympd)))

;; Heartbeat every 30 seconds.
;; Returns a one-element list holding the mcron job gexp that runs the
;; supervisor program.
(define (mympd-heartbeat-job config)
  (list
   #~(job '(next-second (range 0 60 30))
          #$(mympd-heartbeat-supervisor config)
          "mympd-heartbeat-service job")))

;; Returns the system group and the system account (no login shell, no home
;; directory) that the worker drops privileges to.
(define (mympd-heartbeat-accounts config)
  (match-record config <mympd-heartbeat-configuration>
    (user group)
    (list (user-group
           (name group)
           (system? #t))
          (user-account
           (name user)
           (group group)
           (shell (file-append shadow "/sbin/nologin"))
           (home-directory "/nonexistent")
           (create-home-directory? #f)
           (system? #t)
           (comment "Heartbeat user for mympd")))))

;; Service type wiring the job into mcron and the accounts into
;; account-service-type.
(define mympd-heartbeat-service-type
  (service-type
   (name 'mympd-heartbeat)
   (extensions
    (list (service-extension mcron-service-type
                             mympd-heartbeat-job)
          (service-extension account-service-type
                             mympd-heartbeat-accounts)))
   (description "myMPD heartbeat job.")))
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

program-file I: supervisor
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
;; Named `mympd-heartbeat-supervisor' so that it matches the reference in
;; `mympd-heartbeat-job'.
(define (mympd-heartbeat-supervisor config)
  ;; Query service status and restart if needed.
  ;;
  ;; Flow:
  ;;   1. If the supervised service is not running, do nothing (exit 0).
  ;;   2. Otherwise run the worker and inspect its exit value:
  ;;        0     -> heartbeat OK, nothing to do;
  ;;        125   -> the worker itself failed, only log it;
  ;;        other -> heartbeat failed, restart the service.
  (program-file
   "mympd-heartbeat-supervisor.scm"
   (with-imported-modules (source-module-closure
                           '((gnu services herd)))
     #~(begin
         (use-modules (gnu services herd)
                      (srfi srfi-1))

         ;; Return a predicate matching live services that are running and
         ;; provide SYM.
         (define (is-service-running? sym)
           (lambda (x)
             (and (live-service-running x)
                  (memq sym (live-service-provision x)))))

         (let* ((service-symbol
                 '#$(mympd-heartbeat-configuration-provision config))
                ;; Guard against `current-services' yielding #f (presumably
                ;; when shepherd cannot be queried -- confirm against the
                ;; (gnu services herd) documentation); `any' needs a list.
                (services (or (current-services) '()))
                ;; `any' returns #f when nothing matches, never '(), so its
                ;; result is used directly as the truth value.
                (running? (any (is-service-running? service-symbol)
                               services)))
           (unless running?
             (exit 0))
           (case (status:exit-val
                  (system* #$(mympd-heartbeat-worker config)))
             ((0) #t)
             ((125) (format #t "Heartbeat worker error~%"))
             (else
              (format #t "Issuing restart for service '~a'~%" service-symbol)
              (restart-service service-symbol))))
         (exit)))))
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

program-file II: worker
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
;; Using a (lambda ()) job might be troublesome.
;; See: (reference URL missing from this copy of the document)
(define (mympd-heartbeat-worker config)
  ;; Exit codes:
  ;; * 0 = Succeeding.
  ;; * 1 = Failed.
  ;; * 125 = Error occurred.
  (program-file
   "mympd-heartbeat-worker.scm"
   #~(begin
       (use-modules (ice-9 format)
                    ;; used for pure guile http client mode
                    ;(srfi srfi-71)
                    ;(web client)
                    ;(web response)
                    )

       ;; Drop root privileges
       (let* ((pw (getpwnam #$(mympd-heartbeat-configuration-user config)))
              (uid (passwd:uid pw))
              (gid (passwd:gid pw)))
         (setgroups #())                ; clear supplementary groups
         (setgid gid)
         (setuid uid)                   ; setuid must come in last
         (unless (and (= uid (getuid))
                      (= gid (getgid)))
           (format (current-error-port)
                   "Error: Failed to drop root privileges~%")
           (exit 125)))

       ;; XXX: Last updated: 13/03/2023, Guile 3.0.9
       ;; Guile (web client) module doesn't support timeouts and doesn't do
       ;; multiple resolution on the hostname. If a server is listening on [::1] but
       ;; not on 127.0.0.1, (http-get "http://localhost") will fail with conn refused.
       ;;
       ;; Comment out until this is fixed upstream.
       ;;
       ;; TODO: timeouts
       ;; NOTE: the configuration record above has no `endpoint' field; add
       ;; one (or build the URI from `host') before re-enabling this form.
       #;(let* ((resp _ (http-head #$(mympd-heartbeat-configuration-endpoint config)))
                (resp-code (response-code resp))
                (succeeding? (= resp-code 200)))
           (format #t "Healthcheck: ~:[FAIL~;OK~]~%" succeeding?)
           (if succeeding? (exit 0) (exit 1)))

       (let* ((hostname #$(mympd-heartbeat-configuration-host config))
              (uri (format #f "http://~a/browse/" hostname))
              (status (status:exit-val
                       ;; XXX: (12/03/2023, Guile 3.0.9)
                       ;; Guile bug? (fixed in main: https://issues.guix.gnu.org/61073)
                       ;; > (spawn "uname" '("uname" "-a") #:output (%make-void-port "w"))
                       ;; Segmentation fault
                       ;;
                       ;; Use /dev/null in the meantime.
                       (system* #$(file-append curl "/bin/curl")
                                "--silent" "--output" "/dev/null"
                                "--max-time" "5" "--head" uri)))
              ;; `status' is #f when curl did not exit normally, so compare
              ;; with `eqv?' rather than `=', which would raise on #f.
              (succeeding? (eqv? 0 status)))
         (format #t "Heartbeat: ~:[FAIL~;OK~]~%" succeeding?)
         ;; Map the result onto the documented exit codes instead of
         ;; leaking curl's own exit status to the supervisor.
         (exit (cond (succeeding? 0)
                     ((not status) 125)
                     (else 1)))))))
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""