LCOV - code coverage report
Current view: top level - lib - lwan-thread.c (source / functions) Hit Total Coverage
Test: coverage.info.cleaned Lines: 232 644 36.0 %
Date: 2023-04-18 16:19:03 Functions: 16 34 47.1 %

          Line data    Source code
       1             : /*
       2             :  * lwan - web server
       3             :  * Copyright (c) 2012, 2013 L. A. F. Pereira <l@tia.mat.br>
       4             :  *
       5             :  * This program is free software; you can redistribute it and/or
       6             :  * modify it under the terms of the GNU General Public License
       7             :  * as published by the Free Software Foundation; either version 2
       8             :  * of the License, or any later version.
       9             :  *
      10             :  * This program is distributed in the hope that it will be useful,
      11             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :  * GNU General Public License for more details.
      14             :  *
      15             :  * You should have received a copy of the GNU General Public License
      16             :  * along with this program; if not, write to the Free Software
      17             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
      18             :  * USA.
      19             :  */
      20             : 
      21             : #define _GNU_SOURCE
      22             : #include <assert.h>
      23             : #include <errno.h>
      24             : #include <fcntl.h>
      25             : #include <pthread.h>
      26             : #include <sched.h>
      27             : #include <stdlib.h>
      28             : #include <string.h>
      29             : #include <sys/epoll.h>
      30             : #include <sys/ioctl.h>
      31             : #include <sys/socket.h>
      32             : #include <unistd.h>
      33             : 
      34             : #if defined(LWAN_HAVE_SO_ATTACH_REUSEPORT_CBPF)
      35             : #include <linux/filter.h>
      36             : #endif
      37             : 
      38             : #if defined(LWAN_HAVE_MBEDTLS)
      39             : #include <mbedtls/entropy.h>
      40             : #include <mbedtls/error.h>
      41             : #include <mbedtls/gcm.h>
      42             : #include <mbedtls/net_sockets.h>
      43             : #include <mbedtls/ssl_internal.h>
      44             : 
      45             : #include <linux/tls.h>
      46             : #include <netinet/tcp.h>
      47             : #endif
      48             : 
      49             : #include "list.h"
      50             : #include "murmur3.h"
      51             : #include "lwan-private.h"
      52             : #include "lwan-tq.h"
      53             : 
      54         237 : static void lwan_strbuf_free_defer(void *data)
      55             : {
      56         237 :     return lwan_strbuf_free((struct lwan_strbuf *)data);
      57             : }
      58             : 
      59          19 : static void graceful_close(struct lwan *l,
      60             :                            struct lwan_connection *conn,
      61             :                            char buffer[static DEFAULT_BUFFER_SIZE])
      62             : {
      63          19 :     int fd = lwan_connection_get_fd(l, conn);
      64             : 
      65           0 :     while (TIOCOUTQ) {
      66             :         /* This ioctl probably isn't doing what it says on the tin; the details
      67             :          * are subtle, but it seems to do the trick to allow gracefully closing
      68             :          * the connection in some cases with minimal system calls. */
      69             :         int bytes_waiting;
      70          19 :         int r = ioctl(fd, TIOCOUTQ, &bytes_waiting);
      71             : 
      72          19 :         if (!r && !bytes_waiting) /* See note about close(2) below. */
      73          19 :             return;
      74           0 :         if (r < 0 && errno == EINTR)
      75           0 :             continue;
      76             : 
      77           0 :         break;
      78             :     }
      79             : 
      80           0 :     if (UNLIKELY(shutdown(fd, SHUT_WR) < 0)) {
      81           0 :         if (UNLIKELY(errno == ENOTCONN))
      82           0 :             return;
      83             :     }
      84             : 
      85           0 :     for (int tries = 0; tries < 20; tries++) {
      86           0 :         ssize_t r = recv(fd, buffer, DEFAULT_BUFFER_SIZE, MSG_TRUNC);
      87             : 
      88           0 :         if (!r)
      89           0 :             break;
      90             : 
      91           0 :         if (r < 0) {
      92           0 :             switch (errno) {
      93           0 :             case EAGAIN:
      94           0 :                 break;
      95           0 :             case EINTR:
      96           0 :                 continue;
      97           0 :             default:
      98           0 :                 return;
      99             :             }
     100             :         }
     101             : 
     102           0 :         coro_yield(conn->coro, CONN_CORO_WANT_READ);
     103             :     }
     104             : 
     105             :     /* close(2) will be called when the coroutine yields with CONN_CORO_ABORT */
     106             : }
     107             : 
     108             : #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
     109             : static void lwan_random_seed_prng_for_thread(const struct lwan_thread *t)
     110             : {
     111             :     (void)t;
     112             : }
     113             : 
     114             : uint64_t lwan_random_uint64()
     115             : {
     116             :     static uint64_t value;
     117             : 
     118             :     return ATOMIC_INC(value);
     119             : }
     120             : #else
     121             : static __thread __uint128_t lehmer64_state;
     122             : 
     123         184 : static void lwan_random_seed_prng_for_thread(const struct lwan_thread *t)
     124             : {
     125         184 :     if (lwan_getentropy(&lehmer64_state, sizeof(lehmer64_state), 0) < 0) {
     126           0 :         lwan_status_warning("Couldn't get proper entropy for PRNG, using fallback seed");
     127           0 :         lehmer64_state |= murmur3_fmix64((uint64_t)(uintptr_t)t);
     128           0 :         lehmer64_state <<= 64;
     129           0 :         lehmer64_state |= murmur3_fmix64((uint64_t)t->epoll_fd);
     130             :     }
     131         184 : }
     132             : 
     133        1614 : uint64_t lwan_random_uint64()
     134             : {
     135             :     /* https://lemire.me/blog/2019/03/19/the-fastest-conventional-random-number-generator-that-can-pass-big-crush/ */
     136        1614 :     lehmer64_state *= 0xda942042e4dd58b5ull;
     137        1614 :     return (uint64_t)(lehmer64_state >> 64);
     138             : }
     139             : #endif
     140             : 
     141         909 : uint64_t lwan_request_get_id(struct lwan_request *request)
     142             : {
     143         909 :     struct lwan_request_parser_helper *helper = request->helper;
     144             : 
     145         909 :     if (helper->request_id == 0)
     146         499 :         helper->request_id = lwan_random_uint64();
     147             : 
     148         909 :     return helper->request_id;
     149             : }
     150             : 
     151             : #if defined(LWAN_HAVE_MBEDTLS)
     152             : static bool
     153           0 : lwan_setup_tls_keys(int fd, const mbedtls_ssl_context *ssl, int rx_or_tx)
     154             : {
     155           0 :     struct tls12_crypto_info_aes_gcm_128 info = {
     156             :         .info = {.version = TLS_1_2_VERSION,
     157             :                  .cipher_type = TLS_CIPHER_AES_GCM_128},
     158             :     };
     159             :     const unsigned char *salt, *iv, *rec_seq;
     160             :     const mbedtls_gcm_context *gcm_ctx;
     161             :     const mbedtls_aes_context *aes_ctx;
     162             : 
     163           0 :     switch (rx_or_tx) {
     164           0 :     case TLS_RX:
     165           0 :         salt = ssl->transform->iv_dec;
     166           0 :         rec_seq = ssl->in_ctr;
     167           0 :         gcm_ctx = ssl->transform->cipher_ctx_dec.cipher_ctx;
     168           0 :         break;
     169           0 :     case TLS_TX:
     170           0 :         salt = ssl->transform->iv_enc;
     171           0 :         rec_seq = ssl->cur_out_ctr;
     172           0 :         gcm_ctx = ssl->transform->cipher_ctx_enc.cipher_ctx;
     173           0 :         break;
     174           0 :     default:
     175           0 :         __builtin_unreachable();
     176             :     }
     177             : 
     178           0 :     iv = salt + 4;
     179           0 :     aes_ctx = gcm_ctx->cipher_ctx.cipher_ctx;
     180             : 
     181           0 :     memcpy(info.iv, iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
     182           0 :     memcpy(info.rec_seq, rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
     183           0 :     memcpy(info.key, aes_ctx->rk, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
     184           0 :     memcpy(info.salt, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
     185             : 
     186           0 :     if (UNLIKELY(setsockopt(fd, SOL_TLS, rx_or_tx, &info, sizeof(info)) < 0)) {
     187           0 :         lwan_status_perror("Could not set %s kTLS keys for fd %d",
     188             :                            rx_or_tx == TLS_TX ? "transmission" : "reception",
     189             :                            fd);
     190           0 :         lwan_always_bzero(&info, sizeof(info));
     191           0 :         return false;
     192             :     }
     193             : 
     194           0 :     lwan_always_bzero(&info, sizeof(info));
     195           0 :     return true;
     196             : }
     197             : 
     198             : __attribute__((format(printf, 2, 3)))
     199             : __attribute__((noinline, cold))
     200           0 : static void lwan_status_mbedtls_error(int error_code, const char *fmt, ...)
     201             : {
     202             :     char *formatted;
     203             :     va_list ap;
     204             :     int r;
     205             : 
     206           0 :     va_start(ap, fmt);
     207           0 :     r = vasprintf(&formatted, fmt, ap);
     208           0 :     if (r >= 0) {
     209             :         char mbedtls_errbuf[128];
     210             : 
     211           0 :         mbedtls_strerror(error_code, mbedtls_errbuf, sizeof(mbedtls_errbuf));
     212           0 :         lwan_status_error("%s: %s", formatted, mbedtls_errbuf);
     213           0 :         free(formatted);
     214             :     }
     215           0 :     va_end(ap);
     216           0 : }
     217             : 
     218           0 : static void lwan_setup_tls_free_ssl_context(void *data)
     219             : {
     220           0 :     mbedtls_ssl_context *ssl = data;
     221             : 
     222           0 :     mbedtls_ssl_free(ssl);
     223           0 : }
     224             : 
     225             : struct lwan_mbedtls_handshake_ctx {
     226             :     int fd;
     227             :     bool last_was_send;
     228             : };
     229             : 
     230           0 : static int lwan_mbedtls_send(void *ctx, const unsigned char *buf, size_t len)
     231             : {
     232           0 :     struct lwan_mbedtls_handshake_ctx *hs_ctx = ctx;
     233             :     ssize_t r;
     234             : 
     235             :     /* We use MSG_MORE -- flushing when we transition from send() to recv()
     236             :      * -- rather than buffering on our side because this contains key
     237             :      * material that we would need to not only copy, but also zero out after
     238             :      * finishing the handshake.  */
     239             : 
     240           0 :     r = send(hs_ctx->fd, buf, len, MSG_MORE);
     241           0 :     if (UNLIKELY(r < 0)) {
     242           0 :         switch (errno) {
     243           0 :         case EINTR:
     244             :         case EAGAIN:
     245           0 :             return MBEDTLS_ERR_SSL_WANT_WRITE;
     246             : 
     247           0 :         default:
     248             :             /* It's not an internal error here, but this seemed the least
     249             :              * inappropriate error code for this situation.  lwan_setup_tls()
     250             :              * doesn't care. */
     251           0 :             return MBEDTLS_ERR_SSL_INTERNAL_ERROR;
     252             :         }
     253             :     }
     254             : 
     255           0 :     if (UNLIKELY((ssize_t)(int)r != r))
     256           0 :         return MBEDTLS_ERR_SSL_INTERNAL_ERROR;
     257             : 
     258           0 :     hs_ctx->last_was_send = true;
     259           0 :     return (int)r;
     260             : }
     261             : 
     262           0 : static void flush_pending_output(int fd)
     263             : {
     264           0 :     int zero = 0;
     265           0 :     setsockopt(fd, SOL_TCP, TCP_CORK, &zero, sizeof(zero));
     266           0 : }
     267             : 
     268           0 : static int lwan_mbedtls_recv(void *ctx, unsigned char *buf, size_t len)
     269             : {
     270           0 :     struct lwan_mbedtls_handshake_ctx *hs_ctx = ctx;
     271             :     ssize_t r;
     272             : 
     273           0 :     if (hs_ctx->last_was_send) {
     274           0 :         flush_pending_output(hs_ctx->fd);
     275           0 :         hs_ctx->last_was_send = false;
     276             :     }
     277             : 
     278           0 :     r = recv(hs_ctx->fd, buf, len, 0);
     279           0 :     if (UNLIKELY(r < 0)) {
     280           0 :         switch (errno) {
     281           0 :         case EINTR:
     282             :         case EAGAIN:
     283           0 :             return MBEDTLS_ERR_SSL_WANT_READ;
     284             : 
     285           0 :         default:
     286           0 :             return MBEDTLS_ERR_SSL_INTERNAL_ERROR;
     287             :         }
     288             :     }
     289             : 
     290           0 :     if (UNLIKELY((ssize_t)(int)r != r))
     291           0 :         return MBEDTLS_ERR_SSL_INTERNAL_ERROR;
     292             : 
     293           0 :     return (int)r;
     294             : }
     295             : 
     296           0 : static bool lwan_setup_tls(const struct lwan *l, struct lwan_connection *conn)
     297             : {
     298             :     mbedtls_ssl_context ssl;
     299           0 :     bool retval = false;
     300             :     int r;
     301             : 
     302           0 :     mbedtls_ssl_init(&ssl);
     303             : 
     304           0 :     r = mbedtls_ssl_setup(&ssl, &l->tls->config);
     305           0 :     if (UNLIKELY(r != 0)) {
     306           0 :         lwan_status_mbedtls_error(r, "Could not setup TLS context");
     307           0 :         return false;
     308             :     }
     309             : 
     310             :     /* Yielding the coroutine during the handshake enables the I/O loop to
     311             :      * destroy this coro (e.g.  on connection hangup) before we have the
     312             :      * opportunity to free the SSL context.  Defer this call for these
     313             :      * cases. */
     314             :     coro_deferred defer =
     315           0 :         coro_defer(conn->coro, lwan_setup_tls_free_ssl_context, &ssl);
     316             : 
     317           0 :     if (UNLIKELY(!defer)) {
     318           0 :         lwan_status_error("Could not defer cleanup of the TLS context");
     319           0 :         return false;
     320             :     }
     321             : 
     322           0 :     int fd = lwan_connection_get_fd(l, conn);
     323             : 
     324           0 :     struct lwan_mbedtls_handshake_ctx ctx = { .fd = fd };
     325           0 :     mbedtls_ssl_set_bio(&ssl, &ctx, lwan_mbedtls_send,
     326             :                         lwan_mbedtls_recv, NULL);
     327             : 
     328             :     while (true) {
     329           0 :         switch (mbedtls_ssl_handshake(&ssl)) {
     330           0 :         case 0:
     331           0 :             flush_pending_output(fd);
     332           0 :             goto enable_tls_ulp;
     333           0 :         case MBEDTLS_ERR_SSL_ASYNC_IN_PROGRESS:
     334             :         case MBEDTLS_ERR_SSL_CRYPTO_IN_PROGRESS:
     335             :         case MBEDTLS_ERR_SSL_WANT_READ:
     336           0 :             coro_yield(conn->coro, CONN_CORO_WANT_READ);
     337           0 :             break;
     338           0 :         case MBEDTLS_ERR_SSL_WANT_WRITE:
     339           0 :             coro_yield(conn->coro, CONN_CORO_WANT_WRITE);
     340           0 :             break;
     341           0 :         default:
     342           0 :             goto fail;
     343             :         }
     344             :     }
     345             : 
     346           0 : enable_tls_ulp:
     347           0 :     if (UNLIKELY(setsockopt(fd, SOL_TCP, TCP_ULP, "tls", sizeof("tls")) < 0))
     348           0 :         goto fail;
     349           0 :     if (UNLIKELY(!lwan_setup_tls_keys(fd, &ssl, TLS_RX)))
     350           0 :         goto fail;
     351           0 :     if (UNLIKELY(!lwan_setup_tls_keys(fd, &ssl, TLS_TX)))
     352           0 :         goto fail;
     353             : 
     354           0 :     retval = true;
     355             : 
     356           0 : fail:
     357           0 :     coro_defer_disarm(conn->coro, defer);
     358           0 :     mbedtls_ssl_free(&ssl);
     359           0 :     return retval;
     360             : }
     361             : #endif
     362             : 
     363         332 : __attribute__((noreturn)) static int process_request_coro(struct coro *coro,
     364             :                                                           void *data)
     365             : {
     366             :     /* NOTE: This function should not return; coro_yield should be used
     367             :      * instead.  This ensures the storage for `strbuf` is alive when the
     368             :      * coroutine ends and lwan_strbuf_free() is called. */
     369         332 :     struct lwan_connection *conn = data;
     370         332 :     struct lwan *lwan = conn->thread->lwan;
     371         332 :     int fd = lwan_connection_get_fd(lwan, conn);
     372         332 :     enum lwan_request_flags flags = lwan->config.request_flags;
     373         332 :     struct lwan_strbuf strbuf = LWAN_STRBUF_STATIC_INIT;
     374             :     char request_buffer[DEFAULT_BUFFER_SIZE];
     375         332 :     struct lwan_value buffer = {.value = request_buffer, .len = 0};
     376         332 :     char *next_request = NULL;
     377             :     char *header_start[N_HEADER_START];
     378             :     struct lwan_proxy proxy;
     379         332 :     const int error_when_n_packets = lwan_calculate_n_packets(DEFAULT_BUFFER_SIZE);
     380             : 
     381         332 :     coro_defer(coro, lwan_strbuf_free_defer, &strbuf);
     382             : 
     383         332 :     const size_t init_gen = 1; /* 1 call to coro_defer() */
     384         332 :     assert(init_gen == coro_deferred_get_generation(coro));
     385             : 
     386             : #if defined(LWAN_HAVE_MBEDTLS)
     387         332 :     if (conn->flags & CONN_TLS) {
     388           0 :         if (UNLIKELY(!lwan_setup_tls(lwan, conn))) {
     389           0 :             coro_yield(conn->coro, CONN_CORO_ABORT);
     390           0 :             __builtin_unreachable();
     391             :         }
     392             :     }
     393             : #else
     394             :     assert(!(conn->flags & CONN_TLS));
     395             : #endif
     396             : 
     397         268 :     while (true) {
     398         600 :         struct lwan_request_parser_helper helper = {
     399             :             .buffer = &buffer,
     400             :             .next_request = next_request,
     401             :             .error_when_n_packets = error_when_n_packets,
     402             :             .header_start = header_start,
     403             :         };
     404         600 :         struct lwan_request request = {.conn = conn,
     405         600 :                                        .global_response_headers = &lwan->headers,
     406             :                                        .fd = fd,
     407             :                                        .response = {.buffer = &strbuf},
     408             :                                        .flags = flags,
     409             :                                        .proxy = &proxy,
     410             :                                        .helper = &helper};
     411             : 
     412         600 :         lwan_process_request(lwan, &request);
     413             : 
     414             :         /* Run the deferred instructions now (except those used to initialize
     415             :          * the coroutine), so that if the connection is gracefully closed,
     416             :          * the storage for ``helper'' is still there. */
     417         504 :         coro_deferred_run(coro, init_gen);
     418             : 
     419         504 :         if (UNLIKELY(!(conn->flags & CONN_IS_KEEP_ALIVE))) {
     420          19 :             graceful_close(lwan, conn, request_buffer);
     421          19 :             break;
     422             :         }
     423             : 
     424         485 :         if (next_request && *next_request) {
     425         255 :             conn->flags |= CONN_CORK;
     426             : 
     427         255 :             if (!(conn->flags & CONN_EVENTS_WRITE))
     428          13 :                 coro_yield(coro, CONN_CORO_WANT_WRITE);
     429             :         } else {
     430         230 :             conn->flags &= ~CONN_CORK;
     431         230 :             coro_yield(coro, CONN_CORO_WANT_READ);
     432             :         }
     433             : 
     434             :         /* Ensure string buffer is reset between requests, and that the backing
     435             :          * store isn't over 2KB. */
     436         268 :         lwan_strbuf_reset_trim(&strbuf, 2048);
     437             : 
     438             :         /* Only allow flags from config. */
     439         268 :         flags = request.flags & (REQUEST_PROXIED | REQUEST_ALLOW_CORS | REQUEST_WANTS_HSTS_HEADER);
     440         268 :         next_request = helper.next_request;
     441             :     }
     442             : 
     443          19 :     coro_yield(coro, CONN_CORO_ABORT);
     444           0 :     __builtin_unreachable();
     445             : }
     446             : 
     447             : static ALWAYS_INLINE uint32_t
     448             : conn_flags_to_epoll_events(enum lwan_connection_flags flags)
     449             : {
     450             :     static const uint32_t map[CONN_EVENTS_MASK + 1] = {
     451             :         [0 /* Suspended (timer or await) */] = EPOLLRDHUP,
     452             :         [CONN_EVENTS_WRITE] = EPOLLOUT | EPOLLRDHUP,
     453             :         [CONN_EVENTS_READ] = EPOLLIN | EPOLLRDHUP,
     454             :         [CONN_EVENTS_READ_WRITE] = EPOLLIN | EPOLLOUT | EPOLLRDHUP,
     455             :     };
     456             : 
     457           0 :     return map[flags & CONN_EVENTS_MASK];
     458             : }
     459             : 
     460         264 : static void update_epoll_flags(const struct timeout_queue *tq,
     461             :                                struct lwan_connection *conn,
     462             :                                int epoll_fd,
     463             :                                enum lwan_connection_coro_yield yield_result)
     464             : {
     465             :     static const enum lwan_connection_flags or_mask[CONN_CORO_MAX] = {
     466             :         [CONN_CORO_YIELD] = 0,
     467             : 
     468             :         [CONN_CORO_WANT_READ_WRITE] = CONN_EVENTS_READ_WRITE,
     469             :         [CONN_CORO_WANT_READ] = CONN_EVENTS_READ,
     470             :         [CONN_CORO_WANT_WRITE] = CONN_EVENTS_WRITE,
     471             : 
     472             :         /* While the coro is suspended, we're not interested in either EPOLLIN
     473             :          * or EPOLLOUT events.  We still want to track this fd in epoll, though,
     474             :          * so unset both so that only EPOLLRDHUP (plus the implicitly-set ones)
     475             :          * are set. */
     476             :         [CONN_CORO_SUSPEND] = CONN_SUSPENDED,
     477             : 
     478             :         /* Ideally, when suspending a coroutine, the current flags&CONN_EVENTS_MASK
     479             :          * would have to be stored and restored -- however, resuming as if the
     480             :          * client coroutine is interested in a write event always guarantees that
     481             :          * they'll be resumed as they're TCP sockets.  There's a good chance that
     482             :          * trying to read from a socket after resuming a coroutine will succeed,
     483             :          * but if it doesn't because read() returns -EAGAIN, the I/O wrappers will
     484             :          * yield with CONN_CORO_WANT_READ anyway.  */
     485             :         [CONN_CORO_RESUME] = CONN_EVENTS_WRITE,
     486             :     };
     487             :     static const enum lwan_connection_flags and_mask[CONN_CORO_MAX] = {
     488             :         [CONN_CORO_YIELD] = ~0,
     489             : 
     490             :         [CONN_CORO_WANT_READ_WRITE] = ~0,
     491             :         [CONN_CORO_WANT_READ] = ~CONN_EVENTS_WRITE,
     492             :         [CONN_CORO_WANT_WRITE] = ~CONN_EVENTS_READ,
     493             : 
     494             :         [CONN_CORO_SUSPEND] = ~CONN_EVENTS_READ_WRITE,
     495             :         [CONN_CORO_RESUME] = ~CONN_SUSPENDED,
     496             :     };
     497         264 :     enum lwan_connection_flags prev_flags = conn->flags;
     498             : 
     499         264 :     conn->flags |= or_mask[yield_result];
     500         264 :     conn->flags &= and_mask[yield_result];
     501             : 
     502         264 :     assert(!(conn->flags & (CONN_LISTENER_HTTP | CONN_LISTENER_HTTPS)));
     503         264 :     assert((conn->flags & CONN_TLS) == (prev_flags & CONN_TLS));
     504             : 
     505         264 :     if (conn->flags == prev_flags)
     506         235 :         return;
     507             : 
     508          29 :     struct epoll_event event = {.events = conn_flags_to_epoll_events(conn->flags),
     509             :                                 .data.ptr = conn};
     510          29 :     int fd = lwan_connection_get_fd(tq->lwan, conn);
     511             : 
     512          29 :     if (UNLIKELY(epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &event) < 0))
     513           0 :         lwan_status_perror("epoll_ctl");
     514             : }
     515             : 
     516           0 : static void clear_async_await_flag(void *data)
     517             : {
     518           0 :     struct lwan_connection *async_fd_conn = data;
     519             : 
     520           0 :     async_fd_conn->flags &= ~CONN_ASYNC_AWAIT;
     521           0 : }
     522             : 
     523             : static enum lwan_connection_coro_yield
     524           0 : resume_async(const struct timeout_queue *tq,
     525             :              enum lwan_connection_coro_yield yield_result,
     526             :              int64_t from_coro,
     527             :              struct lwan_connection *conn,
     528             :              int epoll_fd)
     529             : {
     530             :     static const enum lwan_connection_flags to_connection_flags[] = {
     531             :         [CONN_CORO_ASYNC_AWAIT_READ] = CONN_EVENTS_READ,
     532             :         [CONN_CORO_ASYNC_AWAIT_WRITE] = CONN_EVENTS_WRITE,
     533             :         [CONN_CORO_ASYNC_AWAIT_READ_WRITE] = CONN_EVENTS_READ_WRITE,
     534             :     };
     535           0 :     int await_fd = (int)((uint64_t)from_coro >> 32);
     536             :     enum lwan_connection_flags flags;
     537             :     int op;
     538             : 
     539           0 :     assert(await_fd >= 0);
     540           0 :     assert(yield_result >= CONN_CORO_ASYNC_AWAIT_READ &&
     541             :            yield_result <= CONN_CORO_ASYNC_AWAIT_READ_WRITE);
     542             : 
     543           0 :     flags = to_connection_flags[yield_result];
     544             : 
     545           0 :     struct lwan_connection *await_fd_conn = &tq->lwan->conns[await_fd];
     546           0 :     if (LIKELY(await_fd_conn->flags & CONN_ASYNC_AWAIT)) {
     547           0 :         if (LIKELY((await_fd_conn->flags & CONN_EVENTS_MASK) == flags))
     548           0 :             return CONN_CORO_SUSPEND;
     549             : 
     550           0 :         op = EPOLL_CTL_MOD;
     551             :     } else {
     552           0 :         op = EPOLL_CTL_ADD;
     553           0 :         flags |= CONN_ASYNC_AWAIT;
     554           0 :         coro_defer(conn->coro, clear_async_await_flag, await_fd_conn);
     555             :     }
     556             : 
     557           0 :     struct epoll_event event = {.events = conn_flags_to_epoll_events(flags),
     558             :                                 .data.ptr = conn};
     559           0 :     if (LIKELY(!epoll_ctl(epoll_fd, op, await_fd, &event))) {
     560           0 :         await_fd_conn->flags &= ~CONN_EVENTS_MASK;
     561           0 :         await_fd_conn->flags |= flags;
     562           0 :         return CONN_CORO_SUSPEND;
     563             :     }
     564             : 
     565           0 :     return CONN_CORO_ABORT;
     566             : }
     567             : 
     568             : static ALWAYS_INLINE void resume_coro(struct timeout_queue *tq,
     569             :                                       struct lwan_connection *conn,
     570             :                                       int epoll_fd)
     571             : {
     572         377 :     assert(conn->coro);
     573             : 
     574         377 :     int64_t from_coro = coro_resume(conn->coro);
     575         282 :     enum lwan_connection_coro_yield yield_result = from_coro & 0xffffffff;
     576             : 
     577         282 :     if (UNLIKELY(yield_result >= CONN_CORO_ASYNC)) {
     578             :         yield_result =
     579           0 :             resume_async(tq, yield_result, from_coro, conn, epoll_fd);
     580             :     }
     581             : 
     582         282 :     if (UNLIKELY(yield_result == CONN_CORO_ABORT)) {
     583          19 :         timeout_queue_expire(tq, conn);
     584             :     } else {
     585         263 :         update_epoll_flags(tq, conn, epoll_fd, yield_result);
     586         263 :         timeout_queue_move_to_last(tq, conn);
     587             :     }
     588         282 : }
     589             : 
     590         190 : static void update_date_cache(struct lwan_thread *thread)
     591             : {
     592         190 :     time_t now = time(NULL);
     593             : 
     594         190 :     lwan_format_rfc_time(now, thread->date.date);
     595         190 :     lwan_format_rfc_time(now + (time_t)thread->lwan->config.expires,
     596         190 :                          thread->date.expires);
     597         190 : }
     598             : 
     599             : __attribute__((cold))
     600           0 : static bool send_buffer_without_coro(int fd, const char *buf, size_t buf_len, int flags)
     601             : {
     602           0 :     size_t total_sent = 0;
     603             : 
     604           0 :     for (int try = 0; try < 10; try++) {
     605           0 :         size_t to_send = buf_len - total_sent;
     606           0 :         if (!to_send)
     607           0 :             return true;
     608             : 
     609           0 :         ssize_t sent = send(fd, buf + total_sent, to_send, flags);
     610           0 :         if (sent <= 0) {
     611           0 :             if (errno == EINTR)
     612           0 :                 continue;
     613           0 :             if (errno == EAGAIN)
     614           0 :                 continue;
     615           0 :             break;
     616             :         }
     617             : 
     618           0 :         total_sent += (size_t)sent;
     619             :     }
     620             : 
     621           0 :     return false;
     622             : }
     623             : 
     624             : __attribute__((cold))
     625           0 : static bool send_string_without_coro(int fd, const char *str, int flags)
     626             : {
     627           0 :     return send_buffer_without_coro(fd, str, strlen(str), flags);
     628             : }
     629             : 
     630             : __attribute__((cold)) static void
     631           0 : send_last_response_without_coro(const struct lwan *l,
     632             :                                 const struct lwan_connection *conn,
     633             :                                 enum lwan_http_status status)
     634             : {
     635           0 :     int fd = lwan_connection_get_fd(l, conn);
     636             : 
     637           0 :     if (conn->flags & CONN_TLS) {
     638             :         /* There's nothing that can be done here if a client is expecting a
     639             :          * TLS connection: the TLS handshake requires a coroutine as it
     640             :          * might yield.  (In addition, the TLS handshake might allocate
     641             :          * memory, and if you couldn't create a coroutine at this point,
     642             :          * it's unlikely you'd be able to allocate memory for the TLS
     643             :          * context anyway.) */
     644           0 :         goto shutdown_and_close;
     645             :     }
     646             : 
     647           0 :     if (!send_string_without_coro(fd, "HTTP/1.0 ", MSG_MORE))
     648           0 :         goto shutdown_and_close;
     649             : 
     650           0 :     if (!send_string_without_coro(
     651             :             fd, lwan_http_status_as_string_with_code(status), MSG_MORE))
     652           0 :         goto shutdown_and_close;
     653             : 
     654           0 :     if (!send_string_without_coro(fd, "\r\nConnection: close", MSG_MORE))
     655           0 :         goto shutdown_and_close;
     656             : 
     657           0 :     if (!send_string_without_coro(fd, "\r\nContent-Type: text/html", MSG_MORE))
     658           0 :         goto shutdown_and_close;
     659             : 
     660           0 :     if (send_buffer_without_coro(fd, l->headers.value, l->headers.len,
     661             :                                  MSG_MORE)) {
     662             :         struct lwan_strbuf buffer;
     663             : 
     664           0 :         lwan_strbuf_init(&buffer);
     665           0 :         lwan_fill_default_response(&buffer, status);
     666             : 
     667           0 :         send_buffer_without_coro(fd, lwan_strbuf_get_buffer(&buffer),
     668             :                                  lwan_strbuf_get_length(&buffer), 0);
     669             : 
     670           0 :         lwan_strbuf_free(&buffer);
     671             :     }
     672             : 
     673           0 : shutdown_and_close:
     674           0 :     shutdown(fd, SHUT_RDWR);
     675           0 :     close(fd);
     676           0 : }
     677             : 
     678             : static ALWAYS_INLINE bool spawn_coro(struct lwan_connection *conn,
     679             :                                      struct coro_switcher *switcher,
     680             :                                      struct timeout_queue *tq)
     681             : {
     682         332 :     struct lwan_thread *t = conn->thread;
     683             : #if defined(LWAN_HAVE_MBEDTLS)
     684         332 :     const enum lwan_connection_flags flags_to_keep = conn->flags & CONN_TLS;
     685             : #else
     686             :     const enum lwan_connection_flags flags_to_keep = 0;
     687             : #endif
     688             : 
     689         332 :     assert(!conn->coro);
     690         332 :     assert(!(conn->flags & CONN_ASYNC_AWAIT));
     691         332 :     assert(!(conn->flags & CONN_AWAITED_FD));
     692         332 :     assert(!(conn->flags & (CONN_LISTENER_HTTP | CONN_LISTENER_HTTPS)));
     693         332 :     assert(t);
     694         332 :     assert((uintptr_t)t >= (uintptr_t)tq->lwan->thread.threads);
     695         332 :     assert((uintptr_t)t <
     696             :            (uintptr_t)(tq->lwan->thread.threads + tq->lwan->thread.count));
     697             : 
     698         332 :     *conn = (struct lwan_connection){
     699         332 :         .coro = coro_new(switcher, process_request_coro, conn),
     700         332 :         .flags = CONN_EVENTS_READ | flags_to_keep,
     701         332 :         .time_to_expire = tq->current_time + tq->move_to_last_bump,
     702             :         .thread = t,
     703             :     };
     704         332 :     if (LIKELY(conn->coro)) {
     705         332 :         timeout_queue_insert(tq, conn);
     706         332 :         return true;
     707             :     }
     708             : 
     709           0 :     conn->flags = 0;
     710             : 
     711           0 :     int fd = lwan_connection_get_fd(tq->lwan, conn);
     712             : 
     713           0 :     lwan_status_error("Couldn't spawn coroutine for file descriptor %d", fd);
     714             : 
     715           0 :     send_last_response_without_coro(tq->lwan, conn, HTTP_UNAVAILABLE);
     716           0 :     return false;
     717             : }
     718             : 
     719           7 : static bool process_pending_timers(struct timeout_queue *tq,
     720             :                                    struct lwan_thread *t,
     721             :                                    int epoll_fd)
     722             : {
     723             :     struct timeout *timeout;
     724           7 :     bool should_expire_timers = false;
     725             : 
     726          14 :     while ((timeout = timeouts_get(t->wheel))) {
     727             :         struct lwan_request *request;
     728             : 
     729           7 :         if (timeout == &tq->timeout) {
     730           6 :             should_expire_timers = true;
     731           6 :             continue;
     732             :         }
     733             : 
     734           1 :         request = container_of(timeout, struct lwan_request, timeout);
     735           1 :         update_epoll_flags(tq, request->conn, epoll_fd, CONN_CORO_RESUME);
     736             :     }
     737             : 
     738           7 :     if (should_expire_timers) {
     739           6 :         timeout_queue_expire_waiting(tq);
     740             : 
     741             :         /* tq timeout expires every 1000ms if there are connections, so
     742             :          * update the date cache at this point as well.  */
     743           6 :         update_date_cache(t);
     744             : 
     745           6 :         if (!timeout_queue_empty(tq)) {
     746           3 :             timeouts_add(t->wheel, &tq->timeout, 1000);
     747           3 :             return true;
     748             :         }
     749             : 
     750           3 :         timeouts_del(t->wheel, &tq->timeout);
     751             :     }
     752             : 
     753           4 :     return false;
     754             : }
     755             : 
     756             : static int
     757         828 : turn_timer_wheel(struct timeout_queue *tq, struct lwan_thread *t, int epoll_fd)
     758             : {
     759         828 :     const int infinite_timeout = -1;
     760             :     timeout_t wheel_timeout;
     761             :     struct timespec now;
     762             : 
     763         828 :     if (UNLIKELY(clock_gettime(monotonic_clock_id, &now) < 0))
     764           0 :         lwan_status_critical("Could not get monotonic time");
     765             : 
     766         828 :     timeouts_update(t->wheel,
     767         828 :                     (timeout_t)(now.tv_sec * 1000 + now.tv_nsec / 1000000));
     768             : 
     769             :     /* Check if there's an expired timer. */
     770         828 :     wheel_timeout = timeouts_timeout(t->wheel);
     771         828 :     if (wheel_timeout > 0) {
     772         821 :         return (int)wheel_timeout; /* No, but will soon. Wake us up in
     773             :                                       wheel_timeout ms. */
     774             :     }
     775             : 
     776           7 :     if (UNLIKELY((int64_t)wheel_timeout < 0))
     777           0 :         return infinite_timeout; /* None found. */
     778             : 
     779           7 :     if (!process_pending_timers(tq, t, epoll_fd))
     780           4 :         return infinite_timeout; /* No more timers to process. */
     781             : 
     782             :     /* After processing pending timers, determine when to wake up. */
     783           3 :     return (int)timeouts_timeout(t->wheel);
     784             : }
     785             : 
     786         332 : static bool accept_waiting_clients(const struct lwan_thread *t,
     787             :                                    const struct lwan_connection *listen_socket)
     788             : {
     789         332 :     const uint32_t read_events = conn_flags_to_epoll_events(CONN_EVENTS_READ);
     790         332 :     struct lwan_connection *conns = t->lwan->conns;
     791         332 :     int listen_fd = (int)(intptr_t)(listen_socket - conns);
     792         332 :     enum lwan_connection_flags new_conn_flags = 0;
     793             : 
     794             : #if defined(LWAN_HAVE_MBEDTLS)
     795         332 :     if (listen_socket->flags & CONN_LISTENER_HTTPS) {
     796           0 :         assert(listen_fd == t->tls_listen_fd);
     797           0 :         assert(!(listen_socket->flags & CONN_LISTENER_HTTP));
     798           0 :         new_conn_flags = CONN_TLS;
     799             :     } else {
     800         332 :         assert(listen_fd == t->listen_fd);
     801         332 :         assert(listen_socket->flags & CONN_LISTENER_HTTP);
     802             :     }
     803             : #endif
     804             : 
     805         332 :     while (true) {
     806         664 :         int fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
     807             : 
     808         664 :         if (LIKELY(fd >= 0)) {
     809         332 :             struct lwan_connection *conn = &conns[fd];
     810         332 :             struct epoll_event ev = {.data.ptr = conn, .events = read_events};
     811             :             int r;
     812             : 
     813         332 :             conn->flags = new_conn_flags;
     814             : 
     815         332 :             r = epoll_ctl(conn->thread->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
     816         332 :             if (UNLIKELY(r < 0)) {
     817           0 :                 lwan_status_perror("Could not add file descriptor %d to epoll "
     818             :                                    "set %d. Dropping connection",
     819             :                                    fd, conn->thread->epoll_fd);
     820           0 :                 send_last_response_without_coro(t->lwan, conn, HTTP_UNAVAILABLE);
     821           0 :                 conn->flags = 0;
     822             :             }
     823             : 
     824         332 :             continue;
     825             :         }
     826             : 
     827         332 :         switch (errno) {
     828           0 :         default:
     829           0 :             lwan_status_perror("Unexpected error while accepting connections");
     830             :             /* fallthrough */
     831             : 
     832         332 :         case EAGAIN:
     833         332 :             return true;
     834             : 
     835           0 :         case EBADF:
     836             :         case ECONNABORTED:
     837             :         case EINVAL:
     838           0 :             lwan_status_info("Listening socket closed");
     839           0 :             return false;
     840             :         }
     841             :     }
     842             : 
     843             :     __builtin_unreachable();
     844             : }
     845             : 
     846         184 : static int create_listen_socket(struct lwan_thread *t,
     847             :                                 unsigned int num,
     848             :                                 bool tls)
     849             : {
     850         184 :     const struct lwan *lwan = t->lwan;
     851             :     int listen_fd;
     852             : 
     853         184 :     listen_fd = lwan_create_listen_socket(lwan, num == 0, tls);
     854         184 :     if (listen_fd < 0)
     855           0 :         lwan_status_critical("Could not create listen_fd");
     856             : 
     857             :      /* Ignore errors here, as this is just a hint */
     858             : #if defined(LWAN_HAVE_SO_ATTACH_REUSEPORT_CBPF)
     859             :     /* From socket(7): "These  options may be set repeatedly at any time on
     860             :      * any socket in the group to replace the current BPF program used by
     861             :      * all sockets in the group." */
     862         184 :     if (num == 0) {
     863             :         /* From socket(7): "The  BPF program must return an index between 0 and
     864             :          * N-1 representing the socket which should receive the packet (where N
     865             :          * is the number of sockets in the group)." */
     866          92 :         const uint32_t cpu_ad_off = (uint32_t)SKF_AD_OFF + SKF_AD_CPU;
     867          92 :         struct sock_filter filter[] = {
     868             :             {BPF_LD | BPF_W | BPF_ABS, 0, 0, cpu_ad_off},   /* A = curr_cpu_index */
     869             :             {BPF_RET | BPF_A, 0, 0, 0},                     /* return A */
     870             :         };
     871          92 :         struct sock_fprog fprog = {.filter = filter, .len = N_ELEMENTS(filter)};
     872             : 
     873          92 :         (void)setsockopt(listen_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF,
     874             :                          &fprog, sizeof(fprog));
     875          92 :         (void)setsockopt(listen_fd, SOL_SOCKET, SO_LOCK_FILTER,
     876          92 :                          (int[]){1}, sizeof(int));
     877             :     }
     878             : #elif defined(LWAN_HAVE_SO_INCOMING_CPU) && defined(__x86_64__)
     879             :     (void)setsockopt(listen_fd, SOL_SOCKET, SO_INCOMING_CPU, &t->cpu,
     880             :                      sizeof(t->cpu));
     881             : #endif
     882             : 
     883         184 :     struct epoll_event event = {
     884             :         .events = EPOLLIN | EPOLLET | EPOLLERR,
     885         184 :         .data.ptr = &t->lwan->conns[listen_fd],
     886             :     };
     887         184 :     if (epoll_ctl(t->epoll_fd, EPOLL_CTL_ADD, listen_fd, &event) < 0)
     888           0 :         lwan_status_critical_perror("Could not add socket to epoll");
     889             : 
     890         184 :     return listen_fd;
     891             : }
     892             : 
     893         184 : static void *thread_io_loop(void *data)
     894             : {
     895         184 :     struct lwan_thread *t = data;
     896         184 :     int epoll_fd = t->epoll_fd;
     897         184 :     const int max_events = LWAN_MIN((int)t->lwan->thread.max_fd, 1024);
     898         184 :     struct lwan *lwan = t->lwan;
     899             :     struct epoll_event *events;
     900             :     struct coro_switcher switcher;
     901             :     struct timeout_queue tq;
     902             : 
     903         184 :     lwan_status_debug("Worker thread #%zd starting",
     904             :                       t - t->lwan->thread.threads + 1);
     905         184 :     lwan_set_thread_name("worker");
     906             : 
     907         184 :     events = calloc((size_t)max_events, sizeof(*events));
     908         184 :     if (UNLIKELY(!events))
     909           0 :         lwan_status_critical("Could not allocate memory for events");
     910             : 
     911         184 :     update_date_cache(t);
     912             : 
     913         184 :     timeout_queue_init(&tq, lwan);
     914             : 
     915         184 :     lwan_random_seed_prng_for_thread(t);
     916             : 
     917         184 :     pthread_barrier_wait(&lwan->thread.barrier);
     918             : 
     919         644 :     for (;;) {
     920         828 :         int timeout = turn_timer_wheel(&tq, t, epoll_fd);
     921         828 :         int n_fds = epoll_wait(epoll_fd, events, max_events, timeout);
     922         739 :         bool created_coros = false;
     923             : 
     924         739 :         if (UNLIKELY(n_fds < 0)) {
     925           0 :             if (errno == EBADF || errno == EINVAL)
     926             :                 break;
     927           0 :             continue;
     928             :         }
     929             : 
     930        1570 :         for (struct epoll_event *event = events; n_fds--; event++) {
     931         926 :             struct lwan_connection *conn = event->data.ptr;
     932             : 
     933         926 :             assert(!(conn->flags & CONN_ASYNC_AWAIT));
     934             : 
     935         926 :             if (conn->flags & (CONN_LISTENER_HTTP | CONN_LISTENER_HTTPS)) {
     936         332 :                 if (LIKELY(accept_waiting_clients(t, conn)))
     937         332 :                     continue;
     938           0 :                 close(epoll_fd);
     939           0 :                 epoll_fd = -1;
     940           0 :                 break;
     941             :             }
     942             : 
     943         594 :             if (UNLIKELY(event->events & (EPOLLRDHUP | EPOLLHUP))) {
     944         217 :                 if ((conn->flags & CONN_AWAITED_FD) != CONN_SUSPENDED) {
     945         217 :                     timeout_queue_expire(&tq, conn);
     946         217 :                     continue;
     947             :                 }
     948             :             }
     949             : 
     950         377 :             if (!conn->coro) {
     951         332 :                 if (UNLIKELY(!spawn_coro(conn, &switcher, &tq))) {
     952           0 :                     send_last_response_without_coro(t->lwan, conn, HTTP_UNAVAILABLE);
     953           0 :                     continue;
     954             :                 }
     955             : 
     956         332 :                 created_coros = true;
     957             :             }
     958             : 
     959             :             resume_coro(&tq, conn, epoll_fd);
     960             :         }
     961             : 
     962         644 :         if (created_coros)
     963         237 :             timeouts_add(t->wheel, &tq.timeout, 1000);
     964             :     }
     965             : 
     966           0 :     pthread_barrier_wait(&lwan->thread.barrier);
     967             : 
     968           0 :     timeout_queue_expire_all(&tq);
     969           0 :     free(events);
     970             : 
     971           0 :     return NULL;
     972             : }
     973             : 
     974         184 : static void create_thread(struct lwan *l, struct lwan_thread *thread)
     975             : {
     976             :     int ignore;
     977             :     pthread_attr_t attr;
     978             : 
     979         184 :     thread->lwan = l;
     980             : 
     981         184 :     thread->wheel = timeouts_open(&ignore);
     982         184 :     if (!thread->wheel)
     983           0 :         lwan_status_critical("Could not create timer wheel");
     984             : 
     985         184 :     if ((thread->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0)
     986           0 :         lwan_status_critical_perror("epoll_create");
     987             : 
     988         184 :     if (pthread_attr_init(&attr))
     989           0 :         lwan_status_critical_perror("pthread_attr_init");
     990             : 
     991         184 :     if (pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM))
     992           0 :         lwan_status_critical_perror("pthread_attr_setscope");
     993             : 
     994         184 :     if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE))
     995           0 :         lwan_status_critical_perror("pthread_attr_setdetachstate");
     996             : 
     997         184 :     if (pthread_create(&thread->self, &attr, thread_io_loop, thread))
     998           0 :         lwan_status_critical_perror("pthread_create");
     999             : 
    1000         184 :     if (pthread_attr_destroy(&attr))
    1001           0 :         lwan_status_critical_perror("pthread_attr_destroy");
    1002         184 : }
    1003             : 
    1004             : #if defined(__linux__) && defined(__x86_64__)
    1005           0 : static bool read_cpu_topology(struct lwan *l, uint32_t siblings[])
    1006             : {
    1007             :     char path[PATH_MAX];
    1008             : 
    1009           0 :     for (uint32_t i = 0; i < l->available_cpus; i++)
    1010           0 :         siblings[i] = 0xbebacafe;
    1011             : 
    1012           0 :     for (unsigned int i = 0; i < l->available_cpus; i++) {
    1013             :         FILE *sib;
    1014             :         uint32_t id, sibling;
    1015             :         char separator;
    1016             : 
    1017           0 :         snprintf(path, sizeof(path),
    1018             :                  "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
    1019             :                  i);
    1020             : 
    1021           0 :         sib = fopen(path, "re");
    1022           0 :         if (!sib) {
    1023           0 :             lwan_status_warning("Could not open `%s` to determine CPU topology",
    1024             :                                 path);
    1025           0 :             return false;
    1026             :         }
    1027             : 
    1028           0 :         switch (fscanf(sib, "%u%c%u", &id, &separator, &sibling)) {
    1029           0 :         case 2: /* No SMT */
    1030           0 :             siblings[i] = id;
    1031           0 :             break;
    1032           0 :         case 3: /* SMT */
    1033           0 :             if (!(separator == ',' || separator == '-')) {
    1034           0 :                 lwan_status_critical("Expecting either ',' or '-' for sibling separator");
    1035             :                 __builtin_unreachable();
    1036             :             }
    1037             : 
    1038           0 :             siblings[i] = sibling;
    1039           0 :             break;
    1040           0 :         default:
    1041           0 :             lwan_status_critical("%s has invalid format", path);
    1042             :             __builtin_unreachable();
    1043             :         }
    1044             : 
    1045           0 :         fclose(sib);
    1046             :     }
    1047             : 
    1048             :     /* Perform a sanity check here, as some systems seem to filter out the
    1049             :      * result of sysconf() to obtain the number of configured and online
    1050             :      * CPUs but don't bother changing what's available through sysfs as far
    1051             :      * as the CPU topology information goes.  It's better to fall back to a
    1052             :      * possibly non-optimal setup than just crash during startup while
    1053             :      * trying to perform an out-of-bounds array access.  */
    1054           0 :     for (unsigned int i = 0; i < l->available_cpus; i++) {
    1055           0 :         if (siblings[i] == 0xbebacafe) {
    1056           0 :             lwan_status_warning("Could not determine sibling for CPU %d", i);
    1057           0 :             return false;
    1058             :         }
    1059             : 
    1060           0 :         if (siblings[i] >= l->available_cpus) {
    1061           0 :             lwan_status_warning("CPU information topology says CPU %d exists, "
    1062             :                                 "but max available CPUs is %d (online CPUs: %d). "
    1063             :                                 "Is Lwan running in a (broken) container?",
    1064             :                                 siblings[i], l->available_cpus, l->online_cpus);
    1065           0 :             return false;
    1066             :         }
    1067             :     }
    1068             : 
    1069           0 :     return true;
    1070             : }
    1071             : 
    1072             : static void
    1073           0 : siblings_to_schedtbl(struct lwan *l, uint32_t siblings[], uint32_t schedtbl[])
    1074             : {
    1075           0 :     int32_t *seen = calloc(l->available_cpus, sizeof(int32_t));
    1076           0 :     unsigned int n_schedtbl = 0;
    1077             : 
    1078           0 :     if (!seen)
    1079           0 :         lwan_status_critical("Could not allocate the seen array");
    1080             : 
    1081           0 :     for (uint32_t i = 0; i < l->available_cpus; i++)
    1082           0 :         seen[i] = -1;
    1083             : 
    1084           0 :     for (uint32_t i = 0; i < l->available_cpus; i++) {
    1085           0 :         if (seen[siblings[i]] < 0) {
    1086           0 :             seen[siblings[i]] = (int32_t)i;
    1087             :         } else {
    1088           0 :             schedtbl[n_schedtbl++] = (uint32_t)seen[siblings[i]];
    1089           0 :             schedtbl[n_schedtbl++] = i;
    1090             :         }
    1091             :     }
    1092             : 
    1093           0 :     if (n_schedtbl != l->available_cpus)
    1094           0 :         memcpy(schedtbl, seen, l->available_cpus * sizeof(int));
    1095             : 
    1096           0 :     free(seen);
    1097           0 : }
    1098             : 
    1099             : static bool
    1100           0 : topology_to_schedtbl(struct lwan *l, uint32_t schedtbl[], uint32_t n_threads)
    1101             : {
    1102           0 :     uint32_t *siblings = calloc(l->available_cpus, sizeof(uint32_t));
    1103             : 
    1104           0 :     if (!siblings)
    1105           0 :         lwan_status_critical("Could not allocate siblings array");
    1106             : 
    1107           0 :     if (read_cpu_topology(l, siblings)) {
    1108           0 :         uint32_t *affinity = calloc(l->available_cpus, sizeof(uint32_t));
    1109             : 
    1110           0 :         if (!affinity)
    1111           0 :             lwan_status_critical("Could not allocate affinity array");
    1112             : 
    1113           0 :         siblings_to_schedtbl(l, siblings, affinity);
    1114             : 
    1115           0 :         for (uint32_t i = 0; i < n_threads; i++)
    1116           0 :             schedtbl[i] = affinity[i % l->available_cpus];
    1117             : 
    1118           0 :         free(affinity);
    1119           0 :         free(siblings);
    1120           0 :         return true;
    1121             :     }
    1122             : 
    1123           0 :     for (uint32_t i = 0; i < n_threads; i++)
    1124           0 :         schedtbl[i] = (i / 2) % l->thread.count;
    1125             : 
    1126           0 :     free(siblings);
    1127           0 :     return false;
    1128             : }
    1129             : 
    1130             : static void
    1131           0 : adjust_thread_affinity(const struct lwan_thread *thread)
    1132             : {
    1133             :     cpu_set_t set;
    1134             : 
    1135           0 :     CPU_ZERO(&set);
    1136           0 :     CPU_SET(thread->cpu, &set);
    1137             : 
    1138           0 :     if (pthread_setaffinity_np(thread->self, sizeof(set), &set))
    1139           0 :         lwan_status_warning("Could not set thread affinity");
    1140           0 : }
    1141             : #else
    1142             : #define adjust_thread_affinity(...)
    1143             : #endif
    1144             : 
    1145             : #if defined(LWAN_HAVE_MBEDTLS)
    1146           0 : static bool is_tls_ulp_supported(void)
    1147             : {
    1148           0 :     FILE *available_ulp = fopen("/proc/sys/net/ipv4/tcp_available_ulp", "re");
    1149             :     char buffer[512];
    1150           0 :     bool available = false;
    1151             : 
    1152           0 :     if (!available_ulp)
    1153           0 :         return false;
    1154             : 
    1155           0 :     if (fgets(buffer, 512, available_ulp)) {
    1156           0 :         if (strstr(buffer, "tls"))
    1157           0 :             available = true;
    1158             :     }
    1159             : 
    1160           0 :     fclose(available_ulp);
    1161           0 :     return available;
    1162             : }
    1163             : 
    1164          92 : static bool lwan_init_tls(struct lwan *l)
    1165             : {
    1166             :     static const int aes128_ciphers[] = {
    1167             :         /* Only allow Ephemeral Diffie-Hellman key exchange, so Perfect
    1168             :          * Forward Secrecy is possible.  */
    1169             :         MBEDTLS_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
    1170             :         MBEDTLS_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
    1171             :         MBEDTLS_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256,
    1172             :         MBEDTLS_TLS_DHE_PSK_WITH_AES_128_GCM_SHA256,
    1173             : 
    1174             :         /* FIXME: Other ciphers are supported by kTLS, notably AES256 and
    1175             :          * ChaCha20-Poly1305.  Add those here and patch
    1176             :          * lwan_setup_tls_keys() to match.  */
    1177             : 
    1178             :         /* FIXME: Maybe allow this to be user-tunable like other servers do?  */
    1179             :         0,
    1180             :     };
    1181             :     int r;
    1182             : 
    1183          92 :     if (!l->config.ssl.cert || !l->config.ssl.key)
    1184          92 :         return false;
    1185             : 
    1186           0 :     if (!is_tls_ulp_supported()) {
    1187           0 :         lwan_status_critical(
    1188             :             "TLS ULP not loaded. Try running `modprobe tls` as root.");
    1189             :     }
    1190             : 
    1191           0 :     l->tls = calloc(1, sizeof(*l->tls));
    1192           0 :     if (!l->tls)
    1193           0 :         lwan_status_critical("Could not allocate memory for SSL context");
    1194             : 
    1195           0 :     lwan_status_debug("Initializing mbedTLS");
    1196             : 
    1197           0 :     mbedtls_ssl_config_init(&l->tls->config);
    1198           0 :     mbedtls_x509_crt_init(&l->tls->server_cert);
    1199           0 :     mbedtls_pk_init(&l->tls->server_key);
    1200           0 :     mbedtls_entropy_init(&l->tls->entropy);
    1201           0 :     mbedtls_ctr_drbg_init(&l->tls->ctr_drbg);
    1202             : 
    1203           0 :     r = mbedtls_x509_crt_parse_file(&l->tls->server_cert, l->config.ssl.cert);
    1204           0 :     if (r) {
    1205           0 :         lwan_status_mbedtls_error(r, "Could not parse certificate at %s",
    1206             :                                   l->config.ssl.cert);
    1207           0 :         abort();
    1208             :     }
    1209             : 
    1210           0 :     r = mbedtls_pk_parse_keyfile(&l->tls->server_key, l->config.ssl.key, NULL);
    1211           0 :     if (r) {
    1212           0 :         lwan_status_mbedtls_error(r, "Could not parse key file at %s",
    1213             :                                   l->config.ssl.key);
    1214           0 :         abort();
    1215             :     }
    1216             : 
    1217             :     /* Even though this points to files that will probably be outside
    1218             :      * the reach of the server (if straightjackets are used), wipe this
    1219             :      * struct to get rid of the paths to these files. */
    1220           0 :     lwan_always_bzero(l->config.ssl.cert, strlen(l->config.ssl.cert));
    1221           0 :     free(l->config.ssl.cert);
    1222           0 :     lwan_always_bzero(l->config.ssl.key, strlen(l->config.ssl.key));
    1223           0 :     free(l->config.ssl.key);
    1224           0 :     lwan_always_bzero(&l->config.ssl, sizeof(l->config.ssl));
    1225             : 
    1226           0 :     mbedtls_ssl_conf_ca_chain(&l->tls->config, l->tls->server_cert.next, NULL);
    1227           0 :     r = mbedtls_ssl_conf_own_cert(&l->tls->config, &l->tls->server_cert,
    1228           0 :                                   &l->tls->server_key);
    1229           0 :     if (r) {
    1230           0 :         lwan_status_mbedtls_error(r, "Could not set cert/key");
    1231           0 :         abort();
    1232             :     }
    1233             : 
    1234           0 :     r = mbedtls_ctr_drbg_seed(&l->tls->ctr_drbg, mbedtls_entropy_func,
    1235           0 :                               &l->tls->entropy, NULL, 0);
    1236           0 :     if (r) {
    1237           0 :         lwan_status_mbedtls_error(r, "Could not seed ctr_drbg");
    1238           0 :         abort();
    1239             :     }
    1240             : 
    1241           0 :     r = mbedtls_ssl_config_defaults(&l->tls->config, MBEDTLS_SSL_IS_SERVER,
    1242             :                                     MBEDTLS_SSL_TRANSPORT_STREAM,
    1243             :                                     MBEDTLS_SSL_PRESET_DEFAULT);
    1244           0 :     if (r) {
    1245           0 :         lwan_status_mbedtls_error(r, "Could not set mbedTLS default config");
    1246           0 :         abort();
    1247             :     }
    1248             : 
    1249           0 :     mbedtls_ssl_conf_rng(&l->tls->config, mbedtls_ctr_drbg_random,
    1250           0 :                          &l->tls->ctr_drbg);
    1251           0 :     mbedtls_ssl_conf_ciphersuites(&l->tls->config, aes128_ciphers);
    1252             : 
    1253           0 :     mbedtls_ssl_conf_renegotiation(&l->tls->config,
    1254             :                                    MBEDTLS_SSL_RENEGOTIATION_DISABLED);
    1255           0 :     mbedtls_ssl_conf_legacy_renegotiation(&l->tls->config,
    1256             :                                           MBEDTLS_SSL_LEGACY_NO_RENEGOTIATION);
    1257             : 
    1258             : #if defined(MBEDTLS_SSL_ALPN)
    1259             :     static const char *alpn_protos[] = {"http/1.1", NULL};
    1260           0 :     mbedtls_ssl_conf_alpn_protocols(&l->tls->config, alpn_protos);
    1261             : #endif
    1262             : 
    1263           0 :     return true;
    1264             : }
    1265             : #endif
    1266             : 
    1267          92 : void lwan_thread_init(struct lwan *l)
    1268             : {
    1269          92 :     const unsigned int total_conns = l->thread.max_fd * l->thread.count;
    1270             : #if defined(LWAN_HAVE_MBEDTLS)
    1271          92 :     const bool tls_initialized = lwan_init_tls(l);
    1272             : #else
    1273             :     const bool tls_initialized = false;
    1274             : #endif
    1275             : 
    1276          92 :     lwan_status_debug("Initializing threads");
    1277             : 
    1278          92 :     l->thread.threads =
    1279          92 :         calloc((size_t)l->thread.count, sizeof(struct lwan_thread));
    1280          92 :     if (!l->thread.threads)
    1281           0 :         lwan_status_critical("Could not allocate memory for threads");
    1282             : 
    1283             :     uint32_t *schedtbl;
    1284             :     bool adj_affinity;
    1285             : 
    1286             : #if defined(__x86_64__) && defined(__linux__)
    1287          92 :     if (l->online_cpus > 1) {
    1288             :         static_assert(sizeof(struct lwan_connection) == 32,
    1289             :                       "Two connections per cache line");
    1290             : #ifdef _SC_LEVEL1_DCACHE_LINESIZE
    1291           0 :         assert(sysconf(_SC_LEVEL1_DCACHE_LINESIZE) == 64);
    1292             : #endif
    1293           0 :         lwan_status_debug("%d CPUs of %d are online. "
    1294             :                           "Reading topology to pre-schedule clients",
    1295             :                           l->online_cpus, l->available_cpus);
    1296             :         /*
    1297             :          * Pre-schedule each file descriptor, to reduce some operations in the
    1298             :          * fast path.
    1299             :          *
    1300             :          * Since struct lwan_connection is guaranteed to be 32-byte long, two of
    1301             :          * them can fill up a cache line.  Assume siblings share cache lines and
    1302             :          * use the CPU topology to group two connections per cache line in such
    1303             :          * a way that false sharing is avoided.
    1304             :          */
    1305           0 :         schedtbl = calloc(l->thread.count, sizeof(uint32_t));
    1306           0 :         adj_affinity = topology_to_schedtbl(l, schedtbl, l->thread.count);
    1307             : 
    1308           0 :         for (unsigned int i = 0; i < total_conns; i++)
    1309           0 :             l->conns[i].thread = &l->thread.threads[schedtbl[i % l->thread.count]];
    1310             :     } else
    1311             : #endif /* __x86_64__ && __linux__ */
    1312             :     {
    1313          92 :         lwan_status_debug("Using round-robin to preschedule clients");
    1314             : 
    1315         276 :         for (unsigned int i = 0; i < l->thread.count; i++)
    1316         184 :             l->thread.threads[i].cpu = i % l->online_cpus;
    1317    48234588 :         for (unsigned int i = 0; i < total_conns; i++)
    1318    48234496 :             l->conns[i].thread = &l->thread.threads[i % l->thread.count];
    1319             : 
    1320          92 :         schedtbl = NULL;
    1321          92 :         adj_affinity = false;
    1322             :     }
    1323             : 
    1324         276 :     for (unsigned int i = 0; i < l->thread.count; i++) {
    1325         184 :         struct lwan_thread *thread = NULL;
    1326             : 
    1327         184 :         if (schedtbl) {
    1328             :             /* This is not the most elegant thing, but this assures that the
    1329             :              * listening sockets are added to the SO_REUSEPORT group in a
    1330             :              * specific order, because that's what the CBPF program to direct
    1331             :              * the incoming connection to the right CPU will use. */
    1332           0 :             for (uint32_t thread_id = 0; thread_id < l->thread.count;
    1333           0 :                  thread_id++) {
    1334           0 :                 if (schedtbl[thread_id % l->thread.count] == i) {
    1335           0 :                     thread = &l->thread.threads[thread_id];
    1336           0 :                     break;
    1337             :                 }
    1338             :             }
    1339           0 :             if (!thread) {
    1340             :                 /* FIXME: can this happen when we have a offline CPU? */
    1341           0 :                 lwan_status_critical(
    1342             :                     "Could not figure out which CPU thread %d should go to", i);
    1343             :             }
    1344             :         } else {
    1345         184 :             thread = &l->thread.threads[i % l->thread.count];
    1346             :         }
    1347             : 
    1348         184 :         if (pthread_barrier_init(&l->thread.barrier, NULL, 2))
    1349           0 :             lwan_status_critical("Could not create barrier");
    1350             : 
    1351         184 :         create_thread(l, thread);
    1352             : 
    1353         184 :         if ((thread->listen_fd = create_listen_socket(thread, i, false)) < 0)
    1354           0 :             lwan_status_critical_perror("Could not create listening socket");
    1355         184 :         l->conns[thread->listen_fd].flags |= CONN_LISTENER_HTTP;
    1356             : 
    1357         184 :         if (tls_initialized) {
    1358           0 :             if ((thread->tls_listen_fd = create_listen_socket(thread, i, true)) < 0)
    1359           0 :                 lwan_status_critical_perror("Could not create TLS listening socket");
    1360           0 :             l->conns[thread->tls_listen_fd].flags |= CONN_LISTENER_HTTPS;
    1361             :         } else {
    1362         184 :             thread->tls_listen_fd = -1;
    1363             :         }
    1364             : 
    1365         184 :         if (adj_affinity) {
    1366           0 :             l->thread.threads[i].cpu = schedtbl[i % l->thread.count];
    1367           0 :             adjust_thread_affinity(thread);
    1368             :         }
    1369             : 
    1370         184 :         pthread_barrier_wait(&l->thread.barrier);
    1371             :     }
    1372             : 
    1373          92 :     lwan_status_debug("Worker threads created and ready to serve");
    1374             : 
    1375          92 :     free(schedtbl);
    1376          92 : }
    1377             : 
    1378           0 : void lwan_thread_shutdown(struct lwan *l)
    1379             : {
    1380           0 :     lwan_status_debug("Shutting down threads");
    1381             : 
    1382           0 :     for (unsigned int i = 0; i < l->thread.count; i++) {
    1383           0 :         struct lwan_thread *t = &l->thread.threads[i];
    1384           0 :         int epoll_fd = t->epoll_fd;
    1385           0 :         int listen_fd = t->listen_fd;
    1386             : 
    1387           0 :         t->listen_fd = -1;
    1388           0 :         t->epoll_fd = -1;
    1389           0 :         close(epoll_fd);
    1390           0 :         close(listen_fd);
    1391             :     }
    1392             : 
    1393           0 :     pthread_barrier_wait(&l->thread.barrier);
    1394           0 :     pthread_barrier_destroy(&l->thread.barrier);
    1395             : 
    1396           0 :     for (unsigned int i = 0; i < l->thread.count; i++) {
    1397           0 :         struct lwan_thread *t = &l->thread.threads[i];
    1398             : 
    1399           0 :         pthread_join(l->thread.threads[i].self, NULL);
    1400           0 :         timeouts_close(t->wheel);
    1401             :     }
    1402             : 
    1403           0 :     free(l->thread.threads);
    1404             : 
    1405             : #if defined(LWAN_HAVE_MBEDTLS)
    1406           0 :     if (l->tls) {
    1407           0 :         mbedtls_ssl_config_free(&l->tls->config);
    1408           0 :         mbedtls_x509_crt_free(&l->tls->server_cert);
    1409           0 :         mbedtls_pk_free(&l->tls->server_key);
    1410           0 :         mbedtls_entropy_free(&l->tls->entropy);
    1411           0 :         mbedtls_ctr_drbg_free(&l->tls->ctr_drbg);
    1412           0 :         free(l->tls);
    1413             :     }
    1414             : #endif
    1415           0 : }

Generated by: LCOV version 1.15-2-gb9d6727