Line data Source code
1 : /*
2 : * lwan - web server
3 : * Copyright (c) 2012, 2013 L. A. F. Pereira <l@tia.mat.br>
4 : *
5 : * This program is free software; you can redistribute it and/or
6 : * modify it under the terms of the GNU General Public License
7 : * as published by the Free Software Foundation; either version 2
8 : * of the License, or any later version.
9 : *
10 : * This program is distributed in the hope that it will be useful,
11 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : * GNU General Public License for more details.
14 : *
15 : * You should have received a copy of the GNU General Public License
16 : * along with this program; if not, write to the Free Software
17 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 : * USA.
19 : */
20 :
21 : #define _GNU_SOURCE
22 : #include <assert.h>
23 : #include <errno.h>
24 : #include <fcntl.h>
25 : #include <pthread.h>
26 : #include <sched.h>
27 : #include <stdlib.h>
28 : #include <string.h>
29 : #include <sys/epoll.h>
30 : #include <sys/ioctl.h>
31 : #include <sys/socket.h>
32 : #include <unistd.h>
33 :
34 : #if defined(LWAN_HAVE_SO_ATTACH_REUSEPORT_CBPF)
35 : #include <linux/filter.h>
36 : #endif
37 :
38 : #if defined(LWAN_HAVE_MBEDTLS)
39 : #include <mbedtls/entropy.h>
40 : #include <mbedtls/error.h>
41 : #include <mbedtls/gcm.h>
42 : #include <mbedtls/net_sockets.h>
43 : #include <mbedtls/ssl_internal.h>
44 :
45 : #include <linux/tls.h>
46 : #include <netinet/tcp.h>
47 : #endif
48 :
49 : #include "list.h"
50 : #include "murmur3.h"
51 : #include "lwan-private.h"
52 : #include "lwan-tq.h"
53 :
/* Adapter so lwan_strbuf_free() can be registered as a coro_defer()
 * callback, which takes an untyped pointer.
 *
 * Fix: the original wrote `return lwan_strbuf_free(...)`, i.e. returned a
 * void expression from a void function — a C constraint violation
 * (C11 6.8.6.4); compilers accept it only as an extension. */
static void lwan_strbuf_free_defer(void *data)
{
    lwan_strbuf_free((struct lwan_strbuf *)data);
}
58 :
59 19 : static void graceful_close(struct lwan *l,
60 : struct lwan_connection *conn,
61 : char buffer[static DEFAULT_BUFFER_SIZE])
62 : {
63 19 : int fd = lwan_connection_get_fd(l, conn);
64 :
65 0 : while (TIOCOUTQ) {
66 : /* This ioctl isn't probably doing what it says on the tin; the details
67 : * are subtle, but it seems to do the trick to allow gracefully closing
68 : * the connection in some cases with minimal system calls. */
69 : int bytes_waiting;
70 19 : int r = ioctl(fd, TIOCOUTQ, &bytes_waiting);
71 :
72 19 : if (!r && !bytes_waiting) /* See note about close(2) below. */
73 19 : return;
74 0 : if (r < 0 && errno == EINTR)
75 0 : continue;
76 :
77 0 : break;
78 : }
79 :
80 0 : if (UNLIKELY(shutdown(fd, SHUT_WR) < 0)) {
81 0 : if (UNLIKELY(errno == ENOTCONN))
82 0 : return;
83 : }
84 :
85 0 : for (int tries = 0; tries < 20; tries++) {
86 0 : ssize_t r = recv(fd, buffer, DEFAULT_BUFFER_SIZE, MSG_TRUNC);
87 :
88 0 : if (!r)
89 0 : break;
90 :
91 0 : if (r < 0) {
92 0 : switch (errno) {
93 0 : case EAGAIN:
94 0 : break;
95 0 : case EINTR:
96 0 : continue;
97 0 : default:
98 0 : return;
99 : }
100 : }
101 :
102 0 : coro_yield(conn->coro, CONN_CORO_WANT_READ);
103 : }
104 :
105 : /* close(2) will be called when the coroutine yields with CONN_CORO_ABORT */
106 : }
107 :
108 : #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
/* Fuzzing builds: make "randomness" fully deterministic so fuzzer runs
 * are reproducible.  Seeding is a no-op and the "random" value is just a
 * monotonically increasing counter. */
static void lwan_random_seed_prng_for_thread(const struct lwan_thread *t)
{
    (void)t;
}

uint64_t lwan_random_uint64()
{
    static uint64_t value;

    /* Deterministic across runs, still unique per call. */
    return ATOMIC_INC(value);
}
120 : #else
/* Per-thread state for the Lehmer64 PRNG (a 128-bit multiplicative
 * congruential generator; the high 64 bits of the state are the output). */
static __thread __uint128_t lehmer64_state;

/* Seed this thread's PRNG from the system entropy pool.  If that fails,
 * fall back to mixing the thread pointer and its epoll fd through the
 * murmur3 64-bit finalizer — unique enough per thread, but NOT
 * cryptographically strong. */
static void lwan_random_seed_prng_for_thread(const struct lwan_thread *t)
{
    if (lwan_getentropy(&lehmer64_state, sizeof(lehmer64_state), 0) < 0) {
        lwan_status_warning("Couldn't get proper entropy for PRNG, using fallback seed");
        lehmer64_state |= murmur3_fmix64((uint64_t)(uintptr_t)t);
        lehmer64_state <<= 64;
        lehmer64_state |= murmur3_fmix64((uint64_t)t->epoll_fd);
    }
}

uint64_t lwan_random_uint64()
{
    /* https://lemire.me/blog/2019/03/19/the-fastest-conventional-random-number-generator-that-can-pass-big-crush/ */
    lehmer64_state *= 0xda942042e4dd58b5ull;
    return (uint64_t)(lehmer64_state >> 64);
}
139 : #endif
140 :
/* Return an identifier for this request, drawing one lazily from the PRNG
 * on first use.  Zero means "not yet assigned"; a random draw of exactly
 * zero merely causes one extra draw on the next call. */
uint64_t lwan_request_get_id(struct lwan_request *request)
{
    struct lwan_request_parser_helper *helper = request->helper;

    if (helper->request_id == 0)
        helper->request_id = lwan_random_uint64();

    return helper->request_id;
}
150 :
151 : #if defined(LWAN_HAVE_MBEDTLS)
/* Hand the session keys negotiated by mbedTLS to the kernel TLS (kTLS)
 * layer for one direction (rx_or_tx is TLS_RX or TLS_TX) so record
 * crypto happens in the kernel.  Only TLS 1.2 + AES-128-GCM is handled.
 * Returns false if setsockopt() fails; the stack copy of the key material
 * is zeroed on every exit path. */
static bool
lwan_setup_tls_keys(int fd, const mbedtls_ssl_context *ssl, int rx_or_tx)
{
    struct tls12_crypto_info_aes_gcm_128 info = {
        .info = {.version = TLS_1_2_VERSION,
                 .cipher_type = TLS_CIPHER_AES_GCM_128},
    };
    const unsigned char *salt, *iv, *rec_seq;
    const mbedtls_gcm_context *gcm_ctx;
    const mbedtls_aes_context *aes_ctx;

    /* NOTE(review): this reaches into mbedTLS internals (ssl_internal.h),
     * so it is tied to the specific mbedTLS version it was written for. */
    switch (rx_or_tx) {
    case TLS_RX:
        salt = ssl->transform->iv_dec;
        rec_seq = ssl->in_ctr;
        gcm_ctx = ssl->transform->cipher_ctx_dec.cipher_ctx;
        break;
    case TLS_TX:
        salt = ssl->transform->iv_enc;
        rec_seq = ssl->cur_out_ctr;
        gcm_ctx = ssl->transform->cipher_ctx_enc.cipher_ctx;
        break;
    default:
        __builtin_unreachable();
    }

    /* The implicit IV follows the 4-byte salt in mbedTLS's IV layout. */
    iv = salt + 4;
    aes_ctx = gcm_ctx->cipher_ctx.cipher_ctx;

    memcpy(info.iv, iv, TLS_CIPHER_AES_GCM_128_IV_SIZE);
    memcpy(info.rec_seq, rec_seq, TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
    memcpy(info.key, aes_ctx->rk, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
    memcpy(info.salt, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);

    if (UNLIKELY(setsockopt(fd, SOL_TLS, rx_or_tx, &info, sizeof(info)) < 0)) {
        lwan_status_perror("Could not set %s kTLS keys for fd %d",
                           rx_or_tx == TLS_TX ? "transmission" : "reception",
                           fd);
        lwan_always_bzero(&info, sizeof(info));
        return false;
    }

    /* Wipe the key material copied onto the stack. */
    lwan_always_bzero(&info, sizeof(info));
    return true;
}
197 :
/* printf-like error reporter that appends the stringified mbedTLS error
 * code to the formatted message.  Cold path; if vasprintf() fails the
 * message is silently dropped. */
__attribute__((format(printf, 2, 3)))
__attribute__((noinline, cold))
static void lwan_status_mbedtls_error(int error_code, const char *fmt, ...)
{
    char *formatted;
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vasprintf(&formatted, fmt, ap);
    if (r >= 0) {
        char mbedtls_errbuf[128];

        mbedtls_strerror(error_code, mbedtls_errbuf, sizeof(mbedtls_errbuf));
        lwan_status_error("%s: %s", formatted, mbedtls_errbuf);
        free(formatted);
    }
    va_end(ap);
}
217 :
/* coro_defer() callback: free the mbedTLS context if the connection
 * coroutine is destroyed while the handshake is still in flight. */
static void lwan_setup_tls_free_ssl_context(void *data)
{
    mbedtls_ssl_context *ssl = data;

    mbedtls_ssl_free(ssl);
}
224 :
/* BIO context for the handshake send/recv callbacks below. */
struct lwan_mbedtls_handshake_ctx {
    int fd;
    bool last_was_send; /* last BIO op was send(); flush before next recv() */
};
229 :
/* mbedTLS BIO send callback used during the handshake: translates POSIX
 * send() results into mbedTLS return-code conventions. */
static int lwan_mbedtls_send(void *ctx, const unsigned char *buf, size_t len)
{
    struct lwan_mbedtls_handshake_ctx *hs_ctx = ctx;
    ssize_t r;

    /* We use MSG_MORE -- flushing when we transition from send() to recv()
     * -- rather than buffering on our side because this contains key
     * material that we would need to only copy, but also zero out after
     * finishing the handshake. */

    r = send(hs_ctx->fd, buf, len, MSG_MORE);
    if (UNLIKELY(r < 0)) {
        switch (errno) {
        case EINTR:
        case EAGAIN:
            return MBEDTLS_ERR_SSL_WANT_WRITE;

        default:
            /* It's not an internal error here, but this seemed the least
             * innapropriate error code for this situation. lwan_setup_tls()
             * doesn't care. */
            return MBEDTLS_ERR_SSL_INTERNAL_ERROR;
        }
    }

    /* BIO callbacks return int; reject byte counts that wouldn't fit. */
    if (UNLIKELY((ssize_t)(int)r != r))
        return MBEDTLS_ERR_SSL_INTERNAL_ERROR;

    hs_ctx->last_was_send = true;
    return (int)r;
}
261 :
262 0 : static void flush_pending_output(int fd)
263 : {
264 0 : int zero = 0;
265 0 : setsockopt(fd, SOL_TCP, TCP_CORK, &zero, sizeof(zero));
266 0 : }
267 :
/* mbedTLS BIO receive callback for the handshake.  If the previous BIO
 * operation was a (corked) send, flush it first so the peer can make
 * progress and actually answer. */
static int lwan_mbedtls_recv(void *ctx, unsigned char *buf, size_t len)
{
    struct lwan_mbedtls_handshake_ctx *hs_ctx = ctx;
    ssize_t r;

    if (hs_ctx->last_was_send) {
        flush_pending_output(hs_ctx->fd);
        hs_ctx->last_was_send = false;
    }

    r = recv(hs_ctx->fd, buf, len, 0);
    if (UNLIKELY(r < 0)) {
        switch (errno) {
        case EINTR:
        case EAGAIN:
            return MBEDTLS_ERR_SSL_WANT_READ;

        default:
            return MBEDTLS_ERR_SSL_INTERNAL_ERROR;
        }
    }

    /* BIO callbacks return int; reject byte counts that wouldn't fit. */
    if (UNLIKELY((ssize_t)(int)r != r))
        return MBEDTLS_ERR_SSL_INTERNAL_ERROR;

    return (int)r;
}
295 :
296 0 : static bool lwan_setup_tls(const struct lwan *l, struct lwan_connection *conn)
297 : {
298 : mbedtls_ssl_context ssl;
299 0 : bool retval = false;
300 : int r;
301 :
302 0 : mbedtls_ssl_init(&ssl);
303 :
304 0 : r = mbedtls_ssl_setup(&ssl, &l->tls->config);
305 0 : if (UNLIKELY(r != 0)) {
306 0 : lwan_status_mbedtls_error(r, "Could not setup TLS context");
307 0 : return false;
308 : }
309 :
310 : /* Yielding the coroutine during the handshake enables the I/O loop to
311 : * destroy this coro (e.g. on connection hangup) before we have the
312 : * opportunity to free the SSL context. Defer this call for these
313 : * cases. */
314 : coro_deferred defer =
315 0 : coro_defer(conn->coro, lwan_setup_tls_free_ssl_context, &ssl);
316 :
317 0 : if (UNLIKELY(!defer)) {
318 0 : lwan_status_error("Could not defer cleanup of the TLS context");
319 0 : return false;
320 : }
321 :
322 0 : int fd = lwan_connection_get_fd(l, conn);
323 :
324 0 : struct lwan_mbedtls_handshake_ctx ctx = { .fd = fd };
325 0 : mbedtls_ssl_set_bio(&ssl, &ctx, lwan_mbedtls_send,
326 : lwan_mbedtls_recv, NULL);
327 :
328 : while (true) {
329 0 : switch (mbedtls_ssl_handshake(&ssl)) {
330 0 : case 0:
331 0 : flush_pending_output(fd);
332 0 : goto enable_tls_ulp;
333 0 : case MBEDTLS_ERR_SSL_ASYNC_IN_PROGRESS:
334 : case MBEDTLS_ERR_SSL_CRYPTO_IN_PROGRESS:
335 : case MBEDTLS_ERR_SSL_WANT_READ:
336 0 : coro_yield(conn->coro, CONN_CORO_WANT_READ);
337 0 : break;
338 0 : case MBEDTLS_ERR_SSL_WANT_WRITE:
339 0 : coro_yield(conn->coro, CONN_CORO_WANT_WRITE);
340 0 : break;
341 0 : default:
342 0 : goto fail;
343 : }
344 : }
345 :
346 0 : enable_tls_ulp:
347 0 : if (UNLIKELY(setsockopt(fd, SOL_TCP, TCP_ULP, "tls", sizeof("tls")) < 0))
348 0 : goto fail;
349 0 : if (UNLIKELY(!lwan_setup_tls_keys(fd, &ssl, TLS_RX)))
350 0 : goto fail;
351 0 : if (UNLIKELY(!lwan_setup_tls_keys(fd, &ssl, TLS_TX)))
352 0 : goto fail;
353 :
354 0 : retval = true;
355 :
356 0 : fail:
357 0 : coro_defer_disarm(conn->coro, defer);
358 0 : mbedtls_ssl_free(&ssl);
359 0 : return retval;
360 : }
361 : #endif
362 :
/* The per-connection coroutine: processes one request after another
 * (keep-alive and pipelining) until the connection should close, then
 * yields CONN_CORO_ABORT.  It must never return normally — see NOTE. */
__attribute__((noreturn)) static int process_request_coro(struct coro *coro,
                                                          void *data)
{
    /* NOTE: This function should not return; coro_yield should be used
     * instead. This ensures the storage for `strbuf` is alive when the
     * coroutine ends and lwan_strbuf_free() is called. */
    struct lwan_connection *conn = data;
    struct lwan *lwan = conn->thread->lwan;
    int fd = lwan_connection_get_fd(lwan, conn);
    enum lwan_request_flags flags = lwan->config.request_flags;
    struct lwan_strbuf strbuf = LWAN_STRBUF_STATIC_INIT;
    char request_buffer[DEFAULT_BUFFER_SIZE];
    struct lwan_value buffer = {.value = request_buffer, .len = 0};
    char *next_request = NULL; /* start of a pipelined request, if any */
    char *header_start[N_HEADER_START];
    struct lwan_proxy proxy;
    const int error_when_n_packets = lwan_calculate_n_packets(DEFAULT_BUFFER_SIZE);

    coro_defer(coro, lwan_strbuf_free_defer, &strbuf);

    const size_t init_gen = 1; /* 1 call to coro_defer() */
    assert(init_gen == coro_deferred_get_generation(coro));

#if defined(LWAN_HAVE_MBEDTLS)
    /* TLS handshake happens inside the coroutine because it may yield. */
    if (conn->flags & CONN_TLS) {
        if (UNLIKELY(!lwan_setup_tls(lwan, conn))) {
            coro_yield(conn->coro, CONN_CORO_ABORT);
            __builtin_unreachable();
        }
    }
#else
    assert(!(conn->flags & CONN_TLS));
#endif

    while (true) {
        struct lwan_request_parser_helper helper = {
            .buffer = &buffer,
            .next_request = next_request,
            .error_when_n_packets = error_when_n_packets,
            .header_start = header_start,
        };
        struct lwan_request request = {.conn = conn,
                                       .global_response_headers = &lwan->headers,
                                       .fd = fd,
                                       .response = {.buffer = &strbuf},
                                       .flags = flags,
                                       .proxy = &proxy,
                                       .helper = &helper};

        lwan_process_request(lwan, &request);

        /* Run the deferred instructions now (except those used to initialize
         * the coroutine), so that if the connection is gracefully closed,
         * the storage for ``helper'' is still there. */
        coro_deferred_run(coro, init_gen);

        if (UNLIKELY(!(conn->flags & CONN_IS_KEEP_ALIVE))) {
            graceful_close(lwan, conn, request_buffer);
            break;
        }

        if (next_request && *next_request) {
            /* Pipelined request already buffered: cork output and make
             * sure we get scheduled again without waiting for more data. */
            conn->flags |= CONN_CORK;

            if (!(conn->flags & CONN_EVENTS_WRITE))
                coro_yield(coro, CONN_CORO_WANT_WRITE);
        } else {
            conn->flags &= ~CONN_CORK;
            coro_yield(coro, CONN_CORO_WANT_READ);
        }

        /* Ensure string buffer is reset between requests, and that the backing
         * store isn't over 2KB. */
        lwan_strbuf_reset_trim(&strbuf, 2048);

        /* Only allow flags from config. */
        flags = request.flags & (REQUEST_PROXIED | REQUEST_ALLOW_CORS | REQUEST_WANTS_HSTS_HEADER);
        next_request = helper.next_request;
    }

    coro_yield(coro, CONN_CORO_ABORT);
    __builtin_unreachable();
}
446 :
/* Map the CONN_EVENTS_* bits of a connection's flags onto the epoll event
 * mask to wait for.  EPOLLRDHUP is always requested so peer hangups are
 * noticed even while a coroutine is suspended. */
static ALWAYS_INLINE uint32_t
conn_flags_to_epoll_events(enum lwan_connection_flags flags)
{
    static const uint32_t map[CONN_EVENTS_MASK + 1] = {
        [0 /* Suspended (timer or await) */] = EPOLLRDHUP,
        [CONN_EVENTS_WRITE] = EPOLLOUT | EPOLLRDHUP,
        [CONN_EVENTS_READ] = EPOLLIN | EPOLLRDHUP,
        [CONN_EVENTS_READ_WRITE] = EPOLLIN | EPOLLOUT | EPOLLRDHUP,
    };

    return map[flags & CONN_EVENTS_MASK];
}
459 :
/* Apply a coroutine's yield value to the connection's flags (via the
 * branch-free or/and lookup tables below) and, only if the event interest
 * set actually changed, update the fd's epoll registration. */
static void update_epoll_flags(const struct timeout_queue *tq,
                               struct lwan_connection *conn,
                               int epoll_fd,
                               enum lwan_connection_coro_yield yield_result)
{
    static const enum lwan_connection_flags or_mask[CONN_CORO_MAX] = {
        [CONN_CORO_YIELD] = 0,

        [CONN_CORO_WANT_READ_WRITE] = CONN_EVENTS_READ_WRITE,
        [CONN_CORO_WANT_READ] = CONN_EVENTS_READ,
        [CONN_CORO_WANT_WRITE] = CONN_EVENTS_WRITE,

        /* While the coro is suspended, we're not interested in either EPOLLIN
         * or EPOLLOUT events. We still want to track this fd in epoll, though,
         * so unset both so that only EPOLLRDHUP (plus the implicitly-set ones)
         * are set. */
        [CONN_CORO_SUSPEND] = CONN_SUSPENDED,

        /* Ideally, when suspending a coroutine, the current flags&CONN_EVENTS_MASK
         * would have to be stored and restored -- however, resuming as if the
         * client coroutine is interested in a write event always guarantees that
         * they'll be resumed as they're TCP sockets. There's a good chance that
         * trying to read from a socket after resuming a coroutine will succeed,
         * but if it doesn't because read() returns -EAGAIN, the I/O wrappers will
         * yield with CONN_CORO_WANT_READ anyway. */
        [CONN_CORO_RESUME] = CONN_EVENTS_WRITE,
    };
    static const enum lwan_connection_flags and_mask[CONN_CORO_MAX] = {
        [CONN_CORO_YIELD] = ~0,

        [CONN_CORO_WANT_READ_WRITE] = ~0,
        [CONN_CORO_WANT_READ] = ~CONN_EVENTS_WRITE,
        [CONN_CORO_WANT_WRITE] = ~CONN_EVENTS_READ,

        [CONN_CORO_SUSPEND] = ~CONN_EVENTS_READ_WRITE,
        [CONN_CORO_RESUME] = ~CONN_SUSPENDED,
    };
    enum lwan_connection_flags prev_flags = conn->flags;

    conn->flags |= or_mask[yield_result];
    conn->flags &= and_mask[yield_result];

    assert(!(conn->flags & (CONN_LISTENER_HTTP | CONN_LISTENER_HTTPS)));
    assert((conn->flags & CONN_TLS) == (prev_flags & CONN_TLS));

    /* Avoid the epoll_ctl() syscall if nothing changed. */
    if (conn->flags == prev_flags)
        return;

    struct epoll_event event = {.events = conn_flags_to_epoll_events(conn->flags),
                                .data.ptr = conn};
    int fd = lwan_connection_get_fd(tq->lwan, conn);

    if (UNLIKELY(epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &event) < 0))
        lwan_status_perror("epoll_ctl");
}
515 :
/* coro_defer() callback: clear the CONN_ASYNC_AWAIT marker from the
 * connection slot of an fd that was being awaited. */
static void clear_async_await_flag(void *data)
{
    struct lwan_connection *async_fd_conn = data;

    async_fd_conn->flags &= ~CONN_ASYNC_AWAIT;
}
522 :
/* Handle a CONN_CORO_ASYNC_AWAIT_* yield: register (or re-register) the
 * awaited fd — packed in the upper 32 bits of the coroutine's yield
 * value — in this thread's epoll set on behalf of the awaiting
 * connection.  Returns CONN_CORO_SUSPEND to park the awaiting connection,
 * or CONN_CORO_ABORT if epoll registration failed. */
static enum lwan_connection_coro_yield
resume_async(const struct timeout_queue *tq,
             enum lwan_connection_coro_yield yield_result,
             int64_t from_coro,
             struct lwan_connection *conn,
             int epoll_fd)
{
    static const enum lwan_connection_flags to_connection_flags[] = {
        [CONN_CORO_ASYNC_AWAIT_READ] = CONN_EVENTS_READ,
        [CONN_CORO_ASYNC_AWAIT_WRITE] = CONN_EVENTS_WRITE,
        [CONN_CORO_ASYNC_AWAIT_READ_WRITE] = CONN_EVENTS_READ_WRITE,
    };
    int await_fd = (int)((uint64_t)from_coro >> 32);
    enum lwan_connection_flags flags;
    int op;

    assert(await_fd >= 0);
    assert(yield_result >= CONN_CORO_ASYNC_AWAIT_READ &&
           yield_result <= CONN_CORO_ASYNC_AWAIT_READ_WRITE);

    flags = to_connection_flags[yield_result];

    struct lwan_connection *await_fd_conn = &tq->lwan->conns[await_fd];
    if (LIKELY(await_fd_conn->flags & CONN_ASYNC_AWAIT)) {
        /* Already awaited with the same interest set: nothing to do. */
        if (LIKELY((await_fd_conn->flags & CONN_EVENTS_MASK) == flags))
            return CONN_CORO_SUSPEND;

        op = EPOLL_CTL_MOD;
    } else {
        op = EPOLL_CTL_ADD;
        flags |= CONN_ASYNC_AWAIT;
        /* Make sure the marker is cleared if the awaiting coro dies. */
        coro_defer(conn->coro, clear_async_await_flag, await_fd_conn);
    }

    /* data.ptr points at the *awaiting* connection so its coroutine gets
     * resumed when the awaited fd becomes ready. */
    struct epoll_event event = {.events = conn_flags_to_epoll_events(flags),
                                .data.ptr = conn};
    if (LIKELY(!epoll_ctl(epoll_fd, op, await_fd, &event))) {
        await_fd_conn->flags &= ~CONN_EVENTS_MASK;
        await_fd_conn->flags |= flags;
        return CONN_CORO_SUSPEND;
    }

    return CONN_CORO_ABORT;
}
567 :
/* Resume a connection's coroutine and act on the value it yields: abort
 * (expire from the timeout queue), or update epoll interest and bump the
 * connection to the back of the timeout queue. */
static ALWAYS_INLINE void resume_coro(struct timeout_queue *tq,
                                      struct lwan_connection *conn,
                                      int epoll_fd)
{
    assert(conn->coro);

    int64_t from_coro = coro_resume(conn->coro);
    /* Low 32 bits carry the yield value; for async awaits the upper 32
     * bits carry the awaited fd (see resume_async()). */
    enum lwan_connection_coro_yield yield_result = from_coro & 0xffffffff;

    if (UNLIKELY(yield_result >= CONN_CORO_ASYNC)) {
        yield_result =
            resume_async(tq, yield_result, from_coro, conn, epoll_fd);
    }

    if (UNLIKELY(yield_result == CONN_CORO_ABORT)) {
        timeout_queue_expire(tq, conn);
    } else {
        update_epoll_flags(tq, conn, epoll_fd, yield_result);
        timeout_queue_move_to_last(tq, conn);
    }
}
589 :
/* Refresh this thread's cached, preformatted date strings (used for the
 * "Date" and "Expires" response headers) from the current wall clock. */
static void update_date_cache(struct lwan_thread *thread)
{
    time_t now = time(NULL);

    lwan_format_rfc_time(now, thread->date.date);
    lwan_format_rfc_time(now + (time_t)thread->lwan->config.expires,
                         thread->date.expires);
}
598 :
/* Best-effort, coroutine-less send of a whole buffer (cold/error path
 * only).  Retries up to 10 times, tolerating EINTR and briefly retrying
 * on EAGAIN; returns true only if every byte was sent.
 *
 * Fix: the original inspected errno after send() returned 0, but send()
 * does not set errno in that case, so the branch acted on a stale value.
 * A zero return for a non-zero length is now treated as "give up". */
__attribute__((cold))
static bool send_buffer_without_coro(int fd, const char *buf, size_t buf_len, int flags)
{
    size_t total_sent = 0;

    for (int try = 0; try < 10; try++) {
        size_t to_send = buf_len - total_sent;
        if (!to_send)
            return true;

        ssize_t sent = send(fd, buf + total_sent, to_send, flags);
        if (sent < 0) {
            if (errno == EINTR || errno == EAGAIN)
                continue;
            break; /* Unrecoverable error. */
        }
        if (sent == 0) /* errno is not meaningful here; just bail out. */
            break;

        total_sent += (size_t)sent;
    }

    return false;
}
623 :
/* Convenience wrapper over send_buffer_without_coro() for NUL-terminated
 * strings. */
__attribute__((cold))
static bool send_string_without_coro(int fd, const char *str, int flags)
{
    size_t length = strlen(str);

    return send_buffer_without_coro(fd, str, length, flags);
}
629 :
/* Emergency path: send a minimal HTTP/1.0 error response without a
 * coroutine (used when coroutine creation or epoll registration failed),
 * then shut down and close the fd.  Not possible for TLS clients. */
__attribute__((cold)) static void
send_last_response_without_coro(const struct lwan *l,
                                const struct lwan_connection *conn,
                                enum lwan_http_status status)
{
    int fd = lwan_connection_get_fd(l, conn);

    if (conn->flags & CONN_TLS) {
        /* There's nothing that can be done here if a client is expecting a
         * TLS connection: the TLS handshake requires a coroutine as it
         * might yield. (In addition, the TLS handshake might allocate
         * memory, and if you couldn't create a coroutine at this point,
         * it's unlikely you'd be able to allocate memory for the TLS
         * context anyway.) */
        goto shutdown_and_close;
    }

    if (!send_string_without_coro(fd, "HTTP/1.0 ", MSG_MORE))
        goto shutdown_and_close;

    if (!send_string_without_coro(
            fd, lwan_http_status_as_string_with_code(status), MSG_MORE))
        goto shutdown_and_close;

    if (!send_string_without_coro(fd, "\r\nConnection: close", MSG_MORE))
        goto shutdown_and_close;

    if (!send_string_without_coro(fd, "\r\nContent-Type: text/html", MSG_MORE))
        goto shutdown_and_close;

    /* The server-wide header block terminates the headers; the default
     * error page for `status` follows as the body. */
    if (send_buffer_without_coro(fd, l->headers.value, l->headers.len,
                                 MSG_MORE)) {
        struct lwan_strbuf buffer;

        lwan_strbuf_init(&buffer);
        lwan_fill_default_response(&buffer, status);

        send_buffer_without_coro(fd, lwan_strbuf_get_buffer(&buffer),
                                 lwan_strbuf_get_length(&buffer), 0);

        lwan_strbuf_free(&buffer);
    }

shutdown_and_close:
    shutdown(fd, SHUT_RDWR);
    close(fd);
}
677 :
/* Create the request-processing coroutine for a freshly-accepted
 * connection and insert it into the timeout queue.  Only the CONN_TLS bit
 * survives from the accept-time flags.  On allocation failure a canned
 * 503 is sent without a coroutine and false is returned. */
static ALWAYS_INLINE bool spawn_coro(struct lwan_connection *conn,
                                     struct coro_switcher *switcher,
                                     struct timeout_queue *tq)
{
    struct lwan_thread *t = conn->thread;
#if defined(LWAN_HAVE_MBEDTLS)
    const enum lwan_connection_flags flags_to_keep = conn->flags & CONN_TLS;
#else
    const enum lwan_connection_flags flags_to_keep = 0;
#endif

    assert(!conn->coro);
    assert(!(conn->flags & CONN_ASYNC_AWAIT));
    assert(!(conn->flags & CONN_AWAITED_FD));
    assert(!(conn->flags & (CONN_LISTENER_HTTP | CONN_LISTENER_HTTPS)));
    assert(t);
    assert((uintptr_t)t >= (uintptr_t)tq->lwan->thread.threads);
    assert((uintptr_t)t <
           (uintptr_t)(tq->lwan->thread.threads + tq->lwan->thread.count));

    /* Reset the whole slot; the struct-literal assignment zeroes every
     * field not listed. */
    *conn = (struct lwan_connection){
        .coro = coro_new(switcher, process_request_coro, conn),
        .flags = CONN_EVENTS_READ | flags_to_keep,
        .time_to_expire = tq->current_time + tq->move_to_last_bump,
        .thread = t,
    };
    if (LIKELY(conn->coro)) {
        timeout_queue_insert(tq, conn);
        return true;
    }

    conn->flags = 0;

    int fd = lwan_connection_get_fd(tq->lwan, conn);

    lwan_status_error("Couldn't spawn coroutine for file descriptor %d", fd);

    send_last_response_without_coro(tq->lwan, conn, HTTP_UNAVAILABLE);
    return false;
}
718 :
/* Drain expired entries from this thread's timer wheel.  An entry is
 * either the timeout queue's own ~1s "tick" timer (which triggers expiring
 * idle connections and refreshing the date cache) or a per-request timer,
 * whose connection is resumed as if ready to write.  Returns true iff the
 * tick timer was re-armed (i.e. connections remain in the queue). */
static bool process_pending_timers(struct timeout_queue *tq,
                                   struct lwan_thread *t,
                                   int epoll_fd)
{
    struct timeout *timeout;
    bool should_expire_timers = false;

    while ((timeout = timeouts_get(t->wheel))) {
        struct lwan_request *request;

        if (timeout == &tq->timeout) {
            should_expire_timers = true;
            continue;
        }

        request = container_of(timeout, struct lwan_request, timeout);
        update_epoll_flags(tq, request->conn, epoll_fd, CONN_CORO_RESUME);
    }

    if (should_expire_timers) {
        timeout_queue_expire_waiting(tq);

        /* tq timeout expires every 1000ms if there are connections, so
         * update the date cache at this point as well. */
        update_date_cache(t);

        if (!timeout_queue_empty(tq)) {
            timeouts_add(t->wheel, &tq->timeout, 1000);
            return true;
        }

        timeouts_del(t->wheel, &tq->timeout);
    }

    return false;
}
755 :
/* Advance the timer wheel to the current monotonic time and compute the
 * timeout (in ms) to pass to epoll_wait(): the time until the next timer
 * fires, or -1 (block indefinitely) when nothing is pending. */
static int
turn_timer_wheel(struct timeout_queue *tq, struct lwan_thread *t, int epoll_fd)
{
    const int infinite_timeout = -1;
    timeout_t wheel_timeout;
    struct timespec now;

    if (UNLIKELY(clock_gettime(monotonic_clock_id, &now) < 0))
        lwan_status_critical("Could not get monotonic time");

    /* The wheel works in milliseconds. */
    timeouts_update(t->wheel,
                    (timeout_t)(now.tv_sec * 1000 + now.tv_nsec / 1000000));

    /* Check if there's an expired timer. */
    wheel_timeout = timeouts_timeout(t->wheel);
    if (wheel_timeout > 0) {
        return (int)wheel_timeout; /* No, but will soon. Wake us up in
                                      wheel_timeout ms. */
    }

    if (UNLIKELY((int64_t)wheel_timeout < 0))
        return infinite_timeout; /* None found. */

    if (!process_pending_timers(tq, t, epoll_fd))
        return infinite_timeout; /* No more timers to process. */

    /* After processing pending timers, determine when to wake up. */
    return (int)timeouts_timeout(t->wheel);
}
785 :
/* Accept every pending client on an edge-triggered listening socket and
 * register each new fd with its thread's epoll set.  The connection slot
 * for an fd is simply conns[fd].  Returns false only when the listening
 * socket itself was closed (server shutdown). */
static bool accept_waiting_clients(const struct lwan_thread *t,
                                   const struct lwan_connection *listen_socket)
{
    const uint32_t read_events = conn_flags_to_epoll_events(CONN_EVENTS_READ);
    struct lwan_connection *conns = t->lwan->conns;
    /* The listener's fd is recoverable from its position in the array. */
    int listen_fd = (int)(intptr_t)(listen_socket - conns);
    enum lwan_connection_flags new_conn_flags = 0;

#if defined(LWAN_HAVE_MBEDTLS)
    if (listen_socket->flags & CONN_LISTENER_HTTPS) {
        assert(listen_fd == t->tls_listen_fd);
        assert(!(listen_socket->flags & CONN_LISTENER_HTTP));
        new_conn_flags = CONN_TLS;
    } else {
        assert(listen_fd == t->listen_fd);
        assert(listen_socket->flags & CONN_LISTENER_HTTP);
    }
#endif

    /* Edge-triggered listener: accept until EAGAIN. */
    while (true) {
        int fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);

        if (LIKELY(fd >= 0)) {
            struct lwan_connection *conn = &conns[fd];
            struct epoll_event ev = {.data.ptr = conn, .events = read_events};
            int r;

            conn->flags = new_conn_flags;

            r = epoll_ctl(conn->thread->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
            if (UNLIKELY(r < 0)) {
                lwan_status_perror("Could not add file descriptor %d to epoll "
                                   "set %d. Dropping connection",
                                   fd, conn->thread->epoll_fd);
                send_last_response_without_coro(t->lwan, conn, HTTP_UNAVAILABLE);
                conn->flags = 0;
            }

            continue;
        }

        switch (errno) {
        default:
            lwan_status_perror("Unexpected error while accepting connections");
            /* fallthrough */

        case EAGAIN:
            return true;

        case EBADF:
        case ECONNABORTED:
        case EINVAL:
            lwan_status_info("Listening socket closed");
            return false;
        }
    }

    __builtin_unreachable();
}
845 :
/* Create and register one listening socket (HTTP or HTTPS) for worker
 * thread `t`.  With SO_REUSEPORT groups, a tiny classic-BPF program
 * steers each incoming connection to the group member whose index equals
 * the receiving CPU, keeping connections CPU-local. */
static int create_listen_socket(struct lwan_thread *t,
                                unsigned int num,
                                bool tls)
{
    const struct lwan *lwan = t->lwan;
    int listen_fd;

    listen_fd = lwan_create_listen_socket(lwan, num == 0, tls);
    if (listen_fd < 0)
        lwan_status_critical("Could not create listen_fd");

    /* Ignore errors here, as this is just a hint */
#if defined(LWAN_HAVE_SO_ATTACH_REUSEPORT_CBPF)
    /* From socket(7): "These options may be set repeatedly at any time on
     * any socket in the group to replace the current BPF program used by
     * all sockets in the group." */
    if (num == 0) {
        /* From socket(7): "The BPF program must return an index between 0 and
         * N-1 representing the socket which should receive the packet (where N
         * is the number of sockets in the group)." */
        const uint32_t cpu_ad_off = (uint32_t)SKF_AD_OFF + SKF_AD_CPU;
        struct sock_filter filter[] = {
            {BPF_LD | BPF_W | BPF_ABS, 0, 0, cpu_ad_off}, /* A = curr_cpu_index */
            {BPF_RET | BPF_A, 0, 0, 0}, /* return A */
        };
        struct sock_fprog fprog = {.filter = filter, .len = N_ELEMENTS(filter)};

        (void)setsockopt(listen_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF,
                         &fprog, sizeof(fprog));
        /* Prevent the filter from being replaced or removed later. */
        (void)setsockopt(listen_fd, SOL_SOCKET, SO_LOCK_FILTER,
                         (int[]){1}, sizeof(int));
    }
#elif defined(LWAN_HAVE_SO_INCOMING_CPU) && defined(__x86_64__)
    (void)setsockopt(listen_fd, SOL_SOCKET, SO_INCOMING_CPU, &t->cpu,
                     sizeof(t->cpu));
#endif

    /* Listeners are edge-triggered; accept_waiting_clients() drains them. */
    struct epoll_event event = {
        .events = EPOLLIN | EPOLLET | EPOLLERR,
        .data.ptr = &t->lwan->conns[listen_fd],
    };
    if (epoll_ctl(t->epoll_fd, EPOLL_CTL_ADD, listen_fd, &event) < 0)
        lwan_status_critical_perror("Could not add socket to epoll");

    return listen_fd;
}
892 :
/* Per-worker-thread event loop: waits on epoll, accepts new clients,
 * spawns/resumes connection coroutines, and drives the timer wheel.
 * Exits when the listening sockets are closed at shutdown. */
static void *thread_io_loop(void *data)
{
    struct lwan_thread *t = data;
    int epoll_fd = t->epoll_fd;
    const int max_events = LWAN_MIN((int)t->lwan->thread.max_fd, 1024);
    struct lwan *lwan = t->lwan;
    struct epoll_event *events;
    struct coro_switcher switcher;
    struct timeout_queue tq;

    lwan_status_debug("Worker thread #%zd starting",
                      t - t->lwan->thread.threads + 1);
    lwan_set_thread_name("worker");

    events = calloc((size_t)max_events, sizeof(*events));
    if (UNLIKELY(!events))
        lwan_status_critical("Could not allocate memory for events");

    update_date_cache(t);

    timeout_queue_init(&tq, lwan);

    lwan_random_seed_prng_for_thread(t);

    /* Rendezvous with the main thread before starting to serve. */
    pthread_barrier_wait(&lwan->thread.barrier);

    for (;;) {
        int timeout = turn_timer_wheel(&tq, t, epoll_fd);
        int n_fds = epoll_wait(epoll_fd, events, max_events, timeout);
        bool created_coros = false;

        if (UNLIKELY(n_fds < 0)) {
            /* EBADF/EINVAL: epoll fd was closed below — shut down. */
            if (errno == EBADF || errno == EINVAL)
                break;
            continue;
        }

        for (struct epoll_event *event = events; n_fds--; event++) {
            struct lwan_connection *conn = event->data.ptr;

            assert(!(conn->flags & CONN_ASYNC_AWAIT));

            if (conn->flags & (CONN_LISTENER_HTTP | CONN_LISTENER_HTTPS)) {
                if (LIKELY(accept_waiting_clients(t, conn)))
                    continue;
                /* Listener closed: drop the epoll fd so the next
                 * epoll_wait() fails and the loop above exits. */
                close(epoll_fd);
                epoll_fd = -1;
                break;
            }

            if (UNLIKELY(event->events & (EPOLLRDHUP | EPOLLHUP))) {
                if ((conn->flags & CONN_AWAITED_FD) != CONN_SUSPENDED) {
                    timeout_queue_expire(&tq, conn);
                    continue;
                }
            }

            /* First event on this fd since accept: create its coroutine. */
            if (!conn->coro) {
                if (UNLIKELY(!spawn_coro(conn, &switcher, &tq))) {
                    send_last_response_without_coro(t->lwan, conn, HTTP_UNAVAILABLE);
                    continue;
                }

                created_coros = true;
            }

            resume_coro(&tq, conn, epoll_fd);
        }

        /* Arm the 1s tick timer so the new connections can time out. */
        if (created_coros)
            timeouts_add(t->wheel, &tq.timeout, 1000);
    }

    pthread_barrier_wait(&lwan->thread.barrier);

    timeout_queue_expire_all(&tq);
    free(events);

    return NULL;
}
973 :
974 184 : static void create_thread(struct lwan *l, struct lwan_thread *thread)
975 : {
976 : int ignore;
977 : pthread_attr_t attr;
978 :
979 184 : thread->lwan = l;
980 :
981 184 : thread->wheel = timeouts_open(&ignore);
982 184 : if (!thread->wheel)
983 0 : lwan_status_critical("Could not create timer wheel");
984 :
985 184 : if ((thread->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0)
986 0 : lwan_status_critical_perror("epoll_create");
987 :
988 184 : if (pthread_attr_init(&attr))
989 0 : lwan_status_critical_perror("pthread_attr_init");
990 :
991 184 : if (pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM))
992 0 : lwan_status_critical_perror("pthread_attr_setscope");
993 :
994 184 : if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE))
995 0 : lwan_status_critical_perror("pthread_attr_setdetachstate");
996 :
997 184 : if (pthread_create(&thread->self, &attr, thread_io_loop, thread))
998 0 : lwan_status_critical_perror("pthread_create");
999 :
1000 184 : if (pthread_attr_destroy(&attr))
1001 0 : lwan_status_critical_perror("pthread_attr_destroy");
1002 184 : }
1003 :
1004 : #if defined(__linux__) && defined(__x86_64__)
1005 0 : static bool read_cpu_topology(struct lwan *l, uint32_t siblings[])
1006 : {
1007 : char path[PATH_MAX];
1008 :
1009 0 : for (uint32_t i = 0; i < l->available_cpus; i++)
1010 0 : siblings[i] = 0xbebacafe;
1011 :
1012 0 : for (unsigned int i = 0; i < l->available_cpus; i++) {
1013 : FILE *sib;
1014 : uint32_t id, sibling;
1015 : char separator;
1016 :
1017 0 : snprintf(path, sizeof(path),
1018 : "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
1019 : i);
1020 :
1021 0 : sib = fopen(path, "re");
1022 0 : if (!sib) {
1023 0 : lwan_status_warning("Could not open `%s` to determine CPU topology",
1024 : path);
1025 0 : return false;
1026 : }
1027 :
1028 0 : switch (fscanf(sib, "%u%c%u", &id, &separator, &sibling)) {
1029 0 : case 2: /* No SMT */
1030 0 : siblings[i] = id;
1031 0 : break;
1032 0 : case 3: /* SMT */
1033 0 : if (!(separator == ',' || separator == '-')) {
1034 0 : lwan_status_critical("Expecting either ',' or '-' for sibling separator");
1035 : __builtin_unreachable();
1036 : }
1037 :
1038 0 : siblings[i] = sibling;
1039 0 : break;
1040 0 : default:
1041 0 : lwan_status_critical("%s has invalid format", path);
1042 : __builtin_unreachable();
1043 : }
1044 :
1045 0 : fclose(sib);
1046 : }
1047 :
1048 : /* Perform a sanity check here, as some systems seem to filter out the
1049 : * result of sysconf() to obtain the number of configured and online
1050 : * CPUs but don't bother changing what's available through sysfs as far
1051 : * as the CPU topology information goes. It's better to fall back to a
1052 : * possibly non-optimal setup than just crash during startup while
1053 : * trying to perform an out-of-bounds array access. */
1054 0 : for (unsigned int i = 0; i < l->available_cpus; i++) {
1055 0 : if (siblings[i] == 0xbebacafe) {
1056 0 : lwan_status_warning("Could not determine sibling for CPU %d", i);
1057 0 : return false;
1058 : }
1059 :
1060 0 : if (siblings[i] >= l->available_cpus) {
1061 0 : lwan_status_warning("CPU information topology says CPU %d exists, "
1062 : "but max available CPUs is %d (online CPUs: %d). "
1063 : "Is Lwan running in a (broken) container?",
1064 : siblings[i], l->available_cpus, l->online_cpus);
1065 0 : return false;
1066 : }
1067 : }
1068 :
1069 0 : return true;
1070 : }
1071 :
1072 : static void
1073 0 : siblings_to_schedtbl(struct lwan *l, uint32_t siblings[], uint32_t schedtbl[])
1074 : {
1075 0 : int32_t *seen = calloc(l->available_cpus, sizeof(int32_t));
1076 0 : unsigned int n_schedtbl = 0;
1077 :
1078 0 : if (!seen)
1079 0 : lwan_status_critical("Could not allocate the seen array");
1080 :
1081 0 : for (uint32_t i = 0; i < l->available_cpus; i++)
1082 0 : seen[i] = -1;
1083 :
1084 0 : for (uint32_t i = 0; i < l->available_cpus; i++) {
1085 0 : if (seen[siblings[i]] < 0) {
1086 0 : seen[siblings[i]] = (int32_t)i;
1087 : } else {
1088 0 : schedtbl[n_schedtbl++] = (uint32_t)seen[siblings[i]];
1089 0 : schedtbl[n_schedtbl++] = i;
1090 : }
1091 : }
1092 :
1093 0 : if (n_schedtbl != l->available_cpus)
1094 0 : memcpy(schedtbl, seen, l->available_cpus * sizeof(int));
1095 :
1096 0 : free(seen);
1097 0 : }
1098 :
1099 : static bool
1100 0 : topology_to_schedtbl(struct lwan *l, uint32_t schedtbl[], uint32_t n_threads)
1101 : {
1102 0 : uint32_t *siblings = calloc(l->available_cpus, sizeof(uint32_t));
1103 :
1104 0 : if (!siblings)
1105 0 : lwan_status_critical("Could not allocate siblings array");
1106 :
1107 0 : if (read_cpu_topology(l, siblings)) {
1108 0 : uint32_t *affinity = calloc(l->available_cpus, sizeof(uint32_t));
1109 :
1110 0 : if (!affinity)
1111 0 : lwan_status_critical("Could not allocate affinity array");
1112 :
1113 0 : siblings_to_schedtbl(l, siblings, affinity);
1114 :
1115 0 : for (uint32_t i = 0; i < n_threads; i++)
1116 0 : schedtbl[i] = affinity[i % l->available_cpus];
1117 :
1118 0 : free(affinity);
1119 0 : free(siblings);
1120 0 : return true;
1121 : }
1122 :
1123 0 : for (uint32_t i = 0; i < n_threads; i++)
1124 0 : schedtbl[i] = (i / 2) % l->thread.count;
1125 :
1126 0 : free(siblings);
1127 0 : return false;
1128 : }
1129 :
1130 : static void
1131 0 : adjust_thread_affinity(const struct lwan_thread *thread)
1132 : {
1133 : cpu_set_t set;
1134 :
1135 0 : CPU_ZERO(&set);
1136 0 : CPU_SET(thread->cpu, &set);
1137 :
1138 0 : if (pthread_setaffinity_np(thread->self, sizeof(set), &set))
1139 0 : lwan_status_warning("Could not set thread affinity");
1140 0 : }
1141 : #else
1142 : #define adjust_thread_affinity(...)
1143 : #endif
1144 :
1145 : #if defined(LWAN_HAVE_MBEDTLS)
/* Returns true when the kernel advertises the "tls" upper-layer protocol
 * (kTLS) in /proc/sys/net/ipv4/tcp_available_ulp; false when the file is
 * missing or doesn't mention it.  Only the first line is examined, which
 * is all the kernel emits for this file. */
static bool is_tls_ulp_supported(void)
{
    FILE *available_ulp = fopen("/proc/sys/net/ipv4/tcp_available_ulp", "re");
    char buffer[512];
    bool available = false;

    if (!available_ulp)
        return false;

    /* sizeof(buffer) instead of repeating the magic 512, so the bound
     * can't drift from the declaration. */
    if (fgets(buffer, sizeof(buffer), available_ulp)) {
        if (strstr(buffer, "tls"))
            available = true;
    }

    fclose(available_ulp);
    return available;
}
1163 :
/* Initialize the mbedTLS server context used for the kTLS-offloaded HTTPS
 * listener.  Returns false when no certificate/key pair is configured (TLS
 * disabled); returns true after a fully-configured l->tls is allocated.
 * Any failure during setup logs an mbedTLS error and aborts the process.
 * NOTE: the call order below (init → parse → conf) follows the mbedTLS
 * API contract and should not be rearranged. */
static bool lwan_init_tls(struct lwan *l)
{
    static const int aes128_ciphers[] = {
        /* Only allow Ephemeral Diffie-Hellman key exchange, so Perfect
         * Forward Secrecy is possible. */
        MBEDTLS_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
        MBEDTLS_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
        MBEDTLS_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256,
        MBEDTLS_TLS_DHE_PSK_WITH_AES_128_GCM_SHA256,

        /* FIXME: Other ciphers are supported by kTLS, notably AES256 and
         * ChaCha20-Poly1305. Add those here and patch
         * lwan_setup_tls_keys() to match. */

        /* FIXME: Maybe allow this to be user-tunable like other servers do? */
        0,
    };
    int r;

    /* TLS is opt-in: both a certificate and a key must be configured. */
    if (!l->config.ssl.cert || !l->config.ssl.key)
        return false;

    /* kTLS requires the "tls" ULP kernel module; without it, setting up
     * the offload later would fail anyway, so bail out early. */
    if (!is_tls_ulp_supported()) {
        lwan_status_critical(
            "TLS ULP not loaded. Try running `modprobe tls` as root.");
    }

    l->tls = calloc(1, sizeof(*l->tls));
    if (!l->tls)
        lwan_status_critical("Could not allocate memory for SSL context");

    lwan_status_debug("Initializing mbedTLS");

    /* Every mbedTLS object must be _init()ed before any other call on it. */
    mbedtls_ssl_config_init(&l->tls->config);
    mbedtls_x509_crt_init(&l->tls->server_cert);
    mbedtls_pk_init(&l->tls->server_key);
    mbedtls_entropy_init(&l->tls->entropy);
    mbedtls_ctr_drbg_init(&l->tls->ctr_drbg);

    r = mbedtls_x509_crt_parse_file(&l->tls->server_cert, l->config.ssl.cert);
    if (r) {
        lwan_status_mbedtls_error(r, "Could not parse certificate at %s",
                                  l->config.ssl.cert);
        abort();
    }

    r = mbedtls_pk_parse_keyfile(&l->tls->server_key, l->config.ssl.key, NULL);
    if (r) {
        lwan_status_mbedtls_error(r, "Could not parse key file at %s",
                                  l->config.ssl.key);
        abort();
    }

    /* Even though this points to files that will probably be outside
     * the reach of the server (if straightjackets are used), wipe this
     * struct to get rid of the paths to these files. */
    lwan_always_bzero(l->config.ssl.cert, strlen(l->config.ssl.cert));
    free(l->config.ssl.cert);
    lwan_always_bzero(l->config.ssl.key, strlen(l->config.ssl.key));
    free(l->config.ssl.key);
    lwan_always_bzero(&l->config.ssl, sizeof(l->config.ssl));

    /* server_cert.next skips the leaf certificate: only intermediates go
     * into the advertised CA chain. */
    mbedtls_ssl_conf_ca_chain(&l->tls->config, l->tls->server_cert.next, NULL);
    r = mbedtls_ssl_conf_own_cert(&l->tls->config, &l->tls->server_cert,
                                  &l->tls->server_key);
    if (r) {
        lwan_status_mbedtls_error(r, "Could not set cert/key");
        abort();
    }

    r = mbedtls_ctr_drbg_seed(&l->tls->ctr_drbg, mbedtls_entropy_func,
                              &l->tls->entropy, NULL, 0);
    if (r) {
        lwan_status_mbedtls_error(r, "Could not seed ctr_drbg");
        abort();
    }

    r = mbedtls_ssl_config_defaults(&l->tls->config, MBEDTLS_SSL_IS_SERVER,
                                    MBEDTLS_SSL_TRANSPORT_STREAM,
                                    MBEDTLS_SSL_PRESET_DEFAULT);
    if (r) {
        lwan_status_mbedtls_error(r, "Could not set mbedTLS default config");
        abort();
    }

    mbedtls_ssl_conf_rng(&l->tls->config, mbedtls_ctr_drbg_random,
                         &l->tls->ctr_drbg);
    mbedtls_ssl_conf_ciphersuites(&l->tls->config, aes128_ciphers);

    /* Renegotiation is disabled entirely (it's deprecated and a known
     * attack surface). */
    mbedtls_ssl_conf_renegotiation(&l->tls->config,
                                   MBEDTLS_SSL_RENEGOTIATION_DISABLED);
    mbedtls_ssl_conf_legacy_renegotiation(&l->tls->config,
                                          MBEDTLS_SSL_LEGACY_NO_RENEGOTIATION);

#if defined(MBEDTLS_SSL_ALPN)
    /* Only HTTP/1.1 is offered via ALPN; no h2 support here. */
    static const char *alpn_protos[] = {"http/1.1", NULL};
    mbedtls_ssl_conf_alpn_protocols(&l->tls->config, alpn_protos);
#endif

    return true;
}
1265 : #endif
1266 :
/* Create all worker threads, pre-schedule every possible connection slot
 * to a thread, and set up each thread's plain and (optionally) TLS
 * listening sockets.  On x86-64 Linux with more than one online CPU, the
 * CPU topology is used to co-locate cache-line-sharing connections on SMT
 * siblings; otherwise a simple round-robin assignment is used. */
void lwan_thread_init(struct lwan *l)
{
    const unsigned int total_conns = l->thread.max_fd * l->thread.count;
#if defined(LWAN_HAVE_MBEDTLS)
    const bool tls_initialized = lwan_init_tls(l);
#else
    const bool tls_initialized = false;
#endif

    lwan_status_debug("Initializing threads");

    l->thread.threads =
        calloc((size_t)l->thread.count, sizeof(struct lwan_thread));
    if (!l->thread.threads)
        lwan_status_critical("Could not allocate memory for threads");

    uint32_t *schedtbl;
    bool adj_affinity;

#if defined(__x86_64__) && defined(__linux__)
    if (l->online_cpus > 1) {
        static_assert(sizeof(struct lwan_connection) == 32,
                      "Two connections per cache line");
#ifdef _SC_LEVEL1_DCACHE_LINESIZE
        assert(sysconf(_SC_LEVEL1_DCACHE_LINESIZE) == 64);
#endif
        lwan_status_debug("%d CPUs of %d are online. "
                          "Reading topology to pre-schedule clients",
                          l->online_cpus, l->available_cpus);
        /*
         * Pre-schedule each file descriptor, to reduce some operations in the
         * fast path.
         *
         * Since struct lwan_connection is guaranteed to be 32-byte long, two of
         * them can fill up a cache line. Assume siblings share cache lines and
         * use the CPU topology to group two connections per cache line in such
         * a way that false sharing is avoided.
         */
        /* NOTE(review): this calloc() result is not NULL-checked before
         * being written to by topology_to_schedtbl() — worth confirming. */
        schedtbl = calloc(l->thread.count, sizeof(uint32_t));
        adj_affinity = topology_to_schedtbl(l, schedtbl, l->thread.count);

        for (unsigned int i = 0; i < total_conns; i++)
            l->conns[i].thread = &l->thread.threads[schedtbl[i % l->thread.count]];
    } else
#endif /* __x86_64__ && __linux__ */
    {
        lwan_status_debug("Using round-robin to preschedule clients");

        for (unsigned int i = 0; i < l->thread.count; i++)
            l->thread.threads[i].cpu = i % l->online_cpus;
        for (unsigned int i = 0; i < total_conns; i++)
            l->conns[i].thread = &l->thread.threads[i % l->thread.count];

        schedtbl = NULL;
        adj_affinity = false;
    }

    for (unsigned int i = 0; i < l->thread.count; i++) {
        struct lwan_thread *thread = NULL;

        if (schedtbl) {
            /* This is not the most elegant thing, but this assures that the
             * listening sockets are added to the SO_REUSEPORT group in a
             * specific order, because that's what the CBPF program to direct
             * the incoming connection to the right CPU will use. */
            for (uint32_t thread_id = 0; thread_id < l->thread.count;
                 thread_id++) {
                if (schedtbl[thread_id % l->thread.count] == i) {
                    thread = &l->thread.threads[thread_id];
                    break;
                }
            }
            if (!thread) {
                /* FIXME: can this happen when we have a offline CPU? */
                lwan_status_critical(
                    "Could not figure out which CPU thread %d should go to", i);
            }
        } else {
            thread = &l->thread.threads[i % l->thread.count];
        }

        /* The barrier is sized for two waiters: this thread and the one
         * worker being created in this iteration. */
        if (pthread_barrier_init(&l->thread.barrier, NULL, 2))
            lwan_status_critical("Could not create barrier");

        create_thread(l, thread);

        /* The listening fds double as indices into l->conns[]; mark those
         * slots so the I/O loop can recognize listener events. */
        if ((thread->listen_fd = create_listen_socket(thread, i, false)) < 0)
            lwan_status_critical_perror("Could not create listening socket");
        l->conns[thread->listen_fd].flags |= CONN_LISTENER_HTTP;

        if (tls_initialized) {
            if ((thread->tls_listen_fd = create_listen_socket(thread, i, true)) < 0)
                lwan_status_critical_perror("Could not create TLS listening socket");
            l->conns[thread->tls_listen_fd].flags |= CONN_LISTENER_HTTPS;
        } else {
            thread->tls_listen_fd = -1;
        }

        if (adj_affinity) {
            l->thread.threads[i].cpu = schedtbl[i % l->thread.count];
            adjust_thread_affinity(thread);
        }

        /* Wait until the worker has finished its own setup before moving
         * on to the next one. */
        pthread_barrier_wait(&l->thread.barrier);
    }

    lwan_status_debug("Worker threads created and ready to serve");

    free(schedtbl);
}
1377 :
1378 0 : void lwan_thread_shutdown(struct lwan *l)
1379 : {
1380 0 : lwan_status_debug("Shutting down threads");
1381 :
1382 0 : for (unsigned int i = 0; i < l->thread.count; i++) {
1383 0 : struct lwan_thread *t = &l->thread.threads[i];
1384 0 : int epoll_fd = t->epoll_fd;
1385 0 : int listen_fd = t->listen_fd;
1386 :
1387 0 : t->listen_fd = -1;
1388 0 : t->epoll_fd = -1;
1389 0 : close(epoll_fd);
1390 0 : close(listen_fd);
1391 : }
1392 :
1393 0 : pthread_barrier_wait(&l->thread.barrier);
1394 0 : pthread_barrier_destroy(&l->thread.barrier);
1395 :
1396 0 : for (unsigned int i = 0; i < l->thread.count; i++) {
1397 0 : struct lwan_thread *t = &l->thread.threads[i];
1398 :
1399 0 : pthread_join(l->thread.threads[i].self, NULL);
1400 0 : timeouts_close(t->wheel);
1401 : }
1402 :
1403 0 : free(l->thread.threads);
1404 :
1405 : #if defined(LWAN_HAVE_MBEDTLS)
1406 0 : if (l->tls) {
1407 0 : mbedtls_ssl_config_free(&l->tls->config);
1408 0 : mbedtls_x509_crt_free(&l->tls->server_cert);
1409 0 : mbedtls_pk_free(&l->tls->server_key);
1410 0 : mbedtls_entropy_free(&l->tls->entropy);
1411 0 : mbedtls_ctr_drbg_free(&l->tls->ctr_drbg);
1412 0 : free(l->tls);
1413 : }
1414 : #endif
1415 0 : }
|