comparison netio.c @ 1051:359fba4b1a49

merge tcp fastopen
author Matt Johnston <matt@ucc.asn.au>
date Sat, 28 Feb 2015 23:24:30 +0800
parents c2a50c9f509e
children fd3712d1ff7f
comparison
equal deleted inserted replaced
1045:31727a8abd4b 1051:359fba4b1a49
1 #include "netio.h"
2 #include "list.h"
3 #include "dbutil.h"
4 #include "session.h"
5 #include "debug.h"
6
7 struct dropbear_progress_connection {
8 struct addrinfo *res;
9 struct addrinfo *res_iter;
10
11 char *remotehost, *remoteport; /* For error reporting */
12
13 connect_callback cb;
14 void *cb_data;
15
16 struct Queue *writequeue; /* A queue of encrypted packets to send with TCP fastopen,
17 or NULL. */
18
19 int sock;
20
21 char* errstring;
22 };
23
24 #if defined(__linux__) && defined(TCP_DEFER_ACCEPT)
25 static void set_piggyback_ack(int sock) {
26 /* Undocumented Linux feature - set TCP_DEFER_ACCEPT and data will be piggybacked
27 on the 3rd packet (ack) of the TCP handshake. Saves a IP packet.
28 http://thread.gmane.org/gmane.linux.network/224627/focus=224727
29 "Piggyback the final ACK of the three way TCP connection establishment with the data" */
30 int val = 1;
31 /* No error checking, this is opportunistic */
32 int err = setsockopt(sock, IPPROTO_TCP, TCP_DEFER_ACCEPT, (void*)&val, sizeof(val));
33 if (err)
34 {
35 TRACE(("Failed setsockopt TCP_DEFER_ACCEPT: %s", strerror(errno)))
36 }
37 }
38 #endif
39
40
41 /* Deallocate a progress connection. Removes from the pending list if iter!=NULL.
42 Does not close sockets */
43 static void remove_connect(struct dropbear_progress_connection *c, m_list_elem *iter) {
44 if (c->res) {
45 freeaddrinfo(c->res);
46 }
47 m_free(c->remotehost);
48 m_free(c->remoteport);
49 m_free(c->errstring);
50 m_free(c);
51
52 if (iter) {
53 list_remove(iter);
54 }
55 }
56
57 static void cancel_callback(int result, int sock, void* UNUSED(data), const char* UNUSED(errstring)) {
58 if (result == DROPBEAR_SUCCESS)
59 {
60 m_close(sock);
61 }
62 }
63
64 void cancel_connect(struct dropbear_progress_connection *c) {
65 c->cb = cancel_callback;
66 c->cb_data = NULL;
67 }
68
69 static void connect_try_next(struct dropbear_progress_connection *c) {
70 struct addrinfo *r;
71 int res = 0;
72 int fastopen = 0;
73 #ifdef DROPBEAR_TCP_FAST_OPEN
74 struct msghdr message;
75 #endif
76
77 for (r = c->res_iter; r; r = r->ai_next)
78 {
79 assert(c->sock == -1);
80
81 c->sock = socket(c->res_iter->ai_family, c->res_iter->ai_socktype, c->res_iter->ai_protocol);
82 if (c->sock < 0) {
83 continue;
84 }
85
86 ses.maxfd = MAX(ses.maxfd, c->sock);
87 set_sock_nodelay(c->sock);
88 setnonblocking(c->sock);
89
90 #if defined(__linux__) && defined(TCP_DEFER_ACCEPT)
91 set_piggyback_ack(c->sock);
92 #endif
93
94 #ifdef DROPBEAR_TCP_FAST_OPEN
95 fastopen = (c->writequeue != NULL);
96
97 memset(&message, 0x0, sizeof(message));
98 message.msg_name = r->ai_addr;
99 message.msg_namelen = r->ai_addrlen;
100
101 if (c->writequeue) {
102 int iovlen; /* Linux msg_iovlen is a size_t */
103 message.msg_iov = packet_queue_to_iovec(c->writequeue, &iovlen);
104 message.msg_iovlen = iovlen;
105 res = sendmsg(c->sock, &message, MSG_FASTOPEN);
106 if (res < 0 && errno != EINPROGRESS) {
107 /* Not entirely sure which kind of errors are normal - 2.6.32 seems to
108 return EPIPE for any (nonblocking?) sendmsg(). just fall back */
109 TRACE(("sendmsg tcp_fastopen failed, falling back. %s", strerror(errno)));
110 /* No kernel MSG_FASTOPEN support. Fall back below */
111 fastopen = 0;
112 /* Set to NULL to avoid trying again */
113 c->writequeue = NULL;
114 }
115 m_free(message.msg_iov);
116 packet_queue_consume(c->writequeue, res);
117 }
118 #endif
119
120 /* Normal connect(), used as fallback for TCP fastopen too */
121 if (!fastopen) {
122 res = connect(c->sock, r->ai_addr, r->ai_addrlen);
123 }
124
125 if (res < 0 && errno != EINPROGRESS) {
126 /* failure */
127 close(c->sock);
128 c->sock = -1;
129 continue;
130 } else {
131 /* new connection was successful, wait for it to complete */
132 break;
133 }
134 }
135
136 if (r) {
137 c->res_iter = r->ai_next;
138 } else {
139 c->res_iter = NULL;
140 }
141 }
142
143 /* Connect via TCP to a host. */
144 struct dropbear_progress_connection *connect_remote(const char* remotehost, const char* remoteport,
145 connect_callback cb, void* cb_data)
146 {
147 struct dropbear_progress_connection *c = NULL;
148 int err;
149 struct addrinfo hints;
150
151 c = m_malloc(sizeof(*c));
152 c->remotehost = m_strdup(remotehost);
153 c->remoteport = m_strdup(remoteport);
154 c->sock = -1;
155 c->cb = cb;
156 c->cb_data = cb_data;
157
158 list_append(&ses.conn_pending, c);
159
160 memset(&hints, 0, sizeof(hints));
161 hints.ai_socktype = SOCK_STREAM;
162 hints.ai_family = PF_UNSPEC;
163
164 err = getaddrinfo(remotehost, remoteport, &hints, &c->res);
165 if (err) {
166 int len;
167 len = 100 + strlen(gai_strerror(err));
168 c->errstring = (char*)m_malloc(len);
169 snprintf(c->errstring, len, "Error resolving '%s' port '%s'. %s",
170 remotehost, remoteport, gai_strerror(err));
171 TRACE(("Error resolving: %s", gai_strerror(err)))
172 return NULL;
173 }
174
175 c->res_iter = c->res;
176
177 return c;
178 }
179
180 void remove_connect_pending() {
181 while (ses.conn_pending.first) {
182 struct dropbear_progress_connection *c = ses.conn_pending.first->item;
183 remove_connect(c, ses.conn_pending.first);
184 }
185 }
186
187
188 void set_connect_fds(fd_set *writefd) {
189 m_list_elem *iter;
190 TRACE(("enter handle_connect_fds"))
191 for (iter = ses.conn_pending.first; iter; iter = iter->next) {
192 struct dropbear_progress_connection *c = iter->item;
193 /* Set one going */
194 while (c->res_iter && c->sock < 0)
195 {
196 connect_try_next(c);
197 }
198 if (c->sock >= 0) {
199 FD_SET(c->sock, writefd);
200 } else {
201 m_list_elem *remove_iter;
202 /* Final failure */
203 if (!c->errstring) {
204 c->errstring = m_strdup("unexpected failure");
205 }
206 c->cb(DROPBEAR_FAILURE, -1, c->cb_data, c->errstring);
207 /* Safely remove without invalidating iter */
208 remove_iter = iter;
209 iter = iter->prev;
210 remove_connect(c, remove_iter);
211 }
212 }
213 }
214
215 void handle_connect_fds(fd_set *writefd) {
216 m_list_elem *iter;
217 TRACE(("enter handle_connect_fds"))
218 for (iter = ses.conn_pending.first; iter; iter = iter->next) {
219 int val;
220 socklen_t vallen = sizeof(val);
221 struct dropbear_progress_connection *c = iter->item;
222
223 if (!FD_ISSET(c->sock, writefd)) {
224 continue;
225 }
226
227 TRACE(("handling %s port %s socket %d", c->remotehost, c->remoteport, c->sock));
228
229 if (getsockopt(c->sock, SOL_SOCKET, SO_ERROR, &val, &vallen) != 0) {
230 TRACE(("handle_connect_fds getsockopt(%d) SO_ERROR failed: %s", c->sock, strerror(errno)))
231 /* This isn't expected to happen - Unix has surprises though, continue gracefully. */
232 m_close(c->sock);
233 c->sock = -1;
234 } else if (val != 0) {
235 /* Connect failed */
236 TRACE(("connect to %s port %s failed.", c->remotehost, c->remoteport))
237 m_close(c->sock);
238 c->sock = -1;
239
240 m_free(c->errstring);
241 c->errstring = strerror(val);
242 } else {
243 /* New connection has been established */
244 c->cb(DROPBEAR_SUCCESS, c->sock, c->cb_data, NULL);
245 remove_connect(c, iter);
246 TRACE(("leave handle_connect_fds - success"))
247 /* Must return here - remove_connect() invalidates iter */
248 return;
249 }
250 }
251 TRACE(("leave handle_connect_fds - end iter"))
252 }
253
254 void connect_set_writequeue(struct dropbear_progress_connection *c, struct Queue *writequeue) {
255 c->writequeue = writequeue;
256 }
257
258 struct iovec * packet_queue_to_iovec(struct Queue *queue, int *ret_iov_count) {
259 struct iovec *iov = NULL;
260 struct Link *l;
261 unsigned int i;
262 int len;
263 buffer *writebuf;
264
265 #ifndef IOV_MAX
266 #define IOV_MAX UIO_MAXIOV
267 #endif
268
269 *ret_iov_count = MIN(queue->count, IOV_MAX);
270
271 iov = m_malloc(sizeof(*iov) * *ret_iov_count);
272 for (l = queue->head, i = 0; l; l = l->link, i++)
273 {
274 writebuf = (buffer*)l->item;
275 len = writebuf->len - 1 - writebuf->pos;
276 dropbear_assert(len > 0);
277 TRACE2(("write_packet writev #%d type %d len %d/%d", i, writebuf->data[writebuf->len-1],
278 len, writebuf->len-1))
279 iov[i].iov_base = buf_getptr(writebuf, len);
280 iov[i].iov_len = len;
281 }
282
283 return iov;
284 }
285
286 void packet_queue_consume(struct Queue *queue, ssize_t written) {
287 buffer *writebuf;
288 int len;
289 while (written > 0) {
290 writebuf = (buffer*)examine(queue);
291 len = writebuf->len - 1 - writebuf->pos;
292 if (len > written) {
293 /* partial buffer write */
294 buf_incrpos(writebuf, written);
295 written = 0;
296 } else {
297 written -= len;
298 dequeue(queue);
299 buf_free(writebuf);
300 }
301 }
302 }
303
/* Disable Nagle's algorithm on sock by setting TCP_NODELAY.
   The result of setsockopt() is not checked. */
void set_sock_nodelay(int sock) {
	int enable = 1;

	setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (void*)&enable, sizeof(enable));
}
311
#ifdef DROPBEAR_TCP_FAST_OPEN
/* Set the TCP_FASTOPEN option on sock. The option value is
   MAX_UNAUTH_PER_IP, but never less than 5. A failure is only traced. */
void set_listen_fast_open(int sock) {
	int qlen = MAX(MAX_UNAUTH_PER_IP, 5);
	int rc = setsockopt(sock, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));

	if (rc != 0) {
		TRACE(("set_listen_fast_open failed for socket %d: %s", sock, strerror(errno)))
	}
}

#endif
321
322 void set_sock_priority(int sock, enum dropbear_prio prio) {
323
324 int rc;
325 #ifdef IPTOS_LOWDELAY
326 int iptos_val = 0;
327 #endif
328 #ifdef SO_PRIORITY
329 int so_prio_val = 0;
330 #endif
331
332
333 /* Don't log ENOTSOCK errors so that this can harmlessly be called
334 * on a client '-J' proxy pipe */
335
336 /* set the TOS bit for either ipv4 or ipv6 */
337 #ifdef IPTOS_LOWDELAY
338 if (prio == DROPBEAR_PRIO_LOWDELAY) {
339 iptos_val = IPTOS_LOWDELAY;
340 } else if (prio == DROPBEAR_PRIO_BULK) {
341 iptos_val = IPTOS_THROUGHPUT;
342 }
343 #if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
344 rc = setsockopt(sock, IPPROTO_IPV6, IPV6_TCLASS, (void*)&iptos_val, sizeof(iptos_val));
345 if (rc < 0 && errno != ENOTSOCK) {
346 TRACE(("Couldn't set IPV6_TCLASS (%s)", strerror(errno)));
347 }
348 #endif
349 rc = setsockopt(sock, IPPROTO_IP, IP_TOS, (void*)&iptos_val, sizeof(iptos_val));
350 if (rc < 0 && errno != ENOTSOCK) {
351 TRACE(("Couldn't set IP_TOS (%s)", strerror(errno)));
352 }
353 #endif
354
355 #ifdef SO_PRIORITY
356 if (prio == DROPBEAR_PRIO_LOWDELAY) {
357 so_prio_val = TC_PRIO_INTERACTIVE;
358 } else if (prio == DROPBEAR_PRIO_BULK) {
359 so_prio_val = TC_PRIO_BULK;
360 }
361 /* linux specific, sets QoS class. see tc-prio(8) */
362 rc = setsockopt(sock, SOL_SOCKET, SO_PRIORITY, (void*) &so_prio_val, sizeof(so_prio_val));
363 if (rc < 0 && errno != ENOTSOCK)
364 dropbear_log(LOG_WARNING, "Couldn't set SO_PRIORITY (%s)",
365 strerror(errno));
366 #endif
367
368 }
369
/* Open listening TCP sockets for address:port.
 *
 * address selects what to listen on:
 *   NULL  - the local loopback only
 *   ""    - all interfaces
 *   other - whatever the address resolves to
 *
 * Up to sockcount sockets are created, one per resolved address that could
 * be bound, and stored in socks[]. *maxfd is raised to the highest new
 * descriptor.
 *
 * Returns the number of sockets bound on success, or -1 on failure. On
 * failure, if errstring is non-NULL and *errstring is NULL, *errstring is
 * set to a newly allocated error string. */
int dropbear_listen(const char* address, const char* port,
		int *socks, unsigned int sockcount, char **errstring, int *maxfd) {

	struct addrinfo hints, *ai = NULL, *ai_list = NULL;
	int err;
	unsigned int nbound;
	struct linger linger;
	int val;
	int sock;

	TRACE(("enter dropbear_listen"))

	memset(&hints, 0, sizeof(hints));
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_family = AF_UNSPEC; /* TODO: let them flag v4 only etc */

	/* for calling getaddrinfo:
	   address == NULL and !AI_PASSIVE: local loopback
	   address == NULL and AI_PASSIVE: all interfaces
	   address != NULL: whatever the address says */
	if (address == NULL) {
		TRACE(("dropbear_listen: local loopback"))
	} else {
		if (*address == '\0') {
			TRACE(("dropbear_listen: all interfaces"))
			address = NULL;
		}
		hints.ai_flags = AI_PASSIVE;
	}

	err = getaddrinfo(address, port, &hints, &ai_list);
	if (err != 0) {
		if (errstring != NULL && *errstring == NULL) {
			const char *reason = gai_strerror(err);
			int len = 20 + strlen(reason);
			*errstring = (char*)m_malloc(len);
			snprintf(*errstring, len, "Error resolving: %s", reason);
		}
		if (ai_list != NULL) {
			freeaddrinfo(ai_list);
			ai_list = NULL;
		}
		TRACE(("leave dropbear_listen: failed resolving"))
		return -1;
	}

	nbound = 0;
	for (ai = ai_list; ai != NULL && nbound < sockcount; ai = ai->ai_next) {

		/* Get a socket; the slot is reused if this address doesn't work out */
		sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		socks[nbound] = sock;

		if (sock < 0) {
			err = errno;
			TRACE(("socket() failed"))
			continue;
		}

		/* Various useful socket options */

		/* set to reuse, quick timeout */
		val = 1;
		setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (void*) &val, sizeof(val));
		linger.l_onoff = 1;
		linger.l_linger = 5;
		setsockopt(sock, SOL_SOCKET, SO_LINGER, (void*)&linger, sizeof(linger));

#if defined(IPPROTO_IPV6) && defined(IPV6_V6ONLY)
		if (ai->ai_family == AF_INET6) {
			int v6only = 1;
			if (setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
						&v6only, sizeof(v6only)) == -1) {
				dropbear_log(LOG_WARNING, "Couldn't set IPV6_V6ONLY");
			}
		}
#endif

		set_sock_nodelay(sock);

		if (bind(sock, ai->ai_addr, ai->ai_addrlen) < 0) {
			/* remember why before close() can disturb errno */
			err = errno;
			close(sock);
			TRACE(("bind(%s) failed", port))
			continue;
		}

		if (listen(sock, DROPBEAR_LISTEN_BACKLOG) < 0) {
			err = errno;
			close(sock);
			TRACE(("listen() failed"))
			continue;
		}

		*maxfd = MAX(*maxfd, sock);
		nbound++;
	}

	if (ai_list != NULL) {
		freeaddrinfo(ai_list);
		ai_list = NULL;
	}

	if (nbound == 0) {
		/* err holds the errno of the last failed step */
		if (errstring != NULL && *errstring == NULL) {
			int len = 20 + strlen(strerror(err));
			*errstring = (char*)m_malloc(len);
			snprintf(*errstring, len, "Error listening: %s", strerror(err));
		}
		TRACE(("leave dropbear_listen: failure, %s", strerror(err)))
		return -1;
	}

	TRACE(("leave dropbear_listen: success, %d socks bound", nbound))
	return nbound;
}
497
/* Look up the local and/or remote endpoint of the socket fd as strings.
   The local side is queried only if local_host or local_port is non-NULL,
   the remote side only if remote_host or remote_port is non-NULL. The
   strings are produced by getaddrstring(), to which host_lookup is passed
   through. Exits via dropbear_exit() if the socket can't be queried. */
void get_socket_address(int fd, char **local_host, char **local_port,
		char **remote_host, char **remote_port, int host_lookup)
{
	struct sockaddr_storage ss;
	socklen_t sslen;
	const int want_local = (local_host != NULL || local_port != NULL);
	const int want_remote = (remote_host != NULL || remote_port != NULL);

	if (want_local) {
		sslen = sizeof(ss);
		if (getsockname(fd, (struct sockaddr*)&ss, &sslen) < 0) {
			dropbear_exit("Failed socket address: %s", strerror(errno));
		}
		getaddrstring(&ss, local_host, local_port, host_lookup);
	}

	if (want_remote) {
		sslen = sizeof(ss);
		if (getpeername(fd, (struct sockaddr*)&ss, &sslen) < 0) {
			dropbear_exit("Failed socket address: %s", strerror(errno));
		}
		getaddrstring(&ss, remote_host, remote_port, host_lookup);
	}
}
519
520 /* Return a string representation of the socket address passed. The return
521 * value is allocated with malloc() */
522 void getaddrstring(struct sockaddr_storage* addr,
523 char **ret_host, char **ret_port,
524 int host_lookup) {
525
526 char host[NI_MAXHOST+1], serv[NI_MAXSERV+1];
527 unsigned int len;
528 int ret;
529
530 int flags = NI_NUMERICSERV | NI_NUMERICHOST;
531
532 #ifndef DO_HOST_LOOKUP
533 host_lookup = 0;
534 #endif
535
536 if (host_lookup) {
537 flags = NI_NUMERICSERV;
538 }
539
540 len = sizeof(struct sockaddr_storage);
541 /* Some platforms such as Solaris 8 require that len is the length
542 * of the specific structure. Some older linux systems (glibc 2.1.3
543 * such as debian potato) have sockaddr_storage.__ss_family instead
544 * but we'll ignore them */
545 #ifdef HAVE_STRUCT_SOCKADDR_STORAGE_SS_FAMILY
546 if (addr->ss_family == AF_INET) {
547 len = sizeof(struct sockaddr_in);
548 }
549 #ifdef AF_INET6
550 if (addr->ss_family == AF_INET6) {
551 len = sizeof(struct sockaddr_in6);
552 }
553 #endif
554 #endif
555
556 ret = getnameinfo((struct sockaddr*)addr, len, host, sizeof(host)-1,
557 serv, sizeof(serv)-1, flags);
558
559 if (ret != 0) {
560 if (host_lookup) {
561 /* On some systems (Darwin does it) we get EINTR from getnameinfo
562 * somehow. Eew. So we'll just return the IP, since that doesn't seem
563 * to exhibit that behaviour. */
564 getaddrstring(addr, ret_host, ret_port, 0);
565 return;
566 } else {
567 /* if we can't do a numeric lookup, something's gone terribly wrong */
568 dropbear_exit("Failed lookup: %s", gai_strerror(ret));
569 }
570 }
571
572 if (ret_host) {
573 *ret_host = m_strdup(host);
574 }
575 if (ret_port) {
576 *ret_port = m_strdup(serv);
577 }
578 }
579