chromium/third_party/grpc/src/src/core/lib/iomgr/tcp_windows.cc

//
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//

#include <grpc/support/port_platform.h>

#include "src/core/lib/iomgr/port.h"

#ifdef GRPC_WINSOCK_SOCKET

#include <limits.h>

#include <grpc/slice_buffer.h>
#include <grpc/support/alloc.h>
#include <grpc/support/log.h>
#include <grpc/support/log_windows.h>
#include <grpc/support/string_util.h>

#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/gpr/string.h"
#include "src/core/lib/gpr/useful.h"
#include "src/core/lib/gprpp/crash.h"
#include "src/core/lib/iomgr/iocp_windows.h"
#include "src/core/lib/iomgr/sockaddr.h"
#include "src/core/lib/iomgr/sockaddr_windows.h"
#include "src/core/lib/iomgr/socket_windows.h"
#include "src/core/lib/iomgr/tcp_client.h"
#include "src/core/lib/iomgr/tcp_windows.h"
#include "src/core/lib/iomgr/timer.h"
#include "src/core/lib/slice/slice_internal.h"
#include "src/core/lib/slice/slice_string_helpers.h"

#if defined(__MSYS__) && defined(GPR_ARCH_64)
// Nasty workaround for nasty bug when using the 64 bits msys compiler
// in conjunction with Microsoft Windows headers.
#define GRPC_FIONBIO
#else
#define GRPC_FIONBIO
#endif

extern grpc_core::TraceFlag grpc_tcp_trace;

grpc_error_handle grpc_tcp_set_non_block(SOCKET sock) {
  int status;
  uint32_t param = 1;
  DWORD ret;
  status = WSAIoctl(sock, GRPC_FIONBIO, &param, sizeof(param), NULL, 0, &ret,
                    NULL, NULL);
  return status == 0
             ? absl::OkStatus()
             : GRPC_WSA_ERROR(WSAGetLastError(), "WSAIoctl(GRPC_FIONBIO)");
}

static grpc_error_handle set_dualstack(SOCKET sock) {
  int status;
  unsigned long param = 0;
  status = setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&param,
                      sizeof(param));
  return status == 0
             ? absl::OkStatus()
             : GRPC_WSA_ERROR(WSAGetLastError(), "setsockopt(IPV6_V6ONLY)");
}

static grpc_error_handle enable_socket_low_latency(SOCKET sock) {
  int status;
  BOOL param = TRUE;
  status = ::setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
                        reinterpret_cast<char*>(&param), sizeof(param));
  if (status == SOCKET_ERROR) {
    status = WSAGetLastError();
  }
  return status == 0 ? absl::OkStatus()
                     : GRPC_WSA_ERROR(status, "setsockopt(TCP_NODELAY)");
}

grpc_error_handle grpc_tcp_prepare_socket(SOCKET sock) {
  grpc_error_handle err;
  err = grpc_tcp_set_non_block(sock);
  if (!err.ok()) return err;
  err = set_dualstack(sock);
  if (!err.ok()) return err;
  err = enable_socket_low_latency(sock);
  if (!err.ok()) return err;
  return absl::OkStatus();
}

typedef struct grpc_tcp {
  // This is our C++ class derivation emulation.
  grpc_endpoint base;
  // The one socket this endpoint is using.
  grpc_winsocket* socket;
  // Refcounting how many operations are in progress.
  gpr_refcount refcount;

  grpc_closure on_read;
  grpc_closure on_write;

  grpc_closure* read_cb;
  grpc_closure* write_cb;

  // garbage after the last read
  grpc_slice_buffer last_read_buffer;

  grpc_slice_buffer* write_slices;
  grpc_slice_buffer* read_slices;

  // The IO Completion Port runs from another thread. We need some mechanism
  // to protect ourselves when requesting a shutdown.
  gpr_mu mu;
  int shutting_down;
  grpc_error_handle shutdown_error;

  std::string peer_string;
  std::string local_address;
} grpc_tcp;

static void tcp_free(grpc_tcp* tcp) {
  grpc_winsocket_destroy(tcp->socket);
  gpr_mu_destroy(&tcp->mu);
  grpc_slice_buffer_destroy(&tcp->last_read_buffer);
  delete tcp;
}

#ifndef NDEBUG
#define TCP_UNREF
#define TCP_REF
static void tcp_unref(grpc_tcp* tcp, const char* reason, const char* file,
                      int line) {
  if (grpc_tcp_trace.enabled()) {
    gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count);
    gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG,
            "TCP unref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val,
            val - 1);
  }
  if (gpr_unref(&tcp->refcount)) {
    tcp_free(tcp);
  }
}

static void tcp_ref(grpc_tcp* tcp, const char* reason, const char* file,
                    int line) {
  if (grpc_tcp_trace.enabled()) {
    gpr_atm val = gpr_atm_no_barrier_load(&tcp->refcount.count);
    gpr_log(file, line, GPR_LOG_SEVERITY_DEBUG,
            "TCP   ref %p : %s %" PRIdPTR " -> %" PRIdPTR, tcp, reason, val,
            val + 1);
  }
  gpr_ref(&tcp->refcount);
}
#else
#define TCP_UNREF
#define TCP_REF
static void tcp_unref(grpc_tcp* tcp) {
  if (gpr_unref(&tcp->refcount)) {
    tcp_free(tcp);
  }
}

static void tcp_ref(grpc_tcp* tcp) { gpr_ref(&tcp->refcount); }
#endif

// Asynchronous callback from the IOCP, or the background thread.
static void on_read(void* tcpp, grpc_error_handle error) {
  grpc_tcp* tcp = (grpc_tcp*)tcpp;
  grpc_closure* cb = tcp->read_cb;
  grpc_winsocket* socket = tcp->socket;
  grpc_winsocket_callback_info* info = &socket->read_info;

  if (grpc_tcp_trace.enabled()) {
    gpr_log(GPR_INFO, "TCP:%p on_read", tcp);
  }

  if (error.ok()) {
    if (info->wsa_error != 0 && !tcp->shutting_down) {
      char* utf8_message = gpr_format_message(info->wsa_error);
      error = GRPC_ERROR_CREATE(utf8_message);
      gpr_free(utf8_message);
      grpc_slice_buffer_reset_and_unref(tcp->read_slices);
    } else {
      if (info->bytes_transferred != 0 && !tcp->shutting_down) {
        GPR_ASSERT((size_t)info->bytes_transferred <= tcp->read_slices->length);
        if (static_cast<size_t>(info->bytes_transferred) !=
            tcp->read_slices->length) {
          grpc_slice_buffer_trim_end(
              tcp->read_slices,
              tcp->read_slices->length -
                  static_cast<size_t>(info->bytes_transferred),
              &tcp->last_read_buffer);
        }
        GPR_ASSERT((size_t)info->bytes_transferred == tcp->read_slices->length);

        if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace) &&
            gpr_should_log(GPR_LOG_SEVERITY_INFO)) {
          size_t i;
          for (i = 0; i < tcp->read_slices->count; i++) {
            char* dump = grpc_dump_slice(tcp->read_slices->slices[i],
                                         GPR_DUMP_HEX | GPR_DUMP_ASCII);
            gpr_log(GPR_INFO, "READ %p (peer=%s): %s", tcp,
                    tcp->peer_string.c_str(), dump);
            gpr_free(dump);
          }
        }
      } else {
        if (grpc_tcp_trace.enabled()) {
          gpr_log(GPR_INFO, "TCP:%p unref read_slice", tcp);
        }
        grpc_slice_buffer_reset_and_unref(tcp->read_slices);
        error = grpc_error_set_int(
            tcp->shutting_down
                ? GRPC_ERROR_CREATE_REFERENCING("TCP stream shutting down",
                                                &tcp->shutdown_error, 1)
                : GRPC_ERROR_CREATE("End of TCP stream"),
            grpc_core::StatusIntProperty::kRpcStatus, GRPC_STATUS_UNAVAILABLE);
      }
    }
  }

  tcp->read_cb = NULL;
  TCP_UNREF(tcp, "read");
  grpc_core::ExecCtx::Run(DEBUG_LOCATION, cb, error);
}

#define DEFAULT_TARGET_READ_SIZE
#define MAX_WSABUF_COUNT
static void win_read(grpc_endpoint* ep, grpc_slice_buffer* read_slices,
                     grpc_closure* cb, bool /* urgent */,
                     int /* min_progress_size */) {
  grpc_tcp* tcp = (grpc_tcp*)ep;
  grpc_winsocket* handle = tcp->socket;
  grpc_winsocket_callback_info* info = &handle->read_info;
  int status;
  DWORD bytes_read = 0;
  DWORD flags = 0;
  WSABUF buffers[MAX_WSABUF_COUNT];
  size_t i;

  if (grpc_tcp_trace.enabled()) {
    gpr_log(GPR_INFO, "TCP:%p win_read", tcp);
  }

  if (tcp->shutting_down) {
    grpc_core::ExecCtx::Run(
        DEBUG_LOCATION, cb,
        grpc_error_set_int(
            GRPC_ERROR_CREATE_REFERENCING("TCP socket is shutting down",
                                          &tcp->shutdown_error, 1),
            grpc_core::StatusIntProperty::kRpcStatus, GRPC_STATUS_UNAVAILABLE));
    return;
  }

  tcp->read_cb = cb;
  tcp->read_slices = read_slices;
  grpc_slice_buffer_reset_and_unref(read_slices);
  grpc_slice_buffer_swap(read_slices, &tcp->last_read_buffer);

  if (tcp->read_slices->length < DEFAULT_TARGET_READ_SIZE / 2 &&
      tcp->read_slices->count < MAX_WSABUF_COUNT) {
    // TODO(jtattermusch): slice should be allocated using resource quota
    grpc_slice_buffer_add(tcp->read_slices,
                          GRPC_SLICE_MALLOC(DEFAULT_TARGET_READ_SIZE));
  }

  GPR_ASSERT(tcp->read_slices->count <= MAX_WSABUF_COUNT);
  for (i = 0; i < tcp->read_slices->count; i++) {
    buffers[i].len = (ULONG)GRPC_SLICE_LENGTH(
        tcp->read_slices->slices[i]);  // we know slice size fits in 32bit.
    buffers[i].buf = (char*)GRPC_SLICE_START_PTR(tcp->read_slices->slices[i]);
  }

  TCP_REF(tcp, "read");

  // First let's try a synchronous, non-blocking read.
  status = WSARecv(tcp->socket->socket, buffers, (DWORD)tcp->read_slices->count,
                   &bytes_read, &flags, NULL, NULL);
  info->wsa_error = status == 0 ? 0 : WSAGetLastError();

  // Did we get data immediately ? Yay.
  if (info->wsa_error != WSAEWOULDBLOCK) {
    info->bytes_transferred = bytes_read;
    grpc_core::ExecCtx::Run(DEBUG_LOCATION, &tcp->on_read, absl::OkStatus());
    return;
  }

  // Otherwise, let's retry, by queuing a read.
  memset(&tcp->socket->read_info.overlapped, 0, sizeof(OVERLAPPED));
  status = WSARecv(tcp->socket->socket, buffers, (DWORD)tcp->read_slices->count,
                   &bytes_read, &flags, &info->overlapped, NULL);

  if (status != 0) {
    int wsa_error = WSAGetLastError();
    if (wsa_error != WSA_IO_PENDING) {
      info->wsa_error = wsa_error;
      grpc_core::ExecCtx::Run(DEBUG_LOCATION, &tcp->on_read,
                              GRPC_WSA_ERROR(info->wsa_error, "WSARecv"));
      return;
    }
  }

  grpc_socket_notify_on_read(tcp->socket, &tcp->on_read);
}

// Asynchronous callback from the IOCP, or the background thread.
static void on_write(void* tcpp, grpc_error_handle error) {
  grpc_tcp* tcp = (grpc_tcp*)tcpp;
  grpc_winsocket* handle = tcp->socket;
  grpc_winsocket_callback_info* info = &handle->write_info;
  grpc_closure* cb;

  if (grpc_tcp_trace.enabled()) {
    gpr_log(GPR_INFO, "TCP:%p on_write", tcp);
  }

  gpr_mu_lock(&tcp->mu);
  cb = tcp->write_cb;
  tcp->write_cb = NULL;
  gpr_mu_unlock(&tcp->mu);

  if (error.ok()) {
    if (info->wsa_error != 0) {
      error = GRPC_WSA_ERROR(info->wsa_error, "WSASend");
    } else {
      GPR_ASSERT(info->bytes_transferred <= tcp->write_slices->length);
    }
  }

  TCP_UNREF(tcp, "write");
  grpc_core::ExecCtx::Run(DEBUG_LOCATION, cb, error);
}

// Initiates a write.
static void win_write(grpc_endpoint* ep, grpc_slice_buffer* slices,
                      grpc_closure* cb, void* /* arg */,
                      int /* max_frame_size */) {
  grpc_tcp* tcp = (grpc_tcp*)ep;
  grpc_winsocket* socket = tcp->socket;
  grpc_winsocket_callback_info* info = &socket->write_info;
  unsigned i;
  DWORD bytes_sent;
  int status;
  WSABUF local_buffers[MAX_WSABUF_COUNT];
  WSABUF* allocated = NULL;
  WSABUF* buffers = local_buffers;
  size_t len, async_buffers_offset = 0;

  if (GRPC_TRACE_FLAG_ENABLED(grpc_tcp_trace) &&
      gpr_should_log(GPR_LOG_SEVERITY_INFO)) {
    size_t i;
    for (i = 0; i < slices->count; i++) {
      char* data =
          grpc_dump_slice(slices->slices[i], GPR_DUMP_HEX | GPR_DUMP_ASCII);
      gpr_log(GPR_INFO, "WRITE %p (peer=%s): %s", tcp, tcp->peer_string.c_str(),
              data);
      gpr_free(data);
    }
  }

  if (tcp->shutting_down) {
    grpc_core::ExecCtx::Run(
        DEBUG_LOCATION, cb,
        grpc_error_set_int(
            GRPC_ERROR_CREATE_REFERENCING("TCP socket is shutting down",
                                          &tcp->shutdown_error, 1),
            grpc_core::StatusIntProperty::kRpcStatus, GRPC_STATUS_UNAVAILABLE));
    return;
  }

  tcp->write_cb = cb;
  tcp->write_slices = slices;
  GPR_ASSERT(tcp->write_slices->count <= UINT_MAX);
  if (tcp->write_slices->count > GPR_ARRAY_SIZE(local_buffers)) {
    buffers = (WSABUF*)gpr_malloc(sizeof(WSABUF) * tcp->write_slices->count);
    allocated = buffers;
  }

  for (i = 0; i < tcp->write_slices->count; i++) {
    len = GRPC_SLICE_LENGTH(tcp->write_slices->slices[i]);
    GPR_ASSERT(len <= ULONG_MAX);
    buffers[i].len = (ULONG)len;
    buffers[i].buf = (char*)GRPC_SLICE_START_PTR(tcp->write_slices->slices[i]);
  }

  // First, let's try a synchronous, non-blocking write.
  status = WSASend(socket->socket, buffers, (DWORD)tcp->write_slices->count,
                   &bytes_sent, 0, NULL, NULL);

  if (status == 0) {
    if (bytes_sent == tcp->write_slices->length) {
      info->wsa_error = 0;
      grpc_error_handle error = absl::OkStatus();
      grpc_core::ExecCtx::Run(DEBUG_LOCATION, cb, error);
      if (allocated) gpr_free(allocated);
      return;
    }

    // The data was not completely delivered, we should send the rest of
    // them by doing an async write operation.
    for (i = 0; i < tcp->write_slices->count; i++) {
      if (buffers[i].len > bytes_sent) {
        buffers[i].buf += bytes_sent;
        buffers[i].len -= bytes_sent;
        break;
      }
      bytes_sent -= buffers[i].len;
      async_buffers_offset++;
    }
  } else {
    info->wsa_error = WSAGetLastError();

    // We would kind of expect to get a WSAEWOULDBLOCK here, especially on a
    // busy connection that has its send queue filled up. But if we don't, then
    // we can avoid doing an async write operation at all.
    if (info->wsa_error != WSAEWOULDBLOCK) {
      grpc_error_handle error = GRPC_WSA_ERROR(info->wsa_error, "WSASend");
      grpc_core::ExecCtx::Run(DEBUG_LOCATION, cb, error);
      if (allocated) gpr_free(allocated);
      return;
    }
  }

  TCP_REF(tcp, "write");

  // If we got a WSAEWOULDBLOCK earlier, then we need to re-do the same
  // operation, this time asynchronously.
  memset(&socket->write_info.overlapped, 0, sizeof(OVERLAPPED));
  status = WSASend(socket->socket, buffers + async_buffers_offset,
                   (DWORD)(tcp->write_slices->count - async_buffers_offset),
                   NULL, 0, &socket->write_info.overlapped, NULL);
  if (allocated) gpr_free(allocated);

  if (status != 0) {
    int wsa_error = WSAGetLastError();
    if (wsa_error != WSA_IO_PENDING) {
      TCP_UNREF(tcp, "write");
      grpc_core::ExecCtx::Run(DEBUG_LOCATION, cb,
                              GRPC_WSA_ERROR(wsa_error, "WSASend"));
      return;
    }
  }

  // As all is now setup, we can now ask for the IOCP notification. It may
  // trigger the callback immediately however, but no matter.
  grpc_socket_notify_on_write(socket, &tcp->on_write);
}

static void win_add_to_pollset(grpc_endpoint* ep, grpc_pollset* ps) {
  grpc_tcp* tcp;
  (void)ps;
  tcp = (grpc_tcp*)ep;
  grpc_iocp_add_socket(tcp->socket);
}

static void win_add_to_pollset_set(grpc_endpoint* ep, grpc_pollset_set* pss) {
  grpc_tcp* tcp;
  (void)pss;
  tcp = (grpc_tcp*)ep;
  grpc_iocp_add_socket(tcp->socket);
}

static void win_delete_from_pollset_set(grpc_endpoint* /* ep */,
                                        grpc_pollset_set* /* pss */) {}

// Initiates a shutdown of the TCP endpoint. This will queue abort callbacks
// for the potential read and write operations. It is up to the caller to
// guarantee this isn't called in parallel to a read or write request, so
// we're not going to protect against these. However the IO Completion Port
// callback will happen from another thread, so we need to protect against
// concurrent access of the data structure in that regard.
static void win_shutdown(grpc_endpoint* ep, grpc_error_handle why) {
  grpc_tcp* tcp = (grpc_tcp*)ep;
  gpr_mu_lock(&tcp->mu);
  // At that point, what may happen is that we're already inside the IOCP
  // callback. See the comments in on_read and on_write.
  if (!tcp->shutting_down) {
    tcp->shutting_down = 1;
    tcp->shutdown_error = why;
  }
  grpc_winsocket_shutdown(tcp->socket);
  gpr_mu_unlock(&tcp->mu);
}

static void win_destroy(grpc_endpoint* ep) {
  grpc_tcp* tcp = (grpc_tcp*)ep;
  grpc_slice_buffer_reset_and_unref(&tcp->last_read_buffer);
  TCP_UNREF(tcp, "destroy");
}

static absl::string_view win_get_peer(grpc_endpoint* ep) {
  grpc_tcp* tcp = (grpc_tcp*)ep;
  return tcp->peer_string;
}

static absl::string_view win_get_local_address(grpc_endpoint* ep) {
  grpc_tcp* tcp = (grpc_tcp*)ep;
  return tcp->local_address;
}

static int win_get_fd(grpc_endpoint* /* ep */) { return -1; }

static bool win_can_track_err(grpc_endpoint* /* ep */) { return false; }

static grpc_endpoint_vtable vtable = {win_read,
                                      win_write,
                                      win_add_to_pollset,
                                      win_add_to_pollset_set,
                                      win_delete_from_pollset_set,
                                      win_shutdown,
                                      win_destroy,
                                      win_get_peer,
                                      win_get_local_address,
                                      win_get_fd,
                                      win_can_track_err};

grpc_endpoint* grpc_tcp_create(grpc_winsocket* socket,
                               absl::string_view peer_string) {
  // TODO(jtattermusch): C++ize grpc_tcp and its dependencies (i.e. add
  // constructors) to ensure proper initialization
  grpc_tcp* tcp = new grpc_tcp{};
  tcp->base.vtable = &vtable;
  tcp->socket = socket;
  gpr_mu_init(&tcp->mu);
  gpr_ref_init(&tcp->refcount, 1);
  GRPC_CLOSURE_INIT(&tcp->on_read, on_read, tcp, grpc_schedule_on_exec_ctx);
  GRPC_CLOSURE_INIT(&tcp->on_write, on_write, tcp, grpc_schedule_on_exec_ctx);
  grpc_resolved_address resolved_local_addr;
  resolved_local_addr.len = sizeof(resolved_local_addr.addr);
  absl::StatusOr<std::string> addr_uri;
  if (getsockname(tcp->socket->socket,
                  reinterpret_cast<sockaddr*>(resolved_local_addr.addr),
                  &resolved_local_addr.len) < 0 ||
      !(addr_uri = grpc_sockaddr_to_uri(&resolved_local_addr)).ok()) {
    tcp->local_address = "";
  } else {
    tcp->local_address = grpc_sockaddr_to_uri(&resolved_local_addr).value();
  }
  tcp->peer_string = std::string(peer_string);
  grpc_slice_buffer_init(&tcp->last_read_buffer);
  return &tcp->base;
}

#endif  // GRPC_WINSOCK_SOCKET