一、virtio-user說明
在網絡IO的半虛擬化中,vhost-user是目前最優的解決方案。在DPDK中,同樣也采用了這種方式。vhost-user是為了解決內核態數據操作復雜的情況提出的一種解決方式,通過用戶進程來替代內核進程,從而實現數據交互的最少化。在vhost-user的應用場景中,虛擬化的容器支持是一個重點方向。起初的virtio-user就是為了支持容器內部與DPDK通信的,后來也發展到虛擬設備間的通信。
DPDK與Kernel的通信也叫做“exception path”,通常來說,這種通信方式主要有幾種:
1、KNI,是目前DPDK中用戶使用的主要方案。即通過虛擬網絡接口,利用隊列和DPDK應用交換數據,但其內核模塊無法upstream(即無法合入Linux內核主線,需要單獨編譯和維護)
2、Tun/Tap或者pcap PMD,需要內核切換,效率差
3、Flow Bifurcation,虛擬多張網卡,依賴硬件,不靈活
4、virtio-user和vhost-net,這是比較好的一種實現機制。
virtio-user在DPDK和虛擬場景下的應用還是非常多的。virtio-user虛擬出的設備和真實的設備在上層看沒有區別,這個非常重要。
二、數據結構
下面看一下在DPDK中相關的數據結構定義:
/*
 * Per-virtqueue bookkeeping kept by virtio-user.
 *
 * NOTE(review): struct virtio_user_dev stores these in an array named
 * `packed_queues`, so the fields are presumably only used with packed
 * rings -- confirm against the users of that array.
 */
struct virtio_user_queue {
	uint16_t used_idx;		/* used index tracked for the queue */
	bool avail_wrap_counter;	/* wrap counter, avail side */
	bool used_wrap_counter;		/* wrap counter, used side */
};
/*
 * State of a single virtio-user device.
 *
 * The same structure serves both backends: the vhost-user specific fields
 * come first, then the vhost-kernel specific ones, then everything shared.
 * The backend in use is reached through `ops`.
 */
struct virtio_user_dev {
	/* for vhost_user backend */
	int		vhostfd;
	int		listenfd;	/* listening fd */
	bool		is_server;	/* server or client mode */

	/* for vhost_kernel backend */
	char		*ifname;
	int		*vhostfds;	/* heap array, one entry per queue pair */
	int		*tapfds;	/* heap array, one entry per queue pair */

	/* for both vhost_user and vhost_kernel */
	int		callfds[VIRTIO_MAX_VIRTQUEUES];
	int		kickfds[VIRTIO_MAX_VIRTQUEUES];
	int		mac_specified;
	uint32_t	max_queue_pairs;
	uint32_t	queue_pairs;
	uint32_t	queue_size;
	uint64_t	features;	/* the negotiated features with driver,
					 * and will be sync with device
					 */
	uint64_t	device_features;	/* supported features by device */
	uint64_t	frontend_features;	/* enabled frontend features */
	uint64_t	unsupported_features;	/* unsupported features mask */
	uint8_t		status;
	uint16_t	net_status;
	uint16_t	port_id;
	uint8_t		mac_addr[RTE_ETHER_ADDR_LEN];
	char		path[PATH_MAX];

	/* Split and packed ring descriptions share the same storage. */
	union {
		struct vring		vrings[VIRTIO_MAX_VIRTQUEUES];
		struct vring_packed	packed_vrings[VIRTIO_MAX_VIRTQUEUES];
	};
	struct virtio_user_queue packed_queues[VIRTIO_MAX_VIRTQUEUES];
	bool		qp_enabled[VIRTIO_MAX_VIRTQUEUE_PAIRS];

	struct virtio_user_backend_ops *ops;	/* backend operations table */
	pthread_mutex_t	mutex;
	bool		started;	/* set once the device has been started */
};
除了虛擬設備外,其實它主要涉及和VHOST以及相關數據隊列的操作,而那些數據結構在前面已經基本都介紹過了。
三、基本流程
其實在前面說了,virtio-user在虛擬環境中應用非常廣泛,在virtio-user文件夾(drivers/net/virtio/virtio_user)下可以看到,其實最主要的就是那幾個文件:
int
virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
int cq, int queue_size, const char *mac, char **ifname,
int server, int mrg_rxbuf, int in_order, int packed_vq)
{
pthread_mutex_init(&dev->mutex, NULL);
strlcpy(dev->path, path, PATH_MAX);
dev->started = 0;
dev->max_queue_pairs = queues;
dev->queue_pairs = 1; /* mq disabled by default */
dev->queue_size = queue_size;
dev->is_server = server;
dev->mac_specified = 0;
dev->frontend_features = 0;
dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES;
parse_mac(dev, mac);
if (*ifname) {
dev->ifname = *ifname;
*ifname = NULL;
}
if (virtio_user_dev_setup(dev) < 0) {
PMD_INIT_LOG(ERR, "backend set up fails");
return -1;
}
if (!dev->is_server) {
if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
NULL) < 0) {
PMD_INIT_LOG(ERR, "set_owner fails: %s",
strerror(errno));
return -1;
}
if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
&dev->device_features) < 0) {
PMD_INIT_LOG(ERR, "get_features failed: %s",
strerror(errno));
return -1;
}
} else {
/* We just pretend vhost-user can support all these features.
* Note that this could be problematic that if some feature is
* negotiated but not supported by the vhost-user which comes
* later.
*/
dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
}
if (!mrg_rxbuf)
dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);
if (!in_order)
dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);
if (!packed_vq)
dev->unsupported_features |= (1ull << VIRTIO_F_RING_PACKED);
if (dev->mac_specified)
dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC);
else
dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);
if (cq) {
/* device does not really need to know anything about CQ,
* so if necessary, we just claim to support CQ
*/
dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
} else {
dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
/* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
dev->unsupported_features |=
(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
dev->unsupported_features |=
(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
}
/* The backend will not report this feature, we add it explicitly */
if (is_vhost_user_by_type(dev->path))
dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);
/*
* Device features =
* (frontend_features | backend_features) & ~unsupported_features;
*/
dev->device_features |= dev->frontend_features;
dev->device_features &= ~dev->unsupported_features;
if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
virtio_user_mem_event_cb, dev)) {
if (rte_errno != ENOTSUP) {
PMD_INIT_LOG(ERR, "Failed to register mem event"
" callback\n");
return -1;
}
}
return 0;
}
先是對設備的初始化,然后進行Setup:
/*
 * Select the backend for @dev and run its setup.
 *
 * Server mode always uses the vhost-user ops and is rejected when dev->path
 * exists but is not a vhost-user path. In client mode the backend is picked
 * by is_vhost_user_by_type(dev->path); for the vhost-kernel backend two
 * per-queue-pair fd arrays (vhostfds, tapfds) are allocated and filled
 * with -1.
 *
 * After the backend is chosen, its setup() hook runs, followed by
 * virtio_user_dev_init_notify() and virtio_user_fill_intr_handle().
 *
 * @param dev device being set up; vhostfd, vhostfds, tapfds and ops are
 *            written here
 * @return 0 on success, -1 on any failure. If allocating the fd arrays
 *         fails, both are released and the pointers reset to NULL before
 *         returning.
 */
static int
virtio_user_dev_setup(struct virtio_user_dev *dev)
{
	uint32_t q;

	dev->vhostfd = -1;
	dev->vhostfds = NULL;
	dev->tapfds = NULL;

	if (dev->is_server) {
		if (access(dev->path, F_OK) == 0 &&
		    !is_vhost_user_by_type(dev->path)) {
			PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!");
			return -1;
		}
		dev->ops = &virtio_ops_user;
	} else {
		if (is_vhost_user_by_type(dev->path)) {
			dev->ops = &virtio_ops_user;
		} else {
			dev->ops = &virtio_ops_kernel;

			dev->vhostfds = malloc(dev->max_queue_pairs *
					       sizeof(int));
			dev->tapfds = malloc(dev->max_queue_pairs *
					     sizeof(int));
			if (!dev->vhostfds || !dev->tapfds) {
				PMD_INIT_LOG(ERR, "Failed to malloc");
				/*
				 * Only one of the two allocations may have
				 * failed: release whichever succeeded so it
				 * is not leaked (free(NULL) is a no-op), and
				 * leave no dangling pointers behind.
				 */
				free(dev->vhostfds);
				free(dev->tapfds);
				dev->vhostfds = NULL;
				dev->tapfds = NULL;
				return -1;
			}

			/* -1 marks an fd slot that has not been opened. */
			for (q = 0; q < dev->max_queue_pairs; ++q) {
				dev->vhostfds[q] = -1;
				dev->tapfds[q] = -1;
			}
		}
	}

	if (dev->ops->setup(dev) < 0)
		return -1;

	if (virtio_user_dev_init_notify(dev) < 0)
		return -1;

	if (virtio_user_fill_intr_handle(dev) < 0)
		return -1;

	return 0;
}
然后在處理用戶狀態時可以啟動:
//drivers/net/virtio/virtio_user_ethdev.c
/*
 * Apply a new virtio status byte to the virtio-user device behind @hw.
 *
 * If DRIVER_OK is set in @status the device is started; otherwise, if
 * @status equals the reset value, the device is reset. In every case the
 * value is then recorded in dev->status.
 *
 * NOTE(review): the result of virtio_user_start_device() is not checked
 * here, so a failed start is still recorded as the requested status.
 */
static void
virtio_user_set_status(struct virtio_hw *hw, uint8_t status)
{
	struct virtio_user_dev *dev = virtio_user_get_dev(hw);
	const bool driver_ok = (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) != 0;

	if (driver_ok) {
		virtio_user_start_device(dev);
	} else if (status == VIRTIO_CONFIG_STATUS_RESET) {
		virtio_user_reset(hw);
	}

	dev->status = status;
}
/*
 * Start a virtio-user device by driving the backend through its start
 * sequence: create queues, set features, share the memory table, kick the
 * queues and enable the first queue pair.
 *
 * The whole sequence runs with the memory config read lock and dev->mutex
 * held (taken in that order, released in the reverse order).
 *
 * @param dev device to start
 * @return 0 on success with dev->started set to true; -1 if the vhost-user
 *         connection is missing or any step fails. The result of enable_qp()
 *         is not checked.
 */
int
virtio_user_start_device(struct virtio_user_dev *dev)
{
	/* Feature bits that are never forwarded to the backend. */
	const uint64_t hidden_bits =
		/* MAC address is handled in vdev init */
		(1ull << VIRTIO_NET_F_MAC) |
		/* devices do not really need to know about the control vq */
		(1ull << VIRTIO_NET_F_CTRL_VQ) |
		(1ull << VIRTIO_NET_F_STATUS);
	uint64_t features;
	int rc = -1;

	/*
	 * XXX workaround!
	 *
	 * We need to make sure that the locks will be
	 * taken in the correct order to avoid deadlocks.
	 *
	 * Before releasing this lock, this thread should
	 * not trigger any memory hotplug events.
	 *
	 * This is a temporary workaround, and should be
	 * replaced when we get proper supports from the
	 * memory subsystem in the future.
	 */
	rte_mcfg_mem_read_lock();
	pthread_mutex_lock(&dev->mutex);

	/* A vhost-user device cannot start without a connection. */
	if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0) {
		goto unlock;
	}

	/* Step 0: tell vhost to create queues */
	if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0) {
		goto unlock;
	}

	/* Step 1: set features */
	features = dev->features & ~hidden_bits;
	if (dev->ops->send_request(dev, VHOST_USER_SET_FEATURES,
				   &features) < 0) {
		goto unlock;
	}
	PMD_DRV_LOG(INFO, "set features: %" PRIx64, features);

	/* Step 2: share memory regions */
	if (dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL) < 0) {
		goto unlock;
	}

	/* Step 3: kick queues */
	if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0) {
		goto unlock;
	}

	/* Step 4: enable queues
	 *         we enable the 1st queue pair by default.
	 */
	dev->ops->enable_qp(dev, 0, 1);

	dev->started = true;
	rc = 0;

unlock:
	pthread_mutex_unlock(&dev->mutex);
	rte_mcfg_mem_read_unlock();

	/* TODO: free resource here or caller to check */
	return rc;
}
這里其實會調用send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL)來傳遞內存數據(ops中設置),如果后端為vhost-user,實際調用的就是vhost_user_sock。
//drivers/net/virtio/virtio_user/vhost_user.c
/*
 * Send one vhost-user request over dev->vhostfd and, for the requests that
 * have one, read back and validate the reply.
 *
 * @param dev device whose vhostfd is used
 * @param req request type; selects how @arg is interpreted
 * @param arg request argument:
 *            - SET_FEATURES / SET_LOG_BASE: pointer to a 64-bit value (in)
 *            - GET_FEATURES: pointer to a 64-bit value (out)
 *            - SET_LOG_FD: pointer to an int fd (in)
 *            - SET_VRING_NUM / _BASE / _ENABLE: vring state (in)
 *            - GET_VRING_BASE: vring state (in and out)
 *            - SET_VRING_ADDR: vring address (in)
 *            - SET_VRING_KICK / _CALL / _ERR: struct vhost_vring_file (in)
 *            - SET_OWNER / RESET_OWNER / SET_MEM_TABLE: unused
 * @return 0 on success; -1 if the server-mode connection is missing, the
 *         request type is not handled, sending fails, or the reply is
 *         unreadable, of a different type, or of the wrong size.
 *
 * NOTE(review): `m` is not declared in this function; it is presumably a
 * file-scope struct vhost_user_msg used only as a sizeof() operand.
 */
static int
vhost_user_sock(struct virtio_user_dev *dev,
		enum vhost_user_request req,
		void *arg)
{
	struct vhost_user_msg msg;
	int fds[VHOST_MEMORY_MAX_NREGIONS];	/* fds passed with the message */
	int nfds = 0;
	int expect_reply = 0;
	int sock = dev->vhostfd;
	int total_len;

	RTE_SET_USED(m);

	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);

	/* In server mode there may be no connection yet. */
	if (dev->is_server && sock < 0) {
		return -1;
	}

	msg.request = req;
	msg.flags = VHOST_USER_VERSION;
	msg.size = 0;

	switch (req) {
	case VHOST_USER_GET_FEATURES:
		expect_reply = 1;
		break;

	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_LOG_BASE:
		msg.payload.u64 = *((__u64 *)arg);
		msg.size = sizeof(m.payload.u64);
		break;

	case VHOST_USER_SET_OWNER:
	case VHOST_USER_RESET_OWNER:
		/* header only */
		break;

	case VHOST_USER_SET_MEM_TABLE:
		if (prepare_vhost_memory_user(&msg, fds) < 0) {
			return -1;
		}
		/* one fd accompanies each memory region */
		nfds = msg.payload.memory.nregions;
		msg.size = sizeof(m.payload.memory.nregions) +
			   sizeof(m.payload.memory.padding) +
			   nfds * sizeof(struct vhost_memory_region);
		break;

	case VHOST_USER_SET_LOG_FD:
		fds[nfds++] = *((int *)arg);
		break;

	case VHOST_USER_GET_VRING_BASE:
		expect_reply = 1;
		/* fallthrough */
	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_ENABLE:
		memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
		msg.size = sizeof(m.payload.state);
		break;

	case VHOST_USER_SET_VRING_ADDR:
		memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
		msg.size = sizeof(m.payload.addr);
		break;

	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR: {
		struct vhost_vring_file *file = arg;

		msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
		msg.size = sizeof(m.payload.u64);
		/* NOTE(review): fd 0 is treated as "no fd" here. */
		if (file->fd > 0) {
			fds[nfds++] = file->fd;
		} else {
			msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
		}
		break;
	}

	default:
		PMD_DRV_LOG(ERR, "trying to send unhandled msg type");
		return -1;
	}

	total_len = VHOST_USER_HDR_SIZE + msg.size;
	if (vhost_user_write(sock, &msg, total_len, fds, nfds) < 0) {
		PMD_DRV_LOG(ERR, "%s failed: %s",
			    vhost_msg_strings[req], strerror(errno));
		return -1;
	}

	if (!expect_reply) {
		return 0;
	}

	if (vhost_user_read(sock, &msg) < 0) {
		PMD_DRV_LOG(ERR, "Received msg failed: %s",
			    strerror(errno));
		return -1;
	}

	/* The reply must carry the same request type that was sent. */
	if (req != msg.request) {
		PMD_DRV_LOG(ERR, "Received unexpected msg type");
		return -1;
	}

	switch (req) {
	case VHOST_USER_GET_FEATURES:
		if (msg.size != sizeof(m.payload.u64)) {
			PMD_DRV_LOG(ERR, "Received bad msg size");
			return -1;
		}
		*((__u64 *)arg) = msg.payload.u64;
		break;

	case VHOST_USER_GET_VRING_BASE:
		if (msg.size != sizeof(m.payload.state)) {
			PMD_DRV_LOG(ERR, "Received bad msg size");
			return -1;
		}
		memcpy(arg, &msg.payload.state,
		       sizeof(struct vhost_vring_state));
		break;

	default:
		PMD_DRV_LOG(ERR, "Received unexpected msg type");
		return -1;
	}

	return 0;
}
找到相關的VHOST_USER_SET_MEM_TABLE選項設置就可以看到數據的準備,從調用函數就可以一路深入進去,明白整個過程。這里就不再做介紹。文章來源:http://www.zghlxwxcb.cn/news/detail-430867.html
四、總結
通過上面的分析可以看出,virtio-user既可以實現虛擬機前后端的通信,也可以實現不同設備間的通信,還可以實現與內核間的通信。所以一種新的技術被提出后,會不斷的推動應用的向前發展,反過來,應用的發展又不斷要求前者提供更好的支持。互相促進,就會形成一個新的應用場景并有可能爆發。文章來源地址http://www.zghlxwxcb.cn/news/detail-430867.html
到了這里,關于DPDK系列之十六虛擬化virtio源碼分析之virtio-user的文章就介紹完了。如果您還想了解更多內容,請在右上角搜索TOY模板網以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持TOY模板網!