我们要坚定我们的asio信仰


#1

今天看到 http://my.oschina.net/u/200693/blog/34462 测评asio的效率, 结果如下表:

c++ boost::asio connect=10000,active connect=100,req=148791,time=60,req/sec=2479.85,msec/req=40.343

erlang kernel-poll false connect=10000,active connect=100,req=979803,time=60,req/sec=16330,msec/req=6.12356

node.js connect=10000,active connect=100,req=1378370,time=60,req/sec=22972.8,msec/req=4.35543

c libevent connect=10000,active connect=100,req=3719106,time=60,req/sec=61985.1,msec/req=1.61258

erlang kernel-poll true connect=10000,active connect=100,req=6377574,time=60,req/sec=106293,msec/req=0.939882

看到这个数据,立马吓尿了. 见过黑C++,黑Boost的,没见过黑这么狠的.这数据简直是要颠覆我的世界观啊!!!

没有办法,为了查明真相,只能自己写个程序测试一下. 原帖子里面包含了一个asio的echo_server, 稍微看了一下,代码没多大问题,就是delete this有点刺眼, 于是顺便改了一下程序.

// echo_server.cpp
// g++ -o echo_server -O3 echo_server.cpp -lboost_system -lboost_thread
#include <cstdlib>
#include <iostream>
#include <boost/bind.hpp>
#include <boost/asio.hpp>
#include <boost/thread/thread.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/enable_shared_from_this.hpp>

using boost::asio::ip::tcp;

// Running count of accepted connections; incremented and printed by
// server::handle_accept each time an accept completes without error.
int total_conn = 0;

/// One accepted client connection that echoes back whatever it receives.
///
/// Every asynchronous operation is bound to shared_from_this(), so the
/// pending handler holds a shared_ptr to the session. When a handler sees an
/// error it starts no further operation.
class session
	: public boost::enable_shared_from_this<session>
{
public:
  session(boost::asio::io_service& io_service)
    : socket_(io_service)
  {
  }

  /// Socket owned by this session; the acceptor accepts into it.
  tcp::socket& socket()
  {
    return socket_;
  }

  /// Begin the read/write cycle by posting the first read.
  void start()
  {
    post_read();
  }

private:
  /// Post one async read of up to max_length bytes into data_.
  void post_read()
  {
    socket_.async_read_some(
        boost::asio::buffer(data_, max_length),
        boost::bind(&session::handle_read, shared_from_this(),
          boost::asio::placeholders::error,
          boost::asio::placeholders::bytes_transferred));
  }

  /// Read completion: write the received bytes back to the peer.
  void handle_read(const boost::system::error_code& error,
      size_t bytes_transferred)
  {
    if (error)
      return;

    boost::asio::async_write(
        socket_,
        boost::asio::buffer(data_, bytes_transferred),
        boost::bind(&session::handle_write, shared_from_this(),
          boost::asio::placeholders::error));
  }

  /// Write completion: go back to waiting for more input.
  void handle_write(const boost::system::error_code& error)
  {
    if (error)
      return;

    post_read();
  }

  tcp::socket socket_;
  enum { max_length = 1024 };
  char data_[max_length];
};

class server
{
public:
  server(boost::asio::io_service& io_service, short port)
    : io_service_(io_service),
      acceptor_(io_service, tcp::endpoint(tcp::v4(), port))
  {
    start_accept();
  }

private:
  void start_accept()
  {
	  boost::shared_ptr<session> new_session(new session(io_service_));
    acceptor_.async_accept(new_session->socket(),
        boost::bind(&server::handle_accept, this, new_session,
          boost::asio::placeholders::error));
  }

  void handle_accept(boost::shared_ptr<session> new_session,
      const boost::system::error_code& error)
  {
    start_accept();

    if (!error)
    {
      std::cout << "total connect =" << ++total_conn <<std::endl;
      new_session->start();
    }
  }

  boost::asio::io_service& io_service_;
  tcp::acceptor acceptor_;
};

int main(int argc, char* argv[])
{
  try
  {
    if (argc < 2)
    {
      std::cerr << "Usage: async_tcp_echo_server <port>\n";
      return 1;
    }

    boost::asio::io_service io_service;

    using namespace std; // For atoi.
    server s(io_service, atoi(argv[1]));

    int thread_num = 6;
    if (argc > 2)
	  thread_num = atoi(argv[2]);

    boost::thread_group th_group; 
    for (int i=0; i<thread_num; ++i)
    {
	  th_group.add_thread(new boost::thread(boost::bind(&boost::asio::io_service::run, &io_service)));
    }

    th_group.join_all();
  }
  catch (std::exception& e)
  {
    std::cerr << "Exception: " << e.what() << "\n";
  }

  return 0;
}

然后把nodejs, erlang, libevent都拿来对比一下(nodejs还真是简洁)

/// echo_server.js
var net = require("net");

// Echo server: every chunk received on a connection is written straight
// back to the same connection. Listens on TCP port 8000.
var server = net.createServer(function(socket) {
    socket.on('data', function(data) {
        socket.write(data);
    });
    // A socket without an 'error' listener turns any socket error (for
    // example a connection reset by the peer) into an uncaught exception
    // that terminates the whole process. Drop just this connection instead.
    socket.on('error', function() {
        socket.destroy();
    });
});
server.listen(8000);

下面是erlang的

-module(echo_server).
-export([start/0]).

%% Open the listening socket on port 9000 and spawn the first acceptor
%% process. Returns the pid of that acceptor.
start() ->
        %% The {packet, 4} option is left disabled.
        ListenOpts = [binary,
                      {reuseaddr, true},
                      {backlog, 2000},
                      {active, true}],
        {ok, ListenSocket} = gen_tcp:listen(9000, ListenOpts),
        spawn(fun() -> par_connect(ListenSocket, 0) end).

%% Acceptor process: wait for one client on Listen, print the running
%% connection count, spawn a fresh acceptor for the next client, and then
%% serve the accepted client in this process.
par_connect(Listen, Count) ->
        {ok, ClientSocket} = gen_tcp:accept(Listen),
        Accepted = Count + 1,
        io:format("Accept succ ~p~n", [Accepted]),
        spawn(fun() -> par_connect(Listen, Accepted) end),
        loop(ClientSocket).

%% Echo loop for one connection: each {tcp, Socket, Data} message is sent
%% back on the same socket; a {tcp_closed, Socket} message ends the loop.
loop(Socket) ->
    receive
        {tcp_closed, Socket} ->
            io:format("Server socket closed~n");
        {tcp, Socket, Data} ->
            gen_tcp:send(Socket, Data),
            loop(Socket)
    end.

下面是libevent版本

#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <event.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <fcntl.h>

/* Size in bytes of the stack buffer declared in connection_echo. */
int buf_len = 8192;
/* Maximum number of bytes connection_echo requests from a single read(). */
int msg_len = 4096; 
/* Accept counter; it appears only in the commented-out debug print in
 * connection_accept. */
int total = 0;

/*
 * Put `fd` into non-blocking mode by adding O_NONBLOCK to its status flags.
 *
 * Returns 0 on success. If reading the flags fails, the negative result of
 * fcntl(F_GETFL) is returned; if writing them back fails, -1 is returned.
 */
int setnonblock(int fd)
{
	const int current = fcntl(fd, F_GETFL);

	if (current < 0)
		return current;

	return (fcntl(fd, F_SETFL, current | O_NONBLOCK) < 0) ? -1 : 0;
}

/*
 * Read callback for one client connection: echo back what was just read.
 *
 *   fd    - the client socket that became readable.
 *   event - libevent event flags (unused).
 *   arg   - the heap-allocated struct event registered for this socket by
 *           connection_accept; it is re-armed or freed here.
 *
 * The event is registered without EV_PERSIST, so it must be re-added after
 * every callback for the connection to stay monitored. On end-of-file or a
 * hard error the socket is closed and the event is freed instead; without
 * that, a closed peer leaves the fd permanently readable and the server
 * leaks the fd and the event while spinning on the callback.
 */
void connection_echo(int fd, short event, void *arg)
{
	struct event *ev = (struct event *)arg;
	char buf[buf_len];
	/* Never ask read() for more bytes than the buffer can hold. */
	int want = msg_len < buf_len ? msg_len : buf_len;
	ssize_t read_len = read(fd, buf, want);

	if (read_len < 0 &&
	    (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) {
		/* Nothing to read right now: keep the connection and wait again. */
		event_add(ev, NULL);
		return;
	}

	if (read_len <= 0) {
		/* 0 means the peer closed; a negative value here is a hard error. */
		close(fd);
		free(ev);
		return;
	}

	/*
	 * Best-effort echo: a short write on the non-blocking socket is not
	 * retried. A hard write error tears the connection down.
	 */
	if (write(fd, buf, (size_t)read_len) < 0 &&
	    errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
		close(fd);
		free(ev);
		return;
	}

	event_add(ev, NULL);
}

/*
 * Read callback for the listening socket: accept one client and register a
 * read event for it that runs connection_echo.
 *
 *   fd    - the listening socket.
 *   event - libevent event flags (unused).
 *   arg   - the listening event itself (unused).
 *
 * Every failure after accept() closes the new socket, so a half-set-up
 * connection never leaks its descriptor or its event.
 */
void connection_accept(int fd, short event, void *arg)
{
	/* for debugging */
	//fprintf(stderr, "%s(): fd = %d, event = %d,	total = %d.\n", __func__, fd, event, ++total);

	struct sockaddr_in s_in;
	socklen_t len = sizeof(s_in);
	struct event *ev;
	int ns;

	/* Accept a new connection. */
	ns = accept(fd, (struct sockaddr *) &s_in, &len);
	if (ns < 0) {
		perror("accept");
		return;
	}

	if (setnonblock(ns) < 0) {
		perror("fcntl");
		close(ns);
		return;
	}

	/* Install echo server. The event is handed to connection_echo as its
	 * own callback argument so that callback can re-arm or free it. */
	ev = (struct event *)malloc(sizeof(struct event));
	if (ev == NULL) {
		perror("malloc");
		close(ns);
		return;
	}

	event_set(ev, ns, EV_READ, connection_echo, ev);
	if (event_add(ev, NULL) < 0) {
		fprintf(stderr, "event_add failed for fd %d\n", ns);
		free(ev);
		close(ns);
	}
}

/*
 * Entry point of the libevent echo server: listen on TCP port 9000 on all
 * IPv4 interfaces and dispatch events until event_dispatch() returns.
 * Exits with status 1 if any setup step fails.
 */
int main(void)
{
    int s;
    int reuse = 1;
    struct sockaddr_in s_in;
    struct event ev;

    /* Request socket. */
    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s < 0) {
        perror("socket");
        exit(1);
    }

    /* Allow an immediate restart while old connections on this port are
     * still being cleaned up by the kernel. */
    if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        perror("setsockopt");
        exit(1);
    }

    /* bind() */
    memset(&s_in, 0, sizeof(s_in));
    s_in.sin_family = AF_INET;
    s_in.sin_port = htons(9000);
    s_in.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(s, (struct sockaddr *) &s_in, sizeof(s_in)) < 0) {
        perror("bind");
        exit(1);
    }

    /* listen() */
    if (listen(s, 1000) < 0) {
        perror("listen");
        exit(1);
    }

    /* A blocking listener can stall the whole event loop inside accept()
     * if the pending connection disappears between the readiness
     * notification and the accept() call. */
    if (setnonblock(s) < 0) {
        perror("fcntl");
        exit(1);
    }

    /* Initial libevent. */
    event_init();

    /* Create event. EV_PERSIST keeps the accept callback armed. */
    event_set(&ev, s, EV_READ | EV_PERSIST, connection_accept, &ev);

    /* Add event. */
    if (event_add(&ev, NULL) < 0) {
        fprintf(stderr, "event_add failed for the listening socket\n");
        exit(1);
    }

    event_dispatch();

    return 0;
}

最后是测试程序,用我们最爱的asio写的: 发起10000个异步连接, 连接成功后写入"hello world", 然后等待返回, 返回后就断开连接.

// echo_client.cpp
// g++ -o echo_client -O3 echo_client.cpp -lboost_system -lboost_thread
#include <boost/asio.hpp>
namespace asio = boost::asio;
using asio::ip::tcp;
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/enable_shared_from_this.hpp>


/// Client side of one echo round-trip: send a 12-byte greeting, then read
/// 12 bytes back. Nothing further is started after the read completes.
///
/// Each pending operation is bound to shared_from_this(), so the handler
/// holds a shared_ptr to the session while the operation is outstanding.
class session 
	: public boost::enable_shared_from_this<session>
{
public:
	session(asio::io_service& io)
		: socket_(io)
	{
		// Fill the payload before it is sent; left uninitialized, the
		// client would transmit 12 indeterminate bytes. "hello world" plus
		// its terminating NUL is exactly message_length bytes.
		static const char greeting[message_length] = "hello world";
		for(std::size_t i = 0; i < sizeof(output_buffer_); ++i)
			output_buffer_[i] = greeting[i];
	}

	/// Socket owned by this session; the caller connects it before start().
	tcp::socket& socket()
	{ return socket_; }

	/// Send the greeting; handle_write continues with the read.
	void start()
	{
		asio::async_write(socket_, asio::buffer(output_buffer_, message_length), boost::bind(&session::handle_write, shared_from_this(), _1, _2));
	}

	/// Write completion: on success wait for the full echo, else report.
	void handle_write(const boost::system::error_code& ec, std::size_t bytes_transfered)
	{
		if(!ec) 
		{
			asio::async_read(socket_, asio::buffer(input_buffer_, message_length), boost::bind(&session::handle_read, shared_from_this(), _1, _2));
		} else {
			std::cerr << "write error:" << ec.message() << std::endl;
		}
	}

	/// Read completion: only reports an error; no new operation is started.
	void handle_read(const boost::system::error_code& ec, std::size_t bytes_transfered)
	{
		if(ec)
		{
			std::cerr << "read error:" << ec.message() << std::endl;
		}
	}

private:
	enum { message_length = 12 };
	tcp::socket socket_;
	char output_buffer_[message_length];
	char input_buffer_[message_length];
};


/// Connect completion: start the session's echo round-trip on success,
/// otherwise print the error to stderr.
void handle_connect(boost::shared_ptr<session> session_ptr, const boost::system::error_code& ec)
{
	if(!ec)
	{
		session_ptr->start();
		return;
	}

	std::cerr << "connect error:" << ec.message() << std::endl;
}


/// Entry point of the echo client.
///
/// Usage: echo_client <port>
/// Resolves localhost:<port>, queues 10000 asynchronous connects (each with
/// its own session) and runs the io_service until all of them have finished.
/// Returns 1 when the port argument is missing.
int main(int argc, char* argv[])
{
	// argv[1] is a null pointer when no argument is given; building the
	// resolver query from it would be undefined behaviour.
	if(argc < 2)
	{
		std::cerr << "Usage: echo_client <port>" << std::endl;
		return 1;
	}

	asio::io_service io;
	tcp::resolver resolver(io);
	tcp::resolver::iterator endpoint = resolver.resolve(tcp::resolver::query("localhost", argv[1]));
	boost::shared_ptr<session> session_ptr;
	for(int i = 0; i < 10000; i++)
	{
		// The bound handler keeps its own copy of the shared_ptr, so
		// resetting the local one on the next iteration does not destroy
		// a session whose connect is still pending.
		session_ptr.reset(new session(io));
		asio::async_connect(session_ptr->socket(), endpoint, boost::bind(handle_connect, session_ptr, _1));
	}
	io.run();
	return 0;
}

我首先测试了一下c++版本, 结果悲剧了,开2000个连接,居然要3s?

但是靠着坚定的asio/C++信仰, 仔细想了一想原因, 接着看了一下服务器日志,发现服务器连接数大概在1000多. 差不多每次都是这么多. 估计是ulimit的限制, 使用ulimit -n 99999放开服务器和客户端的限制. 果然,处理10000连接只需0.7秒.

c++测试结果:

localhost test # time ./echo_client 8000

real    0m0.798s
user    0m0.169s
sys     0m0.626s
localhost test # time ./echo_client 8000

real    0m0.843s
user    0m0.132s
sys     0m0.707s
localhost test # time ./echo_client 8000

real    0m0.762s
user    0m0.161s
sys     0m0.598s
localhost test # time ./echo_client 8000

real    0m0.774s
user    0m0.145s
sys     0m0.628s

但是测试过程中碰到了另一个问题: 重复测试的时候, 会在连接时出现 Cannot assign requested address 错误. 经过网上查找资料, 原因是可用的本地端口被用光了(上一轮测试留下的大量连接还处于 TIME_WAIT 状态), 系统还没来得及回收. 使用如下命令开启 tcp_tw_reuse:

echo "1" > /proc/sys/net/ipv4/tcp_tw_reuse

这样测试就ok了,每次都在0.7秒左右.

接下来测试 nodejs

nodejs测试结果:

localhost test # time ./echo_client 8000

real    0m3.193s
user    0m0.140s
sys     0m0.484s
localhost test # time ./echo_client 8000

real    0m0.729s
user    0m0.144s
sys     0m0.494s
localhost test # time ./echo_client 8000

real    0m3.176s
user    0m0.141s
sys     0m0.489s
localhost test # time ./echo_client 8000

real    0m3.727s
user    0m0.136s
sys     0m0.489s

下面是libevent的结果, 效率挺不错的, 比asio快了10%左右(有人想问,为什么只有一次测试? 那是因为server挂掉了, 我不是很懂c, 不知道内存泄露出在哪里)

localhost test # time ./echo_client 9000

real    0m0.643s
user    0m0.130s
sys     0m0.512s

最后是那位作者力挺的erlang. 还好我看过几天erlang, 要不然我都不知道程序是怎么运行的: 先用 erl +K true 启用kernel-poll并进入shell, 再用 c(echo_server). 编译echo_server程序, 最后用 echo_server:start(). 开启服务器.

/// 前面还有好多好多reset
..... 
read error:Connection reset by peer
read error:Connection reset by peer

real    0m13.207s
user    0m0.168s
sys     0m0.418s

结果就是这个结果, 要是不信的话可以自己去试试.


#2

GC 语言能有个鸡巴性能。


#3

糟糕的性能很多时候是语言本身带来的!所以不要跟我讲什么"算法决定性能,语言并不重要"这种片面极端的观点。


#4

瓶颈往往不在 echo 上面