异步遍历文件夹


#1

同步遍历一个文件夹已经让一些人倒下了, 不会写了, 这次我说的是, 还可以异步遍历文件夹, 而且异步递归进入子文件夹, 你相信有这样的代码不?

不相信?

你信也得信, 不信也得信


首先上测试代码! 这个测试代码的作用是遍历文件夹, 并删除最后修改日期超过1天的文件. 请看代码:


#pragma once

#include <string>

#include <boost/regex.hpp>
#include <boost/asio.hpp>
#include <boost/avloop.hpp>
#include <boost/filesystem.hpp>
#include <boost/async_dir_walk.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/timedcall.hpp>

namespace detail
{

/// Per-entry callback for boost::async_dir_walk that purges stale files.
///
/// Regular files whose last-write date is at least one calendar day older
/// than today's date are removed. Directories are descended into by starting
/// a nested asynchronous walk on them; the parent walk is only resumed (via
/// continuehandler) after the nested walk has completed.
///
/// Declared inline because the definition lives in a header (#pragma once).
///
/// @param io_service      io_service on which nested walks are scheduled.
/// @param item            directory entry currently being visited.
/// @param continuehandler continuation of the enclosing walk; invoked with a
///                        default-constructed error_code once this entry is done.
/// @param coro            stackless-coroutine state; a fresh coroutine() on the
///                        first call, the saved state when resumed after the
///                        nested walk.
///
/// NOTE(review): the non-error_code Boost.Filesystem overloads are used here,
/// so filesystem failures propagate as exceptions out of this handler.
inline void clean_cache_dir_walk_handler( boost::asio::io_service & io_service,
                           const boost::filesystem::path & item,
                           boost::async_dir_walk_continue_handler continuehandler,
                           boost::asio::coroutine coro )
{
	using namespace boost::system;

	BOOST_ASIO_CORO_REENTER( &coro )
	{
		if( boost::filesystem::is_regular_file( item ) )
		{
			boost::posix_time::ptime now = boost::posix_time::from_time_t( std::time( 0 ) );

			// Remove files whose last modification date is more than a day old.
			// The comparison is on calendar dates, not on a 24-hour interval.
			boost::posix_time::ptime last_write_time
				= boost::posix_time::from_time_t( boost::filesystem::last_write_time( item ) );

			if( ( now.date() - last_write_time.date() ).days() > 0 )
			{
				boost::filesystem::remove( item );
			}
		}
		else if( boost::filesystem::is_directory( item ) )
		{
			// Recurse: walk the sub-directory `item` itself. The second bind is
			// the completion handler; it re-enters this function with the saved
			// coroutine state so execution continues right after the yield.
			BOOST_ASIO_CORO_YIELD
				boost::async_dir_walk(
					io_service, item,
					boost::bind(
						clean_cache_dir_walk_handler,
						boost::ref( io_service ),
						_1, _2, boost::asio::coroutine()
					),
					boost::bind(
						clean_cache_dir_walk_handler,
						boost::ref( io_service ),
						item, continuehandler, coro
					)
				);
		}

		// Tell the enclosing walk that this entry is finished.
		continuehandler( error_code() );
	}
}

}

/// Starts an asynchronous clean-up walk over the current working directory.
///
/// Each directory entry is handed to detail::clean_cache_dir_walk_handler.
/// The call only schedules the walk; the work happens while io_service runs.
///
/// Declared inline because the definition lives in a header (#pragma once).
///
/// @param io_service io_service on which the walk is scheduled.
inline void clean_cache( boost::asio::io_service &io_service)
{
	boost::async_dir_walk(
		// boost::filesystem has no current(); current_path() returns the
		// process's working directory.
		io_service, boost::filesystem::current_path(),
		boost::bind(
			detail::clean_cache_dir_walk_handler,
			boost::ref( io_service ),
			_1, _2, boost::asio::coroutine()
		)
	);
}



这个代码是完全异步的, 每次回调只操作一个文件.

至于这里面的魔法, 哈哈, 下回分解, 没人关注这个帖子咱就不分解咯


#2

再贴上 boost::async_dir_walk 的代码


#pragma once

#include <boost/bind.hpp>
#include <boost/function.hpp>
#include <boost/asio.hpp>
#include <boost/filesystem.hpp>

#include <boost/avloop.hpp>

namespace boost{
namespace detail{

// Asynchronous directory-walk operation.
//
// Visits the entries of one directory, one entry per handler invocation.
// For every entry, dir_walk_handler is called with the entry's path and a
// continuation; the walk advances only after that continuation is invoked.
// When the iterator reaches the end, complete_handler is scheduled with the
// most recent error_code.
//
// The object is copied into every handler it binds (*this is passed by value).
// NOTE(review): this relies on copies of directory_iterator sharing the same
// underlying iteration state — confirm against the Boost.Filesystem version in use.
template<class DirWalkHandler, class CompleteHandler>
class async_dir_walk
{
	boost::asio::io_service &io_service;
	// Default-constructed iterator, used as the end sentinel.
	boost::filesystem::directory_iterator dir_it_end;
	// Current position in the directory being walked.
	boost::filesystem::directory_iterator dir_it_cur;
	DirWalkHandler dir_walk_handler;
	CompleteHandler complete_handler;
public:
	typedef void result_type;

	// Opens `path` for iteration and schedules the first step of the walk via
	// avloop_idle_post (declared in <boost/avloop.hpp>, not shown here;
	// presumably runs the handler when the io_service is idle).
	async_dir_walk( boost::asio::io_service & _io_service, boost::filesystem::path path, DirWalkHandler _dir_walk_handler, CompleteHandler _complete_handler)
		: io_service( _io_service ), dir_it_cur(path), dir_walk_handler(_dir_walk_handler), complete_handler(_complete_handler)
	{
		avloop_idle_post(io_service, boost::asio::detail::bind_handler( *this, boost::asio::coroutine(), boost::system::error_code() ) );
	}

	// One step of the walk.
	//   coro - coroutine state carried by value through the bound handlers.
	//   ec   - error code delivered by the previous step or by the entry
	//          handler's continuation.
	void operator()( boost::asio::coroutine coro, boost::system::error_code ec )
	{
		// Each invocation handles a single directory entry.
		BOOST_ASIO_CORO_REENTER( &coro )
		{
			for( ; dir_it_cur != dir_it_end ; dir_it_cur++ )
			{
				// Hand the current entry to the user handler and suspend; the
				// wrapped continuation resumes this operation with the handler's error code.
				BOOST_ASIO_CORO_YIELD dir_walk_handler(dir_it_cur->path(), io_service.wrap(boost::bind( *this, coro , _1)) );

				// Suspend again and re-schedule through avloop_idle_post before
				// advancing to the next entry.
				BOOST_ASIO_CORO_YIELD avloop_idle_post(io_service, boost::bind( *this, coro, ec ) );
			}

			// All entries visited: schedule the completion handler with the last error code.
			avloop_idle_post(io_service, boost::asio::detail::bind_handler(complete_handler, ec));
		}
	}

};

// No-op completion handler, used when the caller of async_dir_walk does not
// supply one.
class DirWalkDumyHandler
{
public:
	// Receives the final error code of the walk and ignores it.
	void operator()( boost::system::error_code /*ec*/ )
	{
	}
};

// Factory that deduces the handler types and builds an async_dir_walk
// operation. Constructing the operation already schedules its first step.
template<class DirWalkHandler, class CompleteHandler>
async_dir_walk<DirWalkHandler, CompleteHandler>
make_async_dir_walk_op( boost::asio::io_service & io_service,
                        boost::filesystem::path path,
                        DirWalkHandler dir_walk_handler,
                        CompleteHandler complete_handler )
{
	typedef async_dir_walk<DirWalkHandler, CompleteHandler> walk_op;
	return walk_op( io_service, path, dir_walk_handler, complete_handler );
}


} // namespace detail

// Type of the continuation that a DirWalkHandler receives as its second
// argument; calling it with an error_code lets the walk move on to the next entry.
typedef function<void(boost::system::error_code ec) > async_dir_walk_continue_handler;

// Starts an asynchronous walk over the entries of `path` without a
// user-supplied completion handler; a no-op handler is used instead.
//
//   io_service       - io_service on which the walk is scheduled.
//   path             - directory to iterate.
//   dir_walk_handler - called once per entry with (entry path, continuation).
template<class DirWalkHandler>
void async_dir_walk( boost::asio::io_service & io_service,
                     boost::filesystem::path path,
                     DirWalkHandler dir_walk_handler )
{
	detail::DirWalkDumyHandler ignore_completion;
	detail::make_async_dir_walk_op( io_service, path, dir_walk_handler, ignore_completion );
}


// Starts an asynchronous walk over the entries of `path`.
//
//   io_service       - io_service on which the walk is scheduled.
//   path             - directory to iterate.
//   dir_walk_handler - called once per entry with (entry path, continuation).
//   complete_handler - scheduled with the last error_code after the final entry.
template<class DirWalkHandler, class CompleteHandler>
void async_dir_walk( boost::asio::io_service & io_service,
                     boost::filesystem::path path,
                     DirWalkHandler dir_walk_handler,
                     CompleteHandler complete_handler )
{
	// The returned operation object is intentionally discarded: its
	// constructor has already posted a copy of itself.
	detail::make_async_dir_walk_op( io_service, path, dir_walk_handler, complete_handler );
}

}

#3

OMG,这个玩意,不是boost原装的吧


#4

不是原装的? 是利用原装写出来的啊!


#5

以前遍历目录从来没有想过异步,只是开个线程,你已经颠覆了我的思维!so beautiful 有时间,我把你的这点代码组装到我的一个程序里,看看效果如何


#6

竟然是异步递归非线程递归。。async_dir_walk强大哇


#7

何止是遍历目录, 遍历一切树结构都可以用这套算法 (当然代码要稍微修改修改)


#8

准备替换为更通用的算法

boost::async_foreach()


#9

菜菜威武。。。居然灌水都要超过14个字符


#10

准备改写为通用算法 async_foreach()

async_foreach() 的原型是


template<class InputIterator, class Pred, class Handler>
async_foreach(boost::asio::io_service &io_service, InputIterator first, InputIterator last , Pred pred, Handler handler );


#11

围绕asio的io_service真是玩出花来了。 这么实现轻量、快速,对堆栈没要求呀。


#12

老公好棒 ~~~~~~~~~~


#13

新的 async_dir_walk 异常简单, 就是这个

// Directory walk expressed through the generic async_foreach: the range is
// [directory_iterator(path), end-of-directory iterator).
template<class DirWalkHandler, class CompleteHandler>
void async_dir_walk( boost::asio::io_service & io_service,
                     boost::filesystem::path path,
                     DirWalkHandler dir_walk_handler,
                     CompleteHandler complete_handler )
{
	boost::filesystem::directory_iterator entries_begin( path );
	boost::filesystem::directory_iterator entries_end;

	async_foreach( io_service, entries_begin, entries_end, dir_walk_handler, complete_handler );
}

#14

新的 async_foreach 的代码如下


#pragma once

#include <boost/asio/coroutine.hpp>
#include <boost/asio/io_service.hpp>
#include <boost/avloop.hpp>

namespace boost{
namespace detail{

// Asynchronous for-each operation over [first, last).
//
// Each handler invocation processes one element: _pred is called with the
// element and a continuation, and iteration advances only after that
// continuation is invoked. After the last element, m_handler is scheduled
// with the most recent error_code.
//
// The coroutine state lives in the boost::asio::coroutine base and is copied
// together with the object whenever *this is bound into a handler.
template<class InputIterator, class Pred, class Handler>
class async_foreach_op : boost::asio::coroutine
{
	boost::asio::io_service & m_io_service;
	// m_first/m_last delimit the range; m_current is the element being visited.
	InputIterator	m_first, m_last, m_current;
	// Per-element callback.
	Pred _pred;
	// Completion handler.
	Handler m_handler;
public:
	typedef void result_type;

	// Stores the range and handlers, then schedules the first step through
	// avloop_idle_post (declared in <boost/avloop.hpp>, not shown here;
	// presumably runs the handler when the io_service is idle).
	async_foreach_op(boost::asio::io_service & io_service, InputIterator first,  InputIterator last, Pred pred, Handler handler)
		: m_io_service(io_service), m_first(first), m_last(last), _pred(pred), m_handler(handler)
	{
		avloop_idle_post(m_io_service, boost::asio::detail::bind_handler(*this, boost::system::error_code()));
	}

	// One step of the iteration; `ec` is the error code delivered by the
	// previous step or by the element callback's continuation.
	void operator()(boost::system::error_code ec)
	{
		// Each invocation handles a single element.
		BOOST_ASIO_CORO_REENTER(this)
		{
			for( m_current = m_first; m_current != m_last ; ++m_current)
			{
				// Hand the current element to the callback and suspend; the
				// wrapped continuation resumes this operation.
				BOOST_ASIO_CORO_YIELD
					_pred(*m_current, m_io_service.wrap(boost::bind(*this, _1)));

				// Suspend again and re-schedule through avloop_idle_post
				// before advancing to the next element.
				BOOST_ASIO_CORO_YIELD
					avloop_idle_post(m_io_service, boost::asio::detail::bind_handler(*this, ec));
			}

			// Range exhausted: schedule the completion handler with the last error code.
			avloop_idle_post(m_io_service, boost::asio::detail::bind_handler(m_handler, ec));
		}
	}

};

// Factory that deduces the template arguments and builds an async_foreach_op.
// Constructing the operation already schedules its first step.
template<class InputIterator, class Pred, class Handler>
async_foreach_op<InputIterator, Pred, Handler>
make_async_foreach_op( boost::asio::io_service & io_service,
                       InputIterator first,
                       InputIterator last,
                       Pred pred,
                       Handler handler )
{
	typedef async_foreach_op<InputIterator, Pred, Handler> foreach_op;
	return foreach_op( io_service, first, last, pred, handler );
}

} // namespace detail

// Asynchronously visits every element of [first, last), one element per
// handler invocation on io_service.
//
//   pred    - called per element with (element, continuation); iteration
//             proceeds once the continuation is invoked.
//   handler - scheduled with the last error_code after the final element.
template<class InputIterator, class Pred, class Handler>
void async_foreach( boost::asio::io_service & io_service,
                    InputIterator first,
                    InputIterator last,
                    Pred pred,
                    Handler handler )
{
	// The returned operation object is intentionally discarded: its
	// constructor has already posted a copy of itself.
	detail::make_async_foreach_op( io_service, first, last, pred, handler );
}

} // namespace boost

#15

删文件有异步操作api吗 只把操作post出去没啥效果啊 当然遍历树形结构过程变成类链表的状态机很cool


#16

系统 API 限制啊, 好多 API都没有异步版本, 真不爽.

就是将递归遍历树结构的算法变成了形式上递归、实际上非递归的过程

可以用来解决一些不得不使用递归、而递归层数又很深的问题, 比如说象棋搜索算法.


#17

不是有想知道原理的人么???

都不积极回复啊

真是的


#18

完全看不懂,不敢乱说话、。。


#19

开讲吧,

虽然原理我大致有所了解了。

这个操作过程应该主要花费时间的地方就是删除文件的io操作,遍历很快,但整个操作的完成有大量的时间是在等io_service去一个一个处理队列中的每个删除单个文件的操作。


#20

博士把原理说说,使用了哪些基本的东东?