/* Boost.Flyweight example of parallel tokenization. * * Copyright 2024 Joaquin M Lopez Munoz. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) * * See http://www.boost.org/libs/flyweight for library home page. */ #include #include #include #include #include #include #include #include #include #include #include #include /* Handcrafted tokenizer for sequences of alphabetic characters */ inline bool match(char ch) { return (ch>='a' && ch<='z') || (ch>='A' && ch<='Z'); } template void tokenize(ForwardIterator first,ForwardIterator last,F f) { goto start; for(;;) { for(;;){ ++first; start: if(first==last)return; if(match(*first))break; } auto begin_word=first; for(;;){ if(++first==last||!match(*first)){ f(begin_word,first); if(first==last)return; else break; } } } } /* Tokenize a string into words in parallel and store the results into a * std::vector, String being std::string or a flyweight type. */ template void parse(const std::string& in,const char* type_name,std::size_t num_threads) { using namespace std::chrono; using string_iterator=std::string::const_iterator; auto t1=steady_clock::now(); /* Divide input in num_threads chunks, taking care that boundaries are not * placed in the middle of a token. */ std::vector boundaries(num_threads+1); boundaries[0]=in.begin(); for(std::size_t i=0;i threads(num_threads); std::vector partial_num_words(num_threads); for(std::size_t i=0;i thread_output_starts(num_threads); for(std::size_t i=0;i words(num_words,String()); for(std::size_t i=0;i>(t2-t1).count()<< " s\n"; } /* accept a file and parse it with std::string and various flyweight types */ int main(int argc, char** argv) { using namespace boost::flyweights; using regular_flyweight=flyweight; using concurrent_flyweight=flyweight< std::string, concurrent_factory<>, no_locking, no_tracking >; if(argc<2){ std::cout<<"specify a file\n"; std::exit(EXIT_FAILURE); } std::ifstream is(argv[1]); if(!is) { std::cout<<"can't open "<(is),std::istreambuf_iterator{}); parse(in,"std::string",1); parse(in,"std::string",8); parse(in,"regular flyweight",1); parse(in,"regular flyweight",8); parse(in,"concurrent flyweight",1); parse(in,"concurrent flyweight",8); }