From 37e060d0cacba6f67a54554e0b328188029ddf40 Mon Sep 17 00:00:00 2001 From: Muhammad Junaid Muzammil Date: Sun, 22 Mar 2015 16:53:47 +0500 Subject: [PATCH] Threefry Random 123 Support Added --- example/threefry.cpp | 48 +++++ include/boost/compute/random/threefry.hpp | 252 ++++++++++++++++++++++ test/CMakeLists.txt | 1 + test/test_threefry.cpp | 98 +++++++++ 4 files changed, 399 insertions(+) create mode 100644 example/threefry.cpp create mode 100644 include/boost/compute/random/threefry.hpp create mode 100644 test/test_threefry.cpp diff --git a/example/threefry.cpp b/example/threefry.cpp new file mode 100644 index 00000000..86c0d084 --- /dev/null +++ b/example/threefry.cpp @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Muhammad Junaid Muzammil +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://kylelutz.github.com/compute for more information. +//---------------------------------------------------------------------------// + + +#include +#include +#include +#include +#include +#include +#include + +int main() +{ + using boost::compute::uint_; + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + boost::compute::random::threefry rng(queue); + boost::compute::vector vector_ctr(20, context); + boost::compute::vector vector_key(20, context); + + uint32_t ctr[20]; + uint32_t key[20]; + for(int i = 0; i < 10; i++) { + ctr[i*2] = i; + ctr[i*2+1] = 0; + key[i*2] = 0; + key[i*2+1] = 0; + } + boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); + boost::compute::copy(key, key+20, vector_key.begin(), queue); + rng.generate(queue, vector_ctr.begin(), vector_ctr.end(), vector_key.begin(), vector_key.end()); + boost::compute::copy(vector_ctr.begin(), vector_ctr.end(), ctr, queue); + + for(int i = 0; i < 10; i++) { + std::cout << std::hex << ctr[i*2] << " " << ctr[i*2+1] << std::endl; + } + return 0; +} + diff --git a/include/boost/compute/random/threefry.hpp b/include/boost/compute/random/threefry.hpp new file mode 100644 index 00000000..028e6cf8 --- /dev/null +++ b/include/boost/compute/random/threefry.hpp @@ -0,0 +1,252 @@ +// Added By: Muhammad Junaid Muzammil +// Copyright 2010-2012, D. E. Shaw Research. +// All rights reserved. + +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: + +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions, and the following disclaimer. + +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions, and the following disclaimer in the +// documentation and/or other materials provided with the distribution. + +// * Neither the name of D. E. Shaw Research nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. + +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP +#define BOOST_COMPUTE_RANDOM_THREEFRY_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost { +namespace compute { +namespace random { + + +/// \class threefry_rng_engine +/// \brief Threefry pseudorandom number generator. +template +class threefry_engine +{ +public: + typedef T result_type; + + /// Creates a new threefry_engine and seeds it with \p value. + explicit threefry_engine(command_queue &queue) + : m_context(queue.get_context()) + { + // setup program + load_program(); + } + + /// Creates a new threefry_engine object as a copy of \p other. + threefry_engine(const threefry_engine &other) + : m_context(other.m_context) + { + } + + /// Copies \p other to \c *this. + threefry_engine& operator=(const threefry_engine &other) + { + if(this != &other){ + m_context = other.m_context; + } + + return *this; + } + + /// Destroys the threefry_engine object. + ~threefry_engine() + { + } + +private: + /// \internal_ + void load_program() + { + boost::shared_ptr cache = + detail::get_program_cache(m_context); + std::string cache_key = + std::string("threefry_engine_32x2"); + + m_program = cache->get(cache_key); + if(!m_program.get()){ + const char source[] = + + "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n" + "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n" + + "enum r123_enum_threefry32x2 {\n" + " R_32x2_0_0=13,\n" + " R_32x2_1_0=15,\n" + " R_32x2_2_0=26,\n" + " R_32x2_3_0= 6,\n" + " R_32x2_4_0=17,\n" + " R_32x2_5_0=29,\n" + " R_32x2_6_0=16,\n" + " R_32x2_7_0=24\n" + "};\n" + + "static uint RotL_32(uint x, uint N)\n" + "{\n" + " return (x << (N & 31)) | (x >> ((32-N) & 31));\n" + "}\n" + + "struct r123array2x32 {\n" + " uint v[2];\n" + "};\n" + "typedef struct r123array2x32 threefry2x32_ctr_t;\n" + "typedef struct r123array2x32 threefry2x32_key_t;\n" + + "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n" + "{\n" + " threefry2x32_ctr_t X;\n" + " uint ks[3];\n" + " uint i; \n" + " ks[2] = SKEIN_KS_PARITY_32;\n" + " for (i=0;i < 2; i++) {\n" + " ks[i] = k.v[i];\n" + " X.v[i] = in.v[i];\n" + " ks[2] ^= k.v[i];\n" + " }\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>3){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 1;\n" + " }\n" + " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>7){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 2;\n" + " }\n" + " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>11){\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " X.v[1] += 3;\n" + " }\n" + " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>15){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 4;\n" + " }\n" + " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>19){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 5;\n" + " }\n" + " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>23){\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " X.v[1] += 6;\n" + " }\n" + " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>27){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 7;\n" + " }\n" + " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>31){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 8;\n" + " }\n" + " return X;\n" + "}\n" + + "__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint size) {\n" + " threefry2x32_ctr_t in;\n" + " threefry2x32_key_t k;\n" + " for(uint i = 0; i < size; i+=2) {\n" + " in.v[0] = ctr[i];\n" + " in.v[1] = ctr[i+1];\n" + " k.v[0] = key[i];\n" + " k.v[1] = key[i+1];\n" + " in = threefry2x32_R(20, in, k);\n" + " ctr[i] = in.v[0];\n" + " ctr[i+1] = in.v[1];\n" + " }\n" + "}\n"; + + m_program = program::build_with_source(source, m_context); + + cache->insert(cache_key, m_program); + } + } + +public: + template + void generate(command_queue &queue, OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key) { + const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); + const size_t size_key = detail::iterator_range_size(first_key, last_key); + if(!size_ctr || !size_key || (size_ctr != size_key)) { + return; + } + kernel rng_kernel = m_program.create_kernel("generate_rng"); + rng_kernel.set_arg(0, first_ctr.get_buffer()); + rng_kernel.set_arg(1, first_key.get_buffer()); + rng_kernel.set_arg(2, static_cast(size_ctr)); + queue.enqueue_1d_range_kernel(rng_kernel, 0, 1, 0); + } + +private: + context m_context; + program m_program; +}; + +typedef threefry_engine threefry; + +} // end random namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 41f73efb..92506b16 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -173,6 +173,7 @@ add_compute_test("random.bernoulli_distribution" test_bernoulli_distribution.cpp add_compute_test("random.discrete_distribution" test_discrete_distribution.cpp) add_compute_test("random.linear_congruential_engine" test_linear_congruential_engine.cpp) add_compute_test("random.mersenne_twister_engine" test_mersenne_twister_engine.cpp) +add_compute_test("random.threefry" test_threefry.cpp) add_compute_test("random.normal_distribution" test_normal_distribution.cpp) add_compute_test("random.uniform_int_distribution" test_uniform_int_distribution.cpp) add_compute_test("random.uniform_real_distribution" test_uniform_real_distribution.cpp) diff --git a/test/test_threefry.cpp b/test/test_threefry.cpp new file mode 100644 index 00000000..62794985 --- /dev/null +++ b/test/test_threefry.cpp @@ -0,0 +1,98 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Muhammad Junaid Muzammil +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://kylelutz.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestThreefry +#include + +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +BOOST_AUTO_TEST_CASE(generate_uint) +{ + using boost::compute::uint_; + + boost::compute::threefry rng(queue); + + boost::compute::vector vector(10, context); + + rng.generate(vector.begin(), vector.end(), queue); + + + using boost::compute::uint_; + + boost::compute::random::threefry rng(queue); + + boost::compute::vector vector_ctr(20, context); + + boost::compute::vector vector_key(20, context); + + uint32_t ctr[20]; + uint32_t key[20]; + for(int i = 0; i < 10; i++) { + ctr[i*2] = i; + ctr[i*2+1] = 0; + key[i*2] = 0; + key[i*2+1] = 0; + } + + boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); + boost::compute::copy(key, key+20, vector_key.begin(), queue); + + rng.generate(queue, vector_ctr.begin(), vector_ctr.end(), vector_key.begin(), vector_key.end()); + CHECK_RANGE_EQUAL( + uint_, 20, vector, + (uint_(0x6b200159), + uint_(0x99ba4efe), + uint_(0x508efb2c), + uint_(0xc0de3f32), + uint_(0x64a626ec), + uint_(0xfc15e573), + uint_(0xb8abc4d1), + uint_(0x537eb86), + uint_(0xac6dc2bb), + uint_(0xa7adb3c3), + uint_(0x5641e094), + uint_(0xe4ab4fd), + uint_(0xa53c1ce9), + uint_(0xabcf1dba), + uint_(0x2677a25a), + uint_(0x76cf5efc), + uint_(0x2d08247f), + uint_(0x815480f1), + uint_(0x2d1fa53a), + uint_(0xdfe8514c)) + ); +} + +BOOST_AUTO_TEST_CASE(discard_uint) +{ + using boost::compute::uint_; + + boost::compute::threefry rng(queue); + + boost::compute::vector vector(5, context); + + rng.discard(5, queue); + rng.generate(vector.begin(), vector.end(), queue); + + CHECK_RANGE_EQUAL( + uint_, 5, vector, + (uint_(4161255391), + uint_(3922919429), + uint_(949333985), + uint_(2715962298), + uint_(1323567403)) + ); +} + +BOOST_AUTO_TEST_SUITE_END()