From 9d7dfd84494f1541acbde46e7f5c1dbcf5bf31f3 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Sun, 30 Oct 2011 14:43:53 +0000 Subject: [PATCH] added gaussian example, updated scons build --- SConscript | 3 +- libs/numpy/example/SConscript | 11 ++ libs/numpy/example/demo_gaussian.py | 32 ++++ libs/numpy/example/gaussian.cpp | 237 ++++++++++++++++++++++++++++ site_scons/scons_tools.py | 8 +- 5 files changed, 288 insertions(+), 3 deletions(-) create mode 100644 libs/numpy/example/SConscript create mode 100644 libs/numpy/example/demo_gaussian.py create mode 100644 libs/numpy/example/gaussian.cpp diff --git a/SConscript b/SConscript index 9a43d32b..984a4351 100644 --- a/SConscript +++ b/SConscript @@ -10,7 +10,7 @@ scons_tools.LocalConfiguration( ) boost_numpy_env = scons_tools.GetEnvironment().Clone() boost_numpy_env.Append(CPPPATH=[os.path.abspath(os.curdir)]) -libpath = os.path.abspath("%s/numpy/src" % scons_tools.GetBuildDir()) +libpath = os.path.abspath("libs/numpy/src") if os.environ.has_key("LD_LIBRARY_PATH"): boost_numpy_env["ENV"]["LD_LIBRARY_PATH"] = "%s:%s" % (libpath, os.environ["LD_LIBRARY_PATH"]) else: @@ -28,6 +28,7 @@ targets["boost.numpy"]["install"] = ( + boost_numpy_env.Install(boost_numpy_env["INSTALL_LIB"], targets["boost.numpy"]["lib"]) ) targets["boost.numpy"]["test"] = SConscript("libs/numpy/test/SConscript") +targets["boost.numpy"]["example"] = SConscript("libs/numpy/example/SConscript") Return("targets") diff --git a/libs/numpy/example/SConscript b/libs/numpy/example/SConscript new file mode 100644 index 00000000..5fce67fc --- /dev/null +++ b/libs/numpy/example/SConscript @@ -0,0 +1,11 @@ +Import("boost_numpy_env") + +example = [] + +for name in ("ufunc", "dtype", "fromdata", "ndarray", "simple"): + example.extend(boost_numpy_env.Program(name, "%s.cpp" % name, LIBS="boost_numpy")) + +for name in ("gaussian",): + example.extend(boost_numpy_env.SharedLibrary(name, "%s.cpp" % name, SHLIBPREFIX="", LIBS="boost_numpy")) + +Return("example") diff --git a/libs/numpy/example/demo_gaussian.py b/libs/numpy/example/demo_gaussian.py new file mode 100644 index 00000000..51fe1318 --- /dev/null +++ b/libs/numpy/example/demo_gaussian.py @@ -0,0 +1,32 @@ +import numpy +import gaussian + +mu = numpy.zeros(2, dtype=float) +sigma = numpy.identity(2, dtype=float) +sigma[0, 1] = 0.15 +sigma[1, 0] = 0.15 + +g = gaussian.bivariate_gaussian(mu, sigma) + +r = numpy.linspace(-40, 40, 1001) +x, y = numpy.meshgrid(r, r) + +z = g(x, y) + +s = z.sum() * (r[1] - r[0])**2 +print "sum (should be ~ 1):", s + +xc = (z * x).sum() / z.sum() +print "x centroid (should be ~ %f): %f" % (mu[0], xc) + +yc = (z * y).sum() / z.sum() +print "y centroid (should be ~ %f): %f" % (mu[1], yc) + +xx = (z * (x - xc)**2).sum() / z.sum() +print "xx moment (should be ~ %f): %f" % (sigma[0,0], xx) + +yy = (z * (y - yc)**2).sum() / z.sum() +print "yy moment (should be ~ %f): %f" % (sigma[1,1], yy) + +xy = 0.5 * (z * (x - xc) * (y - yc)).sum() / z.sum() +print "xy moment (should be ~ %f): %f" % (sigma[0,1], xy) diff --git a/libs/numpy/example/gaussian.cpp b/libs/numpy/example/gaussian.cpp new file mode 100644 index 00000000..c20eb30f --- /dev/null +++ b/libs/numpy/example/gaussian.cpp @@ -0,0 +1,237 @@ +#include +#include + +#include +#include +#include + +namespace bp = boost::python; +namespace bn = boost::numpy; + +/** + * This class represents a simple 2-d Gaussian (Normal) distribution, defined by a + * mean vector 'mu' and a covariance matrix 'sigma'. + */ +class bivariate_gaussian { +public: + + /** + * Boost.NumPy isn't designed to support specific C++ linear algebra or matrix/vector libraries; + * it's intended as a lower-level interface that can be used with any such C++ library. + * + * Here, we'll demonstrate these techniques with boost::ublas, but the same general principles + * should apply to other matrix/vector libraries. + */ + typedef boost::numeric::ublas::c_vector vector; + typedef boost::numeric::ublas::c_matrix matrix; + + vector const & get_mu() const { return _mu; } + + matrix const & get_sigma() const { return _sigma; } + + /** + * Evaluate the density of the distribution at a point defined by a two-element vector. + */ + double operator()(vector const & p) const { + vector u = prod(_cholesky, p - _mu); + return 0.5 * _cholesky(0, 0) * _cholesky(1, 1) * std::exp(-0.5 * inner_prod(u, u)) / M_PI; + } + + /** + * Evaluate the density of the distribution at an (x, y) point. + */ + double operator()(double x, double y) const { + vector p; + p[0] = x; + p[1] = y; + return operator()(p); + } + + /** + * Construct from a mean vector and covariance matrix. + */ + bivariate_gaussian(vector const & mu, matrix const & sigma) + : _mu(mu), _sigma(sigma), _cholesky(compute_inverse_cholesky(sigma)) + {} + +private: + + /** + * This evaluates the inverse of the Cholesky factorization of a 2x2 matrix; + * it's just a shortcut in evaluating the density. + */ + static matrix compute_inverse_cholesky(matrix const & m) { + matrix l; + // First do cholesky factorization: l l^t = m + l(0, 0) = std::sqrt(m(0, 0)); + l(0, 1) = m(0, 1) / l(0, 0); + l(1, 1) = std::sqrt(m(1, 1) - l(0,1) * l(0,1)); + // Now do forward-substitution (in-place) to invert: + l(0, 0) = 1.0 / l(0, 0); + l(1, 0) = l(0, 1) = -l(0, 1) / l(1, 1); + l(1, 1) = 1.0 / l(1, 1); + return l; + } + + vector _mu; + matrix _sigma; + matrix _cholesky; + +}; + +/* + * We have a two options for wrapping get_mu and get_sigma into NumPy-returning Python methods: + * - we could deep-copy the data, making totally new NumPy arrays; + * - we could make NumPy arrays that point into the existing memory. + * The latter is often preferable, especially if the arrays are large, but it's dangerous unless + * the reference counting is correct: the returned NumPy array needs to hold a reference that + * keeps the memory it points to from being deallocated as long as it is alive. This is what the + * "owner" argument to from_data does - the NumPy array holds a reference to the owner, keeping it + * from being destroyed. + * + * Note that this mechanism isn't completely safe for data members that can have their internal + * storage reallocated. A std::vector, for instance, can be invalidated when it is resized, + * so holding a Python reference to a C++ class that holds a std::vector may not be a guarantee + * that the memory in the std::vector will remain valid. + */ + +/** + * These two functions are custom wrappers for get_mu and get_sigma, providing the shallow-copy + * conversion with reference counting described above. + * + * It's also worth noting that these return NumPy arrays that cannot be modified in Python; + * the const overloads of vector::data() and matrix::data() return const references, + * and passing a const pointer to from_data causes NumPy's 'writeable' flag to be set to false. + */ +static bn::ndarray py_get_mu(bp::object const & self) { + bivariate_gaussian::vector const & mu = bp::extract(self)().get_mu(); + return bn::from_data( + mu.data(), + bn::dtype::get_builtin(), + bp::make_tuple(2), + bp::make_tuple(sizeof(double)), + self + ); +} +static bn::ndarray py_get_sigma(bp::object const & self) { + bivariate_gaussian::matrix const & sigma = bp::extract(self)().get_sigma(); + return bn::from_data( + sigma.data(), + bn::dtype::get_builtin(), + bp::make_tuple(2, 2), + bp::make_tuple(2 * sizeof(double), sizeof(double)), + self + ); +} + +/** + * To allow the constructor to work, we need to define some from-Python converters from NumPy arrays + * to the ublas types. The rvalue-from-python functionality is not well-documented in Boost.Python + * itself; you can learn more from boost/python/converter/rvalue_from_python_data.hpp. + */ + +/** + * We start with two functions that just copy a NumPy array into ublas objects. These will be used + * in the templated converted below. The first just uses the operator[] overloads provided by + * bp::object. + */ +static void copy_ndarray_to_ublas(bn::ndarray const & array, bivariate_gaussian::vector & vec) { + vec[0] = bp::extract(array[0]); + vec[1] = bp::extract(array[1]); +} +/** + * Here, we'll take the alternate approach of using the strides to access the array's memory directly. + * This can be much faster for large arrays. + */ +static void copy_ndarray_to_ublas(bn::ndarray const & array, bivariate_gaussian::matrix & mat) { + // Unfortunately, get_strides() can't be inlined, so it's best to call it once up-front. + Py_intptr_t const * strides = array.get_strides(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + mat(i, j) = *reinterpret_cast(array.get_data() + i * strides[0] + j * strides[1]); + } + } +} + +template +struct bivariate_gaussian_ublas_from_python { + + /** + * Register the converter. + */ + bivariate_gaussian_ublas_from_python() { + bp::converter::registry::push_back( + &convertible, + &construct, + bp::type_id< T >() + ); + } + + /** + * Test to see if we can convert this to the desired type; if not return zero. + * If we can convert, returned pointer can be used by construct(). + */ + static void * convertible(PyObject * p) { + try { + bp::object obj(bp::handle<>(bp::borrowed(p))); + std::auto_ptr array( + new bn::ndarray( + bn::from_object(obj, bn::dtype::get_builtin(), N, N, bn::ndarray::V_CONTIGUOUS) + ) + ); + if (array->shape(0) != 2) return 0; + if (N == 2 && array->shape(1) != 2) return 0; + return array.release(); + } catch (bp::error_already_set & err) { + bp::handle_exception(); + return 0; + } + } + + /** + * Finish the conversion by initializing the C++ object into memory prepared by Boost.Python. + */ + static void construct(PyObject * obj, bp::converter::rvalue_from_python_stage1_data * data) { + // Extract the array we passed out of the convertible() member function. + std::auto_ptr array(reinterpret_cast(data->convertible)); + // Find the memory block Boost.Python has prepared for the result. + typedef bp::converter::rvalue_from_python_storage storage_t; + storage_t * storage = reinterpret_cast(data); + // Use placement new to initialize the result. + T * ublas_obj = new (storage->storage.bytes) T(); + // Fill the result with the values from the NumPy array. + copy_ndarray_to_ublas(*array, *ublas_obj); + // Finish up. + data->convertible = storage->storage.bytes; + } + +}; + + +BOOST_PYTHON_MODULE(gaussian) { + bn::initialize(); + + // Register the from-python converters + bivariate_gaussian_ublas_from_python< bivariate_gaussian::vector, 1 >(); + bivariate_gaussian_ublas_from_python< bivariate_gaussian::matrix, 2 >(); + + typedef double (bivariate_gaussian::*call_vector)(bivariate_gaussian::vector const &) const; + + bp::class_("bivariate_gaussian", bp::init()) + + // Declare the constructor (wouldn't work without the from-python converters). + .def(bp::init< bivariate_gaussian::vector const &, bivariate_gaussian::matrix const & >()) + + // Use our custom reference-counting getters + .add_property("mu", &py_get_mu) + .add_property("sigma", &py_get_sigma) + + // First overload accepts a two-element array argument + .def("__call__", (call_vector)&bivariate_gaussian::operator()) + + // This overload works like a binary NumPy universal function: you can pass + // in scalars or arrays, and the C++ function will automatically be called + // on each element of an array argument. + .def("__call__", bn::binary_ufunc::make()) + ; +} diff --git a/site_scons/scons_tools.py b/site_scons/scons_tools.py index 2dc98a69..8ccf2532 100755 --- a/site_scons/scons_tools.py +++ b/site_scons/scons_tools.py @@ -251,6 +251,7 @@ def MakeAliases(targets): all_all = [] build_all = [] install_all = [] + example_all = [] test_all = [] scons.Help(""" To specify additional directories to add to the include or library paths, specify them @@ -263,7 +264,8 @@ Supported variables are CPPPATH, LIBPATH and RPATH. Global targets: 'all' (builds everything) 'build' (builds headers, libraries, and python wrappers) 'install' (copies files to global bin, include and lib directories) - 'test' (runs unit tests; requires install) + 'example' (builds examples; may require install) + 'test' (runs unit tests; may require install) These targets can be built for individual packages with the syntax '[package]-[target]'. Some packages support additional targets, given below. @@ -275,7 +277,7 @@ Packages: for pkg_name in sorted(targets.keys()): pkg_targets = targets[pkg_name] extra_targets = tuple(k for k in pkg_targets.keys() if k not in - ("all","build","install","test")) + ("all","build","install","test","example")) if extra_targets: scons.Help("%-25s %s\n" % (pkg_name, ", ".join("'%s'" % k for k in extra_targets))) else: @@ -290,11 +292,13 @@ Packages: all_all.extend(pkg_all) build_all.extend(pkg_build) install_all.extend(pkg_targets.get("install", pkg_build)) + example_all.extend(pkg_targets.get("example", pkg_targets.get("install", pkg_build))) test_all.extend(pkg_targets.get("test", pkg_targets.get("install", pkg_build))) env.Alias("all", all_all) env.Alias("build", build_all) env.Alias("install", install_all) env.Alias("test", test_all) + env.Alias("example", example_all) env.Default("build")