diff --git a/doc/cross_module_dependencies.html b/doc/cross_module_dependencies.html new file mode 100644 index 00000000..3d416bf5 --- /dev/null +++ b/doc/cross_module_dependencies.html @@ -0,0 +1,253 @@ + + + Cross-extension-module dependencies + + + +c++boost.gif (8819 bytes) + + +
+

Cross-extension-module dependencies

+ +It is good programming practice to organize large projects as modules +that interact with each other via well-defined interfaces. With +Boost.Python it is possible to mirror this organization of the C++ +code at the Python level. That is, each logical C++ module can be +organized as a separate Python extension module. + +

+At first sight this might seem natural and straightforward. However, it +is a fairly complex problem to establish cross-extension-module +dependencies while maintaining the same ease of use Boost.Python +provides for classes that are wrapped in the same extension module. To +a large extent this complexity can be hidden from the author of a +Boost.Python extension module, but not entirely. + +

The recipe

+ +Suppose there is an extension module that exposes certain instances of +the C++ std::vector template library such that it can be used from +Python in the following manner: + +
+import std_vector
+v = std_vector.double([1, 2, 3, 4])
+v.push_back(5)
+v.size()
+
+ +Suppose the std_vector module is done well and reflects all C++ +functions that are useful at the Python level, for all C++ built-in +data types (std_vector.int, std_vector.long, etc.). + +

+Suppose further that there is a statistics module with a C++ class that +has constructors or member functions that use or return a std::vector. +For example: + +

+class xy {
+  private:
+    std::vector<double> m_x;
+    std::vector<double> m_y;
+  public:
+    xy(const std::vector<double>& x, const std::vector<double>& y) : m_x(x), m_y(y) {}
+    const std::vector<double>& x() const { return m_x; }
+    const std::vector<double>& y() const { return m_y; }
+    double correlation();
+};
+
+ +What is more natural than reusing the std_vector extension module to +expose these constructors or functions to Python? + +

+Unfortunately, what seems natural needs a little work in both the +std_vector and the statistics module. + +

+In the std_vector extension module, std::vector<double> needs to be +exposed to Python with the x_class_builder<> template instead of the +regular class_builder<>. For example: + +

+  x_class_builder<std::vector<double> > v_double(std_vector_module, "double");
+
+ +In the extension module that wraps class xy we need to use +the import_class_builder<> template: + +
+  import_class_builder<std::vector<double> > v_double("std_vector", "double");
+
+ +That is all. All the properties that are defined for std_vector.double +in the std_vector Boost.Python module will be available for the +returned objects of xy.x() and xy.y(). Similarly, the constructor for +xy will accept objects that were created by the std_vector module. + +

Non-copyable types

+ +The x_class_builder<T> instantiates template functions that invoke the +copy constructor of T. For a T that is non-copyable this will result in +compile-time error messages. In such a case, another variety of the +class_builder<>, the xptr_class_builder<>, must be used. +For example: + +
+xptr_class_builder<store> py_store(your_module, "store");
+
+ +The corresponding import_class_builder<> does not need any special +attention: + +
+import_class_builder<store> py_store("noncopyable_export", "store");
+
+ +

Python module search path

+ +The std_vector and statistics modules can now be used in the following +way: + +
+import std_vector
+import statistics
+x = std_vector.double([1, 2, 3, 4])
+y = std_vector.double([2, 4, 6, 8])
+xy = statistics.xy(x, y)
+xy.correlation()
+
+ +In this example it is clear that Python has to be able to find both the +std_vector and the statistics extension module. In other words, both +extension modules need to be in the Python module search path +(sys.path). + +

+The situation is not always that obvious. Suppose the statistics +module has a random function that returns a vector of random +numbers with a given length: + +

+import statistics
+x = statistics.random(5)
+y = statistics.random(5)
+xy = statistics.xy(x, y)
+xy.correlation()
+
+ +A naive user will not easily anticipate that the std_vector module is +used to pass the x and y vectors around. If the std_vector module is in +the Python module search path, this form of ignorance is of no harm. +On the contrary, we are glad that we do not have to bother the user +with details like this. + +

+If the std_vector module is not in the Python module search path, a +Python exception will be raised: + +

+Traceback (innermost last):
+  File "foo.py", line 2, in ?
+    x = statistics.random(5)
+ImportError: No module named std_vector
+
+ +As is the case with any system of a non-trivial complexity, it is +important that the setup is consistent and complete. + +

Two-way module dependencies

+ +Boost.Python supports two-way module dependencies. This is best +illustrated by a simple example. + +

+Suppose there is a module ivect that implements vectors of integers, +and a similar module dvect that implements vectors of doubles. We want +to be able to convert an integer vector to a double vector and vice +versa. For example: + +

+import ivect
+iv = ivect.ivect((1,2,3,4,5))
+dv = iv.as_dvect()
+
+ +The last expression will implicitly import the dvect module in order to +enable the conversion of the C++ representation of dvect to a Python +object. The analogous conversion is possible for a dvect: + +
+import dvect
+dv = dvect.dvect((1,2,3,4,5))
+iv = dv.as_ivect()
+
+ +Now the ivect module is imported implicitly. + +

+Note that the two-way dependencies are possible because the +dependencies are resolved only when needed. That is, the initialization +of the ivect module does not rely on the dvect module, and vice versa. +Only if as_dvect() or as_ivect() is actually invoked will the +corresponding module be implicitly imported. This also means that, for +example, the dvect module does not have to be available at all if +as_dvect() is never used. + +

Clarification of compile-time and link-time dependencies

+ +Boost.Python's support for resolving cross-module dependencies at +runtime does not imply that compile-time dependencies are eliminated. +For example, the statistics extension module in the example above will +need to #include <vector>. This is immediately obvious from the +definition of class xy. + +

+If a library is wrapped that consists of both header files and compiled +components (e.g. libdvect.a, dvect.lib, etc.), both the Boost.Python +extension module with the x_class_builder<> and the module with the +import_class_builder<> need to be linked against the object library. +Ideally one would build a shared library (e.g. libdvect.so, dvect.dll, +etc.). However, this introduces the issue of getting the search path +for the dynamic loading configured correctly. For small libraries it is +therefore often more convenient to ignore the fact that the object +files are loaded into memory more than once. + +

+The main purpose of Boost.Python's support for resolving cross-module +dependencies at runtime is to allow for a modular system layout. With +this support it is straightforward to reflect C++ code organization at +the Python level. Without the cross-module support, a multi-purpose +module like std_vector would be impractical because the entire wrapper +code would somehow have to be duplicated in all extension modules that +use it, making them harder to maintain and harder to build. + +

+Finally, there is an important psychological component. If a group of +classes is lumped together with many others in a huge module, the +authors will have difficulties in being identified with their work. +The situation is much more transparent if the work is represented by +a module with a recognizable name. This is not just a question of +strong egos, but also of getting credit and funding. + +

Why not use the x_class_builder universally?

+ +There is some overhead associated with the Boost.Python cross-module +support. Depending on the platform, the code generated by +x_class_builder<> is roughly 10%-20% larger than that generated by +class_builder<>. For a large extension module with many wrapped +classes, this could mean a significant difference. Therefore the +general recommendation is to use x_class_builder<> only for classes +that are likely to be used as function arguments or return values in +other modules. + +
+
+Author: Ralf W. Grosse-Kunstleve, March 2001 +
+ diff --git a/doc/pickle.html b/doc/pickle.html new file mode 100644 index 00000000..842112d3 --- /dev/null +++ b/doc/pickle.html @@ -0,0 +1,245 @@ + + +Boost.Python Pickle Support + +
+ +c++boost.gif (8819 bytes) + +
+

Boost.Python Pickle Support

+ +Pickle is a Python module for object serialization, also known +as persistence, marshalling, or flattening. + +

+It is often necessary to save and restore the contents of an object to +a file. One approach to this problem is to write a pair of functions +that read and write data from a file in a special format. A powerful +alternative approach is to use Python's pickle module. Exploiting +Python's ability for introspection, the pickle module recursively +converts nearly arbitrary Python objects into a stream of bytes that +can be written to a file. + +

+The Boost Python Library supports the pickle module by emulating the +interface implemented by Jim Fulton's ExtensionClass module that is +included in the +ZOPE +distribution. +This interface is similar to that for regular Python classes as +described in detail in the +Python Library Reference for pickle. + +


+

The Boost.Python Pickle Interface

+ +At the user level, the Boost.Python pickle interface involves three special +methods: + +
+
+__getinitargs__ +
+ When an instance of a Boost.Python extension class is pickled, the + pickler tests if the instance has a __getinitargs__ method. + This method must return a Python tuple (it is most convenient to use + a boost::python::tuple). When the instance is restored by the + unpickler, the contents of this tuple are used as the arguments for + the class constructor. + +

+ If __getinitargs__ is not defined, the class constructor + will be called without arguments. + +

+

+__getstate__ + +
+ When an instance of a Boost.Python extension class is pickled, the + pickler tests if the instance has a __getstate__ method. + This method should return a Python object representing the state of + the instance. + +

+ If __getstate__ is not defined, the instance's + __dict__ is pickled (if it is not empty). + +

+

+__setstate__ + +
+ When an instance of a Boost.Python extension class is restored by the + unpickler, it is first constructed using the result of + __getinitargs__ as arguments (see above). Subsequently the + unpickler tests if the new instance has a __setstate__ + method. If so, this method is called with the result of + __getstate__ (a Python object) as the argument. + +

+ If __setstate__ is not defined, the result of + __getstate__ must be a Python dictionary. The items of this + dictionary are added to the instance's __dict__. + +

+ +If both __getstate__ and __setstate__ are defined, +the Python object returned by __getstate__ need not be a +dictionary. The __getstate__ and __setstate__ methods +can do what they want. + +
+

Pitfalls and Safety Guards

+ +In Boost.Python extension modules with many extension classes, +providing complete pickle support for all classes would be a +significant overhead. In general complete pickle support should only be +implemented for extension classes that will eventually be pickled. +However, the author of a Boost.Python extension module might not +anticipate correctly which classes need support for pickle. +Unfortunately, the pickle protocol described above has two important +pitfalls that the end user of a Boost.Python extension module might not +be aware of: + +
+
+Pitfall 1: +Both __getinitargs__ and __getstate__ are not defined. + +
+ In this situation the unpickler calls the class constructor without + arguments and then adds the __dict__ that was pickled by + default to that of the new instance. + +

+ However, most C++ classes wrapped with Boost.Python will have member + data that are not restored correctly by this procedure. To alert the + user to this problem, a safety guard is provided. If both + __getinitargs__ and __getstate__ are not defined, + Boost.Python tests if the class has an attribute + __dict_defines_state__. An exception is raised if this + attribute is not defined: + +

+    RuntimeError: Incomplete pickle support (__dict_defines_state__ not set)
+
+ + In the rare cases where this is not the desired behavior, the safety + guard can deliberately be disabled. The corresponding C++ code for + this is, e.g.: + +
+    class_builder<your_class> py_your_class(your_module, "your_class");
+    py_your_class.dict_defines_state();
+
+ + It is also possible to override the safety guard at the Python level. + E.g.: + +
+    import your_bpl_module
+    class your_class(your_bpl_module.your_class):
+      __dict_defines_state__ = 1
+
+ +

+

+Pitfall 2: +__getstate__ is defined and the instance's __dict__ is not empty. + +
+ The author of a Boost.Python extension class might provide a + __getstate__ method without considering the possibilities + that: + +

+

    +
  • + his class is used in Python as a base class. Most likely the + __dict__ of instances of the derived class needs to be + pickled in order to restore the instances correctly. + +

    +

  • + the user adds items to the instance's __dict__ directly. + Again, the __dict__ of the instance then needs to be + pickled. + +
+

+ + To alert the user to this highly unobvious problem, a safety guard is + provided. If __getstate__ is defined and the instance's + __dict__ is not empty, Boost.Python tests if the class has + an attribute __getstate_manages_dict__. An exception is + raised if this attribute is not defined: + +

+    RuntimeError: Incomplete pickle support (__getstate_manages_dict__ not set)
+
+ + To resolve this problem, it should first be established that the + __getstate__ and __setstate__ methods manage the + instance's __dict__ correctly. Note that this can be done + both at the C++ and the Python level. Finally, the safety guard + should intentionally be overridden. E.g. in C++: + +
+    class_builder<your_class> py_your_class(your_module, "your_class");
+    py_your_class.getstate_manages_dict();
+
+ + In Python: + +
+    import your_bpl_module
+    class your_class(your_bpl_module.your_class):
+      __getstate_manages_dict__ = 1
+      def __getstate__(self):
+        # your code here
+      def __setstate__(self, state):
+        # your code here
+
+
+ +
+

Practical Advice

+ + + +
+

Example

+ +An example that shows how to configure pickle support is available in the +boost/libs/python/example directory +(getting_started3.cpp). + +
+© Copyright Ralf W. Grosse-Kunstleve 2001. Permission to copy, +use, modify, sell and distribute this document is granted provided this +copyright notice appears in all copies. This document is provided "as +is" without express or implied warranty, and with no claim as to its +suitability for any purpose. + +

+Updated: March 10, 2001 +

diff --git a/example/pickle1.cpp b/example/pickle1.cpp new file mode 100644 index 00000000..2f786f69 --- /dev/null +++ b/example/pickle1.cpp @@ -0,0 +1,57 @@ +/* + This example shows how to make an Extension Class "pickleable". + For more information refer to boost/libs/python/doc/pickle.html. + */ + +#include + +#include +namespace python = boost::python; + +namespace { // Avoid cluttering the global namespace. + + // A friendly class. + class world + { + private: + std::string country; + int secret_number; + public: + world(const std::string& country) : secret_number(0) { + this->country = country; + } + std::string greet() const { return "Hello from " + country + "!"; } + std::string get_country() const { return country; } + }; + + // Support for pickle. + python::ref world_getinitargs(const world& w) { + python::tuple result(1); + result.set_item(0, w.get_country()); + return result.reference(); + } +} + +BOOST_PYTHON_MODULE_INIT(pickle1) +{ + try + { + // Create an object representing this extension module. + python::module_builder this_module("pickle1"); + + // Create the Python type object for our extension class. + python::class_builder world_class(this_module, "world"); + + // Add the __init__ function. + world_class.def(python::constructor()); + // Add a regular member function. + world_class.def(&world::greet, "greet"); + + // Support for pickle. + world_class.def(world_getinitargs, "__getinitargs__"); + } + catch(...) + { + python::handle_exception(); // Deal with the exception for Python + } +} diff --git a/example/pickle2.cpp b/example/pickle2.cpp new file mode 100644 index 00000000..c33776a0 --- /dev/null +++ b/example/pickle2.cpp @@ -0,0 +1,80 @@ +/* + This example shows how to make an Extension Class "pickleable". + For more information refer to boost/libs/python/doc/pickle.html. + */ + +#include + +#include +namespace python = boost::python; + +namespace { // Avoid cluttering the global namespace. + + // A friendly class. 
+ class world + { + public: + world(const std::string& country) : secret_number(0) { + this->country = country; + } + std::string greet() const { return "Hello from " + country + "!"; } + std::string get_country() const { return country; } + void set_secret_number(int number) { secret_number = number; } + int get_secret_number() const { return secret_number; } + private: + std::string country; + int secret_number; + }; + + // Support for pickle. + python::ref world_getinitargs(const world& w) { + python::tuple result(1); + result.set_item(0, w.get_country()); + return result.reference(); // returning the reference avoids the copying. + } + + python::ref world_getstate(const world& w) { + python::tuple result(1); + result.set_item(0, w.get_secret_number()); + return result.reference(); // returning the reference avoids the copying. + } + + void world_setstate(world& w, python::tuple state) { + if (state.size() != 1) { + PyErr_SetString(PyExc_ValueError, + "Unexpected argument in call to __setstate__."); + throw python::error_already_set(); + } + int number = state[0].get(); + if (number != 42) + w.set_secret_number(number); + } +} + +BOOST_PYTHON_MODULE_INIT(pickle2) +{ + try + { + // Create an object representing this extension module. + python::module_builder this_module("pickle2"); + + // Create the Python type object for our extension class. + python::class_builder world_class(this_module, "world"); + + // Add the __init__ function. + world_class.def(python::constructor()); + // Add a regular member function. + world_class.def(&world::greet, "greet"); + world_class.def(&world::get_secret_number, "get_secret_number"); + world_class.def(&world::set_secret_number, "set_secret_number"); + + // Support for pickle. + world_class.def(world_getinitargs, "__getinitargs__"); + world_class.def(world_getstate, "__getstate__"); + world_class.def(world_setstate, "__setstate__"); + } + catch(...) 
+ { + python::handle_exception(); // Deal with the exception for Python + } +} diff --git a/example/pickle3.cpp b/example/pickle3.cpp new file mode 100644 index 00000000..19ddec43 --- /dev/null +++ b/example/pickle3.cpp @@ -0,0 +1,121 @@ +/* + This example shows how to make an Extension Class "pickleable". + For more information refer to boost/libs/python/doc/pickle.html. + */ + +#include + +#include +namespace python = boost::python; + +namespace { // Avoid cluttering the global namespace. + + // A friendly class. + class world + { + public: + world(const std::string& country) : secret_number(0) { + this->country = country; + } + std::string greet() const { return "Hello from " + country + "!"; } + std::string get_country() const { return country; } + void set_secret_number(int number) { secret_number = number; } + int get_secret_number() const { return secret_number; } + private: + std::string country; + int secret_number; + }; + + // Support for pickle. + python::ref world_getinitargs(const world& w) { + python::tuple result(1); + result.set_item(0, w.get_country()); + return result.reference(); // returning the reference avoids the copying. + } + + python::ref world_getstate(python::tuple const & args, + python::dictionary const & keywords); + + PyObject* world_setstate(python::tuple const & args, + python::dictionary const & keywords); +} + +BOOST_PYTHON_MODULE_INIT(pickle3) +{ + try + { + // Create an object representing this extension module. + python::module_builder this_module("pickle3"); + + // Create the Python type object for our extension class. + python::class_builder world_class(this_module, "world"); + + // Add the __init__ function. + world_class.def(python::constructor()); + // Add a regular member function. + world_class.def(&world::greet, "greet"); + world_class.def(&world::get_secret_number, "get_secret_number"); + world_class.def(&world::set_secret_number, "set_secret_number"); + + // Support for pickle. 
+ world_class.def(world_getinitargs, "__getinitargs__"); + world_class.def_raw(world_getstate, "__getstate__"); + world_class.def_raw(world_setstate, "__setstate__"); + world_class.getstate_manages_dict(); + } + catch(...) + { + python::handle_exception(); // Deal with the exception for Python + } +} + +namespace { + + python::ref world_getstate(python::tuple const & args, + python::dictionary const & keywords) + { + if(args.size() != 1 || keywords.size() != 0) { + PyErr_SetString(PyExc_TypeError, "wrong number of arguments"); + throw boost::python::argument_error(); + } + const world& w = args[0].get(); + python::ref mydict(args[0].getattr("__dict__")); + python::tuple result(2); + // store the object's __dict__ + result.set_item(0, mydict); + // store the internal state of the C++ object + result.set_item(1, w.get_secret_number()); + return result.reference(); // returning the reference avoids the copying. + } + + PyObject* world_setstate(python::tuple const & args, + python::dictionary const & keywords) + { + if(args.size() != 2 || keywords.size() != 0) { + PyErr_SetString(PyExc_TypeError, "wrong number of arguments"); + throw boost::python::argument_error(); + } + world& w = args[0].get(); + python::ref mydict(args[0].getattr("__dict__")); + const python::tuple& state(args[1].get()); + if (state.size() != 2) { + PyErr_SetString(PyExc_ValueError, + "Unexpected argument in call to __setstate__."); + throw python::error_already_set(); + } + // restore the object's __dict__ + python::dictionary odict(mydict.get()); + const python::dictionary& pdict(state[0].get()); + python::list pkeys(pdict.keys()); + for (int i = 0; i < pkeys.size(); i++) { + python::ref k(pkeys[i]); + //odict[k] = pdict[k]; // XXX memory leak! + odict[k] = pdict.get_item(k); // this does not leak. 
+ } + // restore the internal state of the C++ object + int number = state[1].get(); + if (number != 42) + w.set_secret_number(number); + return python::detail::none(); + } +} diff --git a/example/test_pickle1.py b/example/test_pickle1.py new file mode 100644 index 00000000..05696d4a --- /dev/null +++ b/example/test_pickle1.py @@ -0,0 +1,31 @@ +r'''>>> import pickle1 + >>> import re + >>> import pickle + >>> pickle1.world.__module__ + 'pickle1' + >>> pickle1.world.__safe_for_unpickling__ + 1 + >>> pickle1.world.__reduce__() + 'world' + >>> assert re.match( + ... "\(, \('Hello',\)\)", + ... repr(pickle1.world('Hello').__reduce__())) + >>> + >>> wd = pickle1.world('California') + >>> pstr = pickle.dumps(wd) + >>> wl = pickle.loads(pstr) + >>> print wd.greet() + Hello from California! + >>> print wl.greet() + Hello from California! +''' + +def run(args = None): + if args is not None: + import sys + sys.argv = args + import doctest, test_pickle1 + doctest.testmod(test_pickle1) + +if __name__ == '__main__': + run() diff --git a/example/test_pickle2.py b/example/test_pickle2.py new file mode 100644 index 00000000..463befa6 --- /dev/null +++ b/example/test_pickle2.py @@ -0,0 +1,45 @@ +r'''>>> import pickle2 + >>> import re + >>> import pickle + >>> pickle2.world.__module__ + 'pickle2' + >>> pickle2.world.__safe_for_unpickling__ + 1 + >>> pickle2.world.__reduce__() + 'world' + >>> assert re.match( + ... "\(, \('Hello',\), \(0,\)\)", + ... repr(pickle2.world('Hello').__reduce__())) + >>> + >>> for number in (24, 42): + ... wd = pickle2.world('California') + ... wd.set_secret_number(number) + ... pstr = pickle.dumps(wd) + ... wl = pickle.loads(pstr) + ... print wd.greet(), wd.get_secret_number() + ... print wl.greet(), wl.get_secret_number() + Hello from California! 24 + Hello from California! 24 + Hello from California! 42 + Hello from California! 0 + +# Now show that the __dict__ is not taken care of. 
+ >>> wd = pickle2.world('California') + >>> wd.x = 1 + >>> wd.__dict__ + {'x': 1} + >>> try: pstr = pickle.dumps(wd) + ... except RuntimeError, err: print err[0] + ... + Incomplete pickle support (__getstate_manages_dict__ not set) +''' + +def run(args = None): + if args is not None: + import sys + sys.argv = args + import doctest, test_pickle2 + doctest.testmod(test_pickle2) + +if __name__ == '__main__': + run() diff --git a/example/test_pickle3.py b/example/test_pickle3.py new file mode 100644 index 00000000..b964f1a2 --- /dev/null +++ b/example/test_pickle3.py @@ -0,0 +1,38 @@ +r'''>>> import pickle3 + >>> import re + >>> import pickle + >>> pickle3.world.__module__ + 'pickle3' + >>> pickle3.world.__safe_for_unpickling__ + 1 + >>> pickle3.world.__reduce__() + 'world' + >>> assert re.match( + ... "\(, \('Hello',\), \(\{\}, 0\)\)", + ... repr(pickle3.world('Hello').__reduce__())) + >>> + >>> for number in (24, 42): + ... wd = pickle3.world('California') + ... wd.set_secret_number(number) + ... wd.x = 2 * number + ... wd.y = 'y' * number + ... wd.z = 3. * number + ... pstr = pickle.dumps(wd) + ... wl = pickle.loads(pstr) + ... print wd.greet(), wd.get_secret_number(), wd.__dict__ + ... print wl.greet(), wl.get_secret_number(), wl.__dict__ + Hello from California! 24 {'z': 72.0, 'x': 48, 'y': 'yyyyyyyyyyyyyyyyyyyyyyyy'} + Hello from California! 24 {'z': 72.0, 'x': 48, 'y': 'yyyyyyyyyyyyyyyyyyyyyyyy'} + Hello from California! 42 {'z': 126.0, 'x': 84, 'y': 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'} + Hello from California! 0 {'z': 126.0, 'x': 84, 'y': 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'} +''' + +def run(args = None): + if args is not None: + import sys + sys.argv = args + import doctest, test_pickle3 + doctest.testmod(test_pickle3) + +if __name__ == '__main__': + run()