?????????? ????????? - ??????????????? - /home/agenciai/public_html/cd38d8/compute.tar
???????
exception/program_build_failure.hpp 0000644 00000003253 15125510617 0013616 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2017 Kristian Popov <kristian.popov@outlook.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_PROGRAM_BUILD_FAILURE_HPP #define BOOST_COMPUTE_EXCEPTION_PROGRAM_BUILD_FAILURE_HPP #include <string> #include <boost/compute/exception/opencl_error.hpp> namespace boost { namespace compute { /// \class program_build_failure /// \brief A failure when building OpenCL program /// /// Instances of this class are thrown when OpenCL program build fails. /// Extends opencl_error by saving a program build log so it can be used /// for testing, debugging, or logging purposes. /// /// \see opencl_error class program_build_failure : public opencl_error { public: /// Creates a new program_build_failure exception object for \p error /// and \p build_log. explicit program_build_failure(cl_int error, const std::string& build_log) throw() : opencl_error(error), m_build_log(build_log) { } /// Destroys the program_build_failure object. ~program_build_failure() throw() { } /// Retrieve the log of a failed program build. 
std::string build_log() const throw() { return m_build_log; } private: std::string m_build_log; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_PROGRAM_BUILD_FAILURE_HPP exception/context_error.hpp 0000644 00000005017 15125510617 0012156 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP #define BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP #include <exception> namespace boost { namespace compute { class context; /// \class context_error /// \brief A run-time OpenCL context error. /// /// The context_error exception is thrown when the OpenCL context encounters /// an error condition. Boost.Compute is notified of these error conditions by /// registering an error handler when creating context objects (via the /// \c pfn_notify argument to the \c clCreateContext() function). /// /// This exception is different than the opencl_error exception which is thrown /// as a result of error caused when calling a single OpenCL API function. /// /// \see opencl_error class context_error : public std::exception { public: /// Creates a new context error exception object. context_error(const context *context, const char *errinfo, const void *private_info, size_t private_info_size) throw() : m_context(context), m_errinfo(errinfo), m_private_info(private_info), m_private_info_size(private_info_size) { } /// Destroys the context error object. ~context_error() throw() { } /// Returns a string with a description of the error. 
const char* what() const throw() { return m_errinfo; } /// Returns a pointer to the context object which generated the error /// notification. const context* get_context_ptr() const throw() { return m_context; } /// Returns a pointer to the private info memory block. const void* get_private_info_ptr() const throw() { return m_private_info; } /// Returns the size of the private info memory block. size_t get_private_info_size() const throw() { return m_private_info_size; } private: const context *m_context; const char *m_errinfo; const void *m_private_info; size_t m_private_info_size; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP exception/unsupported_extension_error.hpp 0000644 00000004230 15125510617 0015152 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP #define BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP #include <exception> #include <sstream> #include <string> namespace boost { namespace compute { /// \class unsupported_extension_error /// \brief Exception thrown when attempting to use an unsupported /// OpenCL extension. /// /// This exception is thrown when the user attempts to use an OpenCL /// extension which is not supported on the platform and/or device. /// /// An example of this is attempting to use CL-GL sharing on a non-GPU /// device. 
/// /// \see opencl_error class unsupported_extension_error : public std::exception { public: /// Creates a new unsupported extension error exception object indicating /// that \p extension is not supported by the OpenCL platform or device. explicit unsupported_extension_error(const char *extension) throw() : m_extension(extension) { std::stringstream msg; msg << "OpenCL extension " << extension << " not supported"; m_error_string = msg.str(); } /// Destroys the unsupported extension error object. ~unsupported_extension_error() throw() { } /// Returns the name of the unsupported extension. std::string extension_name() const throw() { return m_extension; } /// Returns a string containing a human-readable error message containing /// the name of the unsupported exception. const char* what() const throw() { return m_error_string.c_str(); } private: std::string m_extension; std::string m_error_string; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP exception/no_device_found.hpp 0000644 00000002451 15125510617 0012406 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP #define BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP #include <exception> namespace boost { namespace compute { /// \class no_device_found /// \brief Exception thrown when no OpenCL device is found /// /// This exception is thrown when no valid OpenCL device can be found. 
/// /// \see opencl_error class no_device_found : public std::exception { public: /// Creates a new no_device_found exception object. no_device_found() throw() { } /// Destroys the no_device_found exception object. ~no_device_found() throw() { } /// Returns a string containing a human-readable error message. const char* what() const throw() { return "No OpenCL device found"; } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP exception/opencl_error.hpp 0000644 00000015063 15125510617 0011754 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP #define BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP #include <exception> #include <string> #include <sstream> #include <boost/compute/cl.hpp> namespace boost { namespace compute { /// \class opencl_error /// \brief A run-time OpenCL error. /// /// The opencl_error class represents an error returned from an OpenCL /// function. /// /// \see context_error class opencl_error : public std::exception { public: /// Creates a new opencl_error exception object for \p error. explicit opencl_error(cl_int error) throw() : m_error(error), m_error_string(to_string(error)) { } /// Destroys the opencl_error object. ~opencl_error() throw() { } /// Returns the numeric error code. cl_int error_code() const throw() { return m_error; } /// Returns a string description of the error. std::string error_string() const throw() { return m_error_string; } /// Returns a C-string description of the error. 
const char* what() const throw() { return m_error_string.c_str(); } /// Static function which converts the numeric OpenCL error code \p error /// to a human-readable string. /// /// For example: /// \code /// std::cout << opencl_error::to_string(CL_INVALID_KERNEL_ARGS) << std::endl; /// \endcode /// /// Will print "Invalid Kernel Arguments". /// /// If the error code is unknown (e.g. not a valid OpenCL error), a string /// containing "Unknown OpenCL Error" along with the error number will be /// returned. static std::string to_string(cl_int error) { switch(error){ case CL_SUCCESS: return "Success"; case CL_DEVICE_NOT_FOUND: return "Device Not Found"; case CL_DEVICE_NOT_AVAILABLE: return "Device Not Available"; case CL_COMPILER_NOT_AVAILABLE: return "Compiler Not Available"; case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory Object Allocation Failure"; case CL_OUT_OF_RESOURCES: return "Out of Resources"; case CL_OUT_OF_HOST_MEMORY: return "Out of Host Memory"; case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling Information Not Available"; case CL_MEM_COPY_OVERLAP: return "Memory Copy Overlap"; case CL_IMAGE_FORMAT_MISMATCH: return "Image Format Mismatch"; case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image Format Not Supported"; case CL_BUILD_PROGRAM_FAILURE: return "Build Program Failure"; case CL_MAP_FAILURE: return "Map Failure"; case CL_INVALID_VALUE: return "Invalid Value"; case CL_INVALID_DEVICE_TYPE: return "Invalid Device Type"; case CL_INVALID_PLATFORM: return "Invalid Platform"; case CL_INVALID_DEVICE: return "Invalid Device"; case CL_INVALID_CONTEXT: return "Invalid Context"; case CL_INVALID_QUEUE_PROPERTIES: return "Invalid Queue Properties"; case CL_INVALID_COMMAND_QUEUE: return "Invalid Command Queue"; case CL_INVALID_HOST_PTR: return "Invalid Host Pointer"; case CL_INVALID_MEM_OBJECT: return "Invalid Memory Object"; case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid Image Format Descriptor"; case CL_INVALID_IMAGE_SIZE: return "Invalid Image 
Size"; case CL_INVALID_SAMPLER: return "Invalid Sampler"; case CL_INVALID_BINARY: return "Invalid Binary"; case CL_INVALID_BUILD_OPTIONS: return "Invalid Build Options"; case CL_INVALID_PROGRAM: return "Invalid Program"; case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid Program Executable"; case CL_INVALID_KERNEL_NAME: return "Invalid Kernel Name"; case CL_INVALID_KERNEL_DEFINITION: return "Invalid Kernel Definition"; case CL_INVALID_KERNEL: return "Invalid Kernel"; case CL_INVALID_ARG_INDEX: return "Invalid Argument Index"; case CL_INVALID_ARG_VALUE: return "Invalid Argument Value"; case CL_INVALID_ARG_SIZE: return "Invalid Argument Size"; case CL_INVALID_KERNEL_ARGS: return "Invalid Kernel Arguments"; case CL_INVALID_WORK_DIMENSION: return "Invalid Work Dimension"; case CL_INVALID_WORK_GROUP_SIZE: return "Invalid Work Group Size"; case CL_INVALID_WORK_ITEM_SIZE: return "Invalid Work Item Size"; case CL_INVALID_GLOBAL_OFFSET: return "Invalid Global Offset"; case CL_INVALID_EVENT_WAIT_LIST: return "Invalid Event Wait List"; case CL_INVALID_EVENT: return "Invalid Event"; case CL_INVALID_OPERATION: return "Invalid Operation"; case CL_INVALID_GL_OBJECT: return "Invalid GL Object"; case CL_INVALID_BUFFER_SIZE: return "Invalid Buffer Size"; case CL_INVALID_MIP_LEVEL: return "Invalid MIP Level"; case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid Global Work Size"; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 case CL_COMPILE_PROGRAM_FAILURE: return "Compile Program Failure"; case CL_LINKER_NOT_AVAILABLE: return "Linker Not Available"; case CL_LINK_PROGRAM_FAILURE: return "Link Program Failure"; case CL_DEVICE_PARTITION_FAILED: return "Device Partition Failed"; case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "Kernel Argument Info Not Available"; case CL_INVALID_PROPERTY: return "Invalid Property"; case CL_INVALID_IMAGE_DESCRIPTOR: return "Invalid Image Descriptor"; case CL_INVALID_COMPILER_OPTIONS: return "Invalid Compiler Options"; case CL_INVALID_LINKER_OPTIONS: return "Invalid 
Linker Options"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid Device Partition Count"; #endif // BOOST_COMPUTE_CL_VERSION_1_2 #ifdef BOOST_COMPUTE_CL_VERSION_2_0 case CL_INVALID_PIPE_SIZE: return "Invalid Pipe Size"; case CL_INVALID_DEVICE_QUEUE: return "Invalid Device Queue"; #endif default: { std::stringstream s; s << "Unknown OpenCL Error (" << error << ")"; return s.str(); } } } private: cl_int m_error; std::string m_error_string; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP buffer.hpp 0000644 00000015177 15125510617 0006544 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_BUFFER_HPP #define BOOST_COMPUTE_BUFFER_HPP #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/memory_object.hpp> #include <boost/compute/detail/get_object_info.hpp> namespace boost { namespace compute { // forward declarations class command_queue; /// \class buffer /// \brief A memory buffer on a compute device. /// /// The buffer class represents a memory buffer on a compute device. /// /// Buffers are allocated within a compute context. For example, to allocate /// a memory buffer for 32 float's: /// /// \snippet test/test_buffer.cpp constructor /// /// Once created, data can be copied to and from the buffer using the /// \c enqueue_*_buffer() methods in the command_queue class. 
For example, to /// copy a set of \c int values from the host to the device: /// \code /// int data[] = { 1, 2, 3, 4 }; /// /// queue.enqueue_write_buffer(buf, 0, 4 * sizeof(int), data); /// \endcode /// /// Also see the copy() algorithm for a higher-level interface to copying data /// between the host and the device. For a higher-level, dynamically-resizable, /// type-safe container for data on a compute device, use the vector<T> class. /// /// Buffer objects have reference semantics. Creating a copy of a buffer /// object simply creates another reference to the underlying OpenCL memory /// object. To create an actual copy use the buffer::clone() method. /// /// \see context, command_queue class buffer : public memory_object { public: /// Creates a null buffer object. buffer() : memory_object() { } /// Creates a buffer object for \p mem. If \p retain is \c true, the /// reference count for \p mem will be incremented. explicit buffer(cl_mem mem, bool retain = true) : memory_object(mem, retain) { } /// Create a new memory buffer in of \p size with \p flags in /// \p context. /// /// \see_opencl_ref{clCreateBuffer} buffer(const context &context, size_t size, cl_mem_flags flags = read_write, void *host_ptr = 0) { cl_int error = 0; m_mem = clCreateBuffer(context, flags, (std::max)(size, size_t(1)), host_ptr, &error); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new buffer object as a copy of \p other. buffer(const buffer &other) : memory_object(other) { } /// Copies the buffer object from \p other to \c *this. buffer& operator=(const buffer &other) { if(this != &other){ memory_object::operator=(other); } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new buffer object from \p other. buffer(buffer&& other) BOOST_NOEXCEPT : memory_object(std::move(other)) { } /// Move-assigns the buffer from \p other to \c *this. 
buffer& operator=(buffer&& other) BOOST_NOEXCEPT { memory_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the buffer object. ~buffer() { } /// Returns the size of the buffer in bytes. size_t size() const { return get_memory_size(); } /// \internal_ size_t max_size() const { return get_context().get_device().max_memory_alloc_size(); } /// Returns information about the buffer. /// /// \see_opencl_ref{clGetMemObjectInfo} template<class T> T get_info(cl_mem_info info) const { return get_memory_info<T>(info); } /// \overload template<int Enum> typename detail::get_object_info_type<buffer, Enum>::type get_info() const; /// Creates a new buffer with a copy of the data in \c *this. Uses /// \p queue to perform the copy. buffer clone(command_queue &queue) const; #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new buffer out of this buffer. /// The new buffer is a sub region of this buffer. /// \p flags The mem_flags which should be used to create the new buffer /// \p origin The start index in this buffer /// \p size The size of the new sub buffer /// /// \see_opencl_ref{clCreateSubBuffer} /// /// \opencl_version_warning{1,1} buffer create_subbuffer(cl_mem_flags flags, size_t origin, size_t size) { BOOST_ASSERT(origin + size <= this->size()); BOOST_ASSERT(origin % (get_context(). get_device(). 
get_info<CL_DEVICE_MEM_BASE_ADDR_ALIGN>() / 8) == 0); cl_int error = 0; cl_buffer_region region = { origin, size }; cl_mem mem = clCreateSubBuffer(m_mem, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); if(!mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return buffer(mem, false); } #endif // BOOST_COMPUTE_CL_VERSION_1_1 }; /// \internal_ define get_info() specializations for buffer BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, ((cl_mem_object_type, CL_MEM_TYPE)) ((cl_mem_flags, CL_MEM_FLAGS)) ((size_t, CL_MEM_SIZE)) ((void *, CL_MEM_HOST_PTR)) ((cl_uint, CL_MEM_MAP_COUNT)) ((cl_uint, CL_MEM_REFERENCE_COUNT)) ((cl_context, CL_MEM_CONTEXT)) ) #ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, ((cl_mem, CL_MEM_ASSOCIATED_MEMOBJECT)) ((size_t, CL_MEM_OFFSET)) ) #endif // BOOST_COMPUTE_CL_VERSION_1_1 namespace detail { // set_kernel_arg specialization for buffer template<> struct set_kernel_arg<buffer> { void operator()(kernel &kernel_, size_t index, const buffer &buffer_) { kernel_.set_arg(index, buffer_.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_BUFFER_HPP allocator.hpp 0000644 00000001354 15125510617 0007243 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALLOCATOR_HPP #define BOOST_COMPUTE_ALLOCATOR_HPP /// \file /// /// Meta-header to include all Boost.Compute allocator headers. 
#include <boost/compute/allocator/buffer_allocator.hpp> #include <boost/compute/allocator/pinned_allocator.hpp> #endif // BOOST_COMPUTE_ALLOCATOR_HPP type_traits.hpp 0000644 00000002122 15125510617 0007624 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_HPP #define BOOST_COMPUTE_TYPE_TRAITS_HPP #include <boost/compute/type_traits/common_type.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> #include <boost/compute/type_traits/is_fundamental.hpp> #include <boost/compute/type_traits/is_vector_type.hpp> #include <boost/compute/type_traits/make_vector_type.hpp> #include <boost/compute/type_traits/result_of.hpp> #include <boost/compute/type_traits/scalar_type.hpp> #include <boost/compute/type_traits/type_definition.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/type_traits/vector_size.hpp> #endif // BOOST_COMPUTE_TYPE_TRAITS_HPP core.hpp 0000644 00000002165 15125510617 0006214 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CORE_HPP #define BOOST_COMPUTE_CORE_HPP /// \file /// /// Meta-header to include all Boost.Compute core headers. 
#include <boost/compute/buffer.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/device.hpp> #include <boost/compute/event.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/types.hpp> #include <boost/compute/memory_object.hpp> #include <boost/compute/platform.hpp> #include <boost/compute/program.hpp> #include <boost/compute/system.hpp> #include <boost/compute/user_event.hpp> #include <boost/compute/version.hpp> #endif // BOOST_COMPUTE_CORE_HPP kernel.hpp 0000644 00000042254 15125510617 0006547 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_KERNEL_HPP #define BOOST_COMPUTE_KERNEL_HPP #include <string> #include <boost/assert.hpp> #include <boost/utility/enable_if.hpp> #include <boost/optional.hpp> #include <boost/compute/cl_ext.hpp> // cl_khr_subgroups #include <boost/compute/config.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/program.hpp> #include <boost/compute/platform.hpp> #include <boost/compute/type_traits/is_fundamental.hpp> #include <boost/compute/detail/diagnostic.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/detail/assert_cl_success.hpp> namespace boost { namespace compute { namespace detail { template<class T> struct set_kernel_arg; } // end detail namespace /// \class kernel /// \brief A compute kernel. /// /// \see command_queue, program class kernel { public: /// Creates a null kernel object. kernel() : m_kernel(0) { } /// Creates a new kernel object for \p kernel. 
If \p retain is /// \c true, the reference count for \p kernel will be incremented. explicit kernel(cl_kernel kernel, bool retain = true) : m_kernel(kernel) { if(m_kernel && retain){ clRetainKernel(m_kernel); } } /// Creates a new kernel object with \p name from \p program. kernel(const program &program, const std::string &name) { cl_int error = 0; m_kernel = clCreateKernel(program.get(), name.c_str(), &error); if(!m_kernel){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new kernel object as a copy of \p other. kernel(const kernel &other) : m_kernel(other.m_kernel) { if(m_kernel){ clRetainKernel(m_kernel); } } /// Copies the kernel object from \p other to \c *this. kernel& operator=(const kernel &other) { if(this != &other){ if(m_kernel){ clReleaseKernel(m_kernel); } m_kernel = other.m_kernel; if(m_kernel){ clRetainKernel(m_kernel); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new kernel object from \p other. kernel(kernel&& other) BOOST_NOEXCEPT : m_kernel(other.m_kernel) { other.m_kernel = 0; } /// Move-assigns the kernel from \p other to \c *this. kernel& operator=(kernel&& other) BOOST_NOEXCEPT { if(m_kernel){ clReleaseKernel(m_kernel); } m_kernel = other.m_kernel; other.m_kernel = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the kernel object. ~kernel() { if(m_kernel){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseKernel(m_kernel) ); } } #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new kernel object based on a shallow copy of /// the undelying OpenCL kernel object. /// /// \opencl_version_warning{2,1} /// /// \see_opencl21_ref{clCloneKernel} kernel clone() { cl_int ret = 0; cl_kernel k = clCloneKernel(m_kernel, &ret); return kernel(k, false); } #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Returns a reference to the underlying OpenCL kernel object. 
cl_kernel& get() const { return const_cast<cl_kernel &>(m_kernel); } /// Returns the function name for the kernel. std::string name() const { return get_info<std::string>(CL_KERNEL_FUNCTION_NAME); } /// Returns the number of arguments for the kernel. size_t arity() const { return get_info<cl_uint>(CL_KERNEL_NUM_ARGS); } /// Returns the program for the kernel. program get_program() const { return program(get_info<cl_program>(CL_KERNEL_PROGRAM)); } /// Returns the context for the kernel. context get_context() const { return context(get_info<cl_context>(CL_KERNEL_CONTEXT)); } /// Returns information about the kernel. /// /// \see_opencl_ref{clGetKernelInfo} template<class T> T get_info(cl_kernel_info info) const { return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info); } /// \overload template<int Enum> typename detail::get_object_info_type<kernel, Enum>::type get_info() const; #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns information about the argument at \p index. /// /// For example, to get the name of the first argument: /// \code /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME); /// \endcode /// /// Note, this function requires that the program be compiled with the /// \c "-cl-kernel-arg-info" flag. For example: /// \code /// program.build("-cl-kernel-arg-info"); /// \endcode /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clGetKernelArgInfo} template<class T> T get_arg_info(size_t index, cl_kernel_arg_info info) const { return detail::get_object_info<T>( clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index) ); } /// \overload template<int Enum> typename detail::get_object_info_type<kernel, Enum>::type get_arg_info(size_t index) const; #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Returns work-group information for the kernel with \p device. 
/// /// \see_opencl_ref{clGetKernelWorkGroupInfo} template<class T> T get_work_group_info(const device &device, cl_kernel_work_group_info info) const { return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id()); } #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns sub-group information for the kernel with \p device. Returns a null /// optional if \p device is not 2.1 device, or is not 2.0 device with support /// for cl_khr_subgroups extension. /// /// \opencl_version_warning{2,1} /// \see_opencl21_ref{clGetKernelSubGroupInfo} /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR} template<class T> boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, const size_t input_size, const void * input) const { if(device.check_version(2, 1)) { return detail::get_object_info<T>( clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input ); } else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups")) { return boost::optional<T>(); } // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE // are supported in cl_khr_subgroups extension for 2.0 devices. 
else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) { return boost::optional<T>(); } BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr = reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>( reinterpret_cast<size_t>( device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR") ) ); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); return detail::get_object_info<T>( clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input ); } /// \overload template<class T> boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const { return get_sub_group_info<T>(device, info, 0, 0); } /// \overload template<class T> boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, const size_t input) const { return get_sub_group_info<T>(device, info, sizeof(size_t), &input); } #endif // BOOST_COMPUTE_CL_VERSION_2_1 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1) /// Returns sub-group information for the kernel with \p device. Returns a null /// optional if cl_khr_subgroups extension is not supported by \p device. 
/// /// \opencl_version_warning{2,0} /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR} template<class T> boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, const size_t input_size, const void * input) const { if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups")) { return boost::optional<T>(); } BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr = reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>( reinterpret_cast<size_t>( device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR") ) ); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); return detail::get_object_info<T>( clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input ); } #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1) #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// \overload template<class T> boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info, const std::vector<size_t> input) const { BOOST_ASSERT(input.size() > 0); return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]); } #endif // BOOST_COMPUTE_CL_VERSION_2_0 /// Sets the argument at \p index to \p value with \p size. /// /// \see_opencl_ref{clSetKernelArg} void set_arg(size_t index, size_t size, const void *value) { BOOST_ASSERT(index < arity()); cl_int ret = clSetKernelArg(m_kernel, static_cast<cl_uint>(index), size, value); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Sets the argument at \p index to \p value. /// /// For built-in types (e.g. \c float, \c int4_), this is equivalent to /// calling set_arg(index, sizeof(type), &value). /// /// Additionally, this method is specialized for device memory objects /// such as buffer and image2d. 
This allows for them to be passed directly /// without having to extract their underlying cl_mem object. /// /// This method is also specialized for device container types such as /// vector<T> and array<T, N>. This allows for them to be passed directly /// as kernel arguments without having to extract their underlying buffer. /// /// For setting local memory arguments (e.g. "__local float *buf"), the /// local_buffer<T> class may be used: /// \code /// // set argument to a local buffer with storage for 32 float's /// kernel.set_arg(0, local_buffer<float>(32)); /// \endcode template<class T> void set_arg(size_t index, const T &value) { // if you get a compilation error pointing here it means you // attempted to set a kernel argument from an invalid type. detail::set_kernel_arg<T>()(*this, index, value); } /// \internal_ void set_arg(size_t index, const cl_mem mem) { set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem)); } /// \internal_ void set_arg(size_t index, const cl_sampler sampler) { set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler)); } /// \internal_ void set_arg_svm_ptr(size_t index, void* ptr) { #ifdef BOOST_COMPUTE_CL_VERSION_2_0 cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } #else (void) index; (void) ptr; BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE)); #endif } #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// Sets the arguments for the kernel to \p args. template<class... T> void set_args(T&&... args) { BOOST_ASSERT(sizeof...(T) <= arity()); _set_args<0>(args...); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Sets additional execution information for the kernel. 
/// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clSetKernelExecInfo} void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value) { cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #endif // BOOST_COMPUTE_CL_VERSION_2_0 /// Returns \c true if the kernel is the same at \p other. bool operator==(const kernel &other) const { return m_kernel == other.m_kernel; } /// Returns \c true if the kernel is different from \p other. bool operator!=(const kernel &other) const { return m_kernel != other.m_kernel; } /// \internal_ operator cl_kernel() const { return m_kernel; } /// \internal_ static kernel create_with_source(const std::string &source, const std::string &name, const context &context) { return program::build_with_source(source, context).create_kernel(name); } private: #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// \internal_ template<size_t N> void _set_args() { } /// \internal_ template<size_t N, class T, class... Args> void _set_args(T&& arg, Args&&... 
// set_kernel_arg implementation for built-in types
//
// Function object invoked by kernel::set_arg(index, value) as
// set_kernel_arg<T>()(kernel, index, value). The primary template passes the
// address of `value` together with sizeof(T) to the raw
// kernel::set_arg(index, size, ptr) overload.
//
// The return type is spelled through enable_if<is_fundamental<T> >, so the
// call operator has a valid declaration only when is_fundamental<T> holds.
// NOTE(review): for any other T without a dedicated specialization this is
// presumably what produces the "invalid kernel argument type" compile error
// mentioned in kernel::set_arg() - confirm against is_fundamental.
template<class T>
struct set_kernel_arg
{
    typename boost::enable_if<is_fundamental<T> >::type
    operator()(kernel &kernel_, size_t index, const T &value)
    {
        // forwards the argument by address; the size is the host-side sizeof(T)
        kernel_.set_arg(index, sizeof(T), &value);
    }
};

// set_kernel_arg specialization for char (different from built-in cl_char)
//
// Takes the character by value and passes the address of that local copy,
// with sizeof(char) as the argument size.
template<>
struct set_kernel_arg<char>
{
    void operator()(kernel &kernel_, size_t index, const char c)
    {
        kernel_.set_arg(index, sizeof(char), &c);
    }
};
inline float16_ eigen_matrix4f_to_float16(const Eigen::Matrix4f &matrix) { float16_ result; std::memcpy(&result, matrix.data(), 16 * sizeof(float)); return result; } /// Converts an \c Eigen::Matrix4d to a \c double16_. inline double16_ eigen_matrix4d_to_double16(const Eigen::Matrix4d &matrix) { double16_ result; std::memcpy(&result, matrix.data(), 16 * sizeof(double)); return result; } } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2i, int2) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4i, int4) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4f, float4) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2f, float8) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4f, float16) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2d, double2) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4d, double4) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2d, double8) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4d, double16) #endif // BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP interop/eigen.hpp 0000644 00000001151 15125510617 0010025 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_EIGEN_HPP #define BOOST_COMPUTE_INTEROP_EIGEN_HPP #include <boost/compute/interop/eigen/core.hpp> #endif // BOOST_COMPUTE_INTEROP_EIGEN_HPP interop/opengl/acquire.hpp 0000644 00000007247 15125510617 0011667 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP #include <boost/compute/command_queue.hpp> #include <boost/compute/interop/opengl/cl_gl.hpp> #include <boost/compute/interop/opengl/opengl_buffer.hpp> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/utility/wait_list.hpp> namespace boost { namespace compute { /// Enqueues a command to acquire the specified OpenGL memory objects. /// /// \see_opencl_ref{clEnqueueAcquireGLObjects} inline event opengl_enqueue_acquire_gl_objects(const uint_ num_objects, const cl_mem *mem_objects, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(queue != 0); event event_; cl_int ret = clEnqueueAcquireGLObjects(queue.get(), num_objects, mem_objects, events.size(), events.get_event_ptr(), &event_.get()); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to release the specified OpenGL memory objects. 
/// /// \see_opencl_ref{clEnqueueReleaseGLObjects} inline event opengl_enqueue_release_gl_objects(const uint_ num_objects, const cl_mem *mem_objects, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(queue != 0); event event_; cl_int ret = clEnqueueReleaseGLObjects(queue.get(), num_objects, mem_objects, events.size(), events.get_event_ptr(), &event_.get()); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to acquire the specified OpenGL buffer. /// /// \see_opencl_ref{clEnqueueAcquireGLObjects} inline event opengl_enqueue_acquire_buffer(const opengl_buffer &buffer, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == queue.get_context()); return opengl_enqueue_acquire_gl_objects(1, &buffer.get(), queue, events); } /// Enqueues a command to release the specified OpenGL buffer. /// /// \see_opencl_ref{clEnqueueReleaseGLObjects} inline event opengl_enqueue_release_buffer(const opengl_buffer &buffer, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == queue.get_context()); return opengl_enqueue_release_gl_objects(1, &buffer.get(), queue, events); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP interop/opengl/opengl_renderbuffer.hpp 0000644 00000007066 15125510617 0014252 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP #include <boost/compute/image/image_object.hpp> #include <boost/compute/interop/opengl/gl.hpp> #include <boost/compute/interop/opengl/cl_gl.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/utility/extents.hpp> namespace boost { namespace compute { /// \class opengl_renderbuffer /// /// A OpenCL buffer for accessing an OpenGL renderbuffer object. class opengl_renderbuffer : public image_object { public: /// Creates a null OpenGL renderbuffer object. opengl_renderbuffer() : image_object() { } /// Creates a new OpenGL renderbuffer object for \p mem. explicit opengl_renderbuffer(cl_mem mem, bool retain = true) : image_object(mem, retain) { } /// Creates a new OpenGL renderbuffer object in \p context for /// \p renderbuffer with \p flags. /// /// \see_opencl_ref{clCreateFromGLRenderbuffer} opengl_renderbuffer(const context &context, GLuint renderbuffer, cl_mem_flags flags = read_write) { cl_int error = 0; m_mem = clCreateFromGLRenderbuffer( context, flags, renderbuffer, &error ); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new OpenGL renderbuffer object as a copy of \p other. opengl_renderbuffer(const opengl_renderbuffer &other) : image_object(other) { } /// Copies the OpenGL renderbuffer object from \p other. opengl_renderbuffer& operator=(const opengl_renderbuffer &other) { if(this != &other){ image_object::operator=(other); } return *this; } /// Destroys the OpenGL buffer object. ~opengl_renderbuffer() { } /// Returns the size (width, height) of the renderbuffer. extents<2> size() const { extents<2> size; size[0] = get_image_info<size_t>(CL_IMAGE_WIDTH); size[1] = get_image_info<size_t>(CL_IMAGE_HEIGHT); return size; } /// Returns the origin of the renderbuffer (\c 0, \c 0). 
extents<2> origin() const { return extents<2>(); } /// Returns the OpenGL memory object ID. /// /// \see_opencl_ref{clGetGLObjectInfo} GLuint get_opengl_object() const { GLuint object = 0; clGetGLObjectInfo(m_mem, 0, &object); return object; } /// Returns the OpenGL memory object type. /// /// \see_opencl_ref{clGetGLObjectInfo} cl_gl_object_type get_opengl_type() const { cl_gl_object_type type; clGetGLObjectInfo(m_mem, &type, 0); return type; } }; namespace detail { // set_kernel_arg() specialization for opengl_renderbuffer template<> struct set_kernel_arg<opengl_renderbuffer> : public set_kernel_arg<image_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::opengl_renderbuffer, image2d_t) #endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP interop/opengl/gl.hpp 0000644 00000001223 15125510617 0010624 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP #if defined(__APPLE__) #include <OpenGL/gl.h> #else #include <GL/gl.h> #endif #endif // BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP interop/opengl/cl_gl.hpp 0000644 00000001323 15125510617 0011303 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP #include <boost/compute/detail/cl_versions.hpp> #if defined(__APPLE__) #include <OpenCL/cl_gl.h> #else #include <CL/cl_gl.h> #endif #endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP interop/opengl/context.hpp 0000644 00000010725 15125510617 0011715 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
/// Creates a shared OpenCL/OpenGL context for the currently active
/// OpenGL context.
///
/// Once created, the shared context can be used to create OpenCL memory
/// objects which can interact with OpenGL memory objects (e.g. VBOs).
///
/// On Apple systems the context is created with clCreateContext() from the
/// CGL share group of the current CGL context. On all other systems the
/// platforms returned by system::platforms() are tried in order, and the
/// context is created for the first platform whose clGetGLContextInfoKHR()
/// reports a device for the current OpenGL context that also supports the
/// sharing extension.
///
/// \throws opencl_error if clCreateContext() fails (Apple code path).
/// \throws unsupported_extension_error if no CL-GL sharing capable devices
///         are found.
inline context opengl_create_shared_context()
{
    // name of the OpenGL sharing extension for the system
    #if defined(__APPLE__)
    const char *cl_gl_sharing_extension = "cl_APPLE_gl_sharing";
    #else
    const char *cl_gl_sharing_extension = "cl_khr_gl_sharing";
    #endif

    #if defined(__APPLE__)
    // get OpenGL share group
    CGLContextObj cgl_current_context = CGLGetCurrentContext();
    CGLShareGroupObj cgl_share_group = CGLGetShareGroup(cgl_current_context);

    // zero-terminated property list naming the share group to attach to
    cl_context_properties properties[] = {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
        (cl_context_properties) cgl_share_group,
        0
    };

    cl_int error = 0;
    cl_context cl_gl_context = clCreateContext(properties, 0, 0, 0, 0, &error);
    if(!cl_gl_context){
        BOOST_THROW_EXCEPTION(opencl_error(error));
    }

    // NOTE(review): the `false` argument presumably means "do not retain",
    // i.e. the wrapper adopts the reference returned by clCreateContext()
    // - confirm against the context constructor
    return context(cl_gl_context, false);
    #else
    // signature of the clGetGLContextInfoKHR() extension function
    typedef cl_int(*GetGLContextInfoKHRFunction)(
        const cl_context_properties*, cl_gl_context_info, size_t, void *, size_t *
    );

    std::vector<platform> platforms = system::platforms();
    for(size_t i = 0; i < platforms.size(); i++){
        const platform &platform = platforms[i];

        // check whether this platform supports OpenCL/OpenGL sharing
        if (!platform.supports_extension(cl_gl_sharing_extension))
            continue;

        // load clGetGLContextInfoKHR() extension function
        GetGLContextInfoKHRFunction GetGLContextInfoKHR =
            reinterpret_cast<GetGLContextInfoKHRFunction>(
                reinterpret_cast<size_t>(
                    platform.get_extension_function_address("clGetGLContextInfoKHR")
                )
            );
        // skip platforms for which the entry point could not be resolved
        if(!GetGLContextInfoKHR){
            continue;
        }

        // create context properties listing the platform and current OpenGL display
        // (on systems that are neither Linux nor Windows only the platform is listed)
        cl_context_properties properties[] = {
            CL_CONTEXT_PLATFORM,
            (cl_context_properties) platform.id(),
        #if defined(__linux__)
            CL_GL_CONTEXT_KHR,
            (cl_context_properties) glXGetCurrentContext(),
            CL_GLX_DISPLAY_KHR,
            (cl_context_properties) glXGetCurrentDisplay(),
        #elif defined(_WIN32)
            CL_GL_CONTEXT_KHR,
            (cl_context_properties) wglGetCurrentContext(),
            CL_WGL_HDC_KHR,
            (cl_context_properties) wglGetCurrentDC(),
        #endif
            0
        };

        // lookup current OpenCL device for current OpenGL context
        // (gpu_id is only read after the query reported CL_SUCCESS)
        cl_device_id gpu_id;
        cl_int ret = GetGLContextInfoKHR(
            properties,
            CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
            sizeof(cl_device_id),
            &gpu_id,
            0
        );
        if(ret != CL_SUCCESS){
            continue;
        }

        // create device object for the GPU and ensure it supports CL-GL sharing
        device gpu(gpu_id, false);
        if(!gpu.supports_extension(cl_gl_sharing_extension)){
            continue;
        }

        // return CL-GL sharing context
        return context(gpu, properties);
    }
    #endif

    // no CL-GL sharing capable devices found
    BOOST_THROW_EXCEPTION(
        unsupported_extension_error(cl_gl_sharing_extension)
    );
}
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP #include <boost/compute/detail/cl_versions.hpp> #if defined(__APPLE__) #include <OpenCL/cl_gl_ext.h> #else #include <CL/cl_gl_ext.h> #endif #endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP interop/opengl/opengl_texture.hpp 0000644 00000007546 15125510617 0013304 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP #include <boost/compute/image/image_object.hpp> #include <boost/compute/interop/opengl/gl.hpp> #include <boost/compute/interop/opengl/cl_gl.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/utility/extents.hpp> namespace boost { namespace compute { /// \class opengl_texture /// /// A OpenCL image2d for accessing an OpenGL texture object. class opengl_texture : public image_object { public: /// Creates a null OpenGL texture object. opengl_texture() : image_object() { } /// Creates a new OpenGL texture object for \p mem. explicit opengl_texture(cl_mem mem, bool retain = true) : image_object(mem, retain) { } /// Creates a new OpenGL texture object in \p context for \p texture /// with \p flags. 
/// /// \see_opencl_ref{clCreateFromGLTexture} opengl_texture(const context &context, GLenum texture_target, GLint miplevel, GLuint texture, cl_mem_flags flags = read_write) { cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 m_mem = clCreateFromGLTexture(context, flags, texture_target, miplevel, texture, &error); #else m_mem = clCreateFromGLTexture2D(context, flags, texture_target, miplevel, texture, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new OpenGL texture object as a copy of \p other. opengl_texture(const opengl_texture &other) : image_object(other) { } /// Copies the OpenGL texture object from \p other. opengl_texture& operator=(const opengl_texture &other) { if(this != &other){ image_object::operator=(other); } return *this; } /// Destroys the texture object. ~opengl_texture() { } /// Returns the size (width, height) of the texture. extents<2> size() const { extents<2> size; size[0] = get_image_info<size_t>(CL_IMAGE_WIDTH); size[1] = get_image_info<size_t>(CL_IMAGE_HEIGHT); return size; } /// Returns the origin of the texture (\c 0, \c 0). extents<2> origin() const { return extents<2>(); } /// Returns information about the texture. 
/// /// \see_opencl_ref{clGetGLTextureInfo} template<class T> T get_texture_info(cl_gl_texture_info info) const { return detail::get_object_info<T>(clGetGLTextureInfo, m_mem, info); } }; namespace detail { // set_kernel_arg() specialization for opengl_texture template<> struct set_kernel_arg<opengl_texture> : public set_kernel_arg<image_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::opengl_texture, image2d_t) #endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP interop/opengl/opengl_buffer.hpp 0000644 00000005414 15125510617 0013045 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP #include <boost/compute/buffer.hpp> #include <boost/compute/interop/opengl/gl.hpp> #include <boost/compute/interop/opengl/cl_gl.hpp> namespace boost { namespace compute { /// \class opengl_buffer /// /// A OpenCL buffer for accessing an OpenGL memory object. class opengl_buffer : public buffer { public: /// Creates a null OpenGL buffer object. opengl_buffer() : buffer() { } /// Creates a new OpenGL buffer object for \p mem. explicit opengl_buffer(cl_mem mem, bool retain = true) : buffer(mem, retain) { } /// Creates a new OpenGL buffer object in \p context for \p bufobj /// with \p flags. 
/// /// \see_opencl_ref{clCreateFromGLBuffer} opengl_buffer(const context &context, GLuint bufobj, cl_mem_flags flags = read_write) { cl_int error = 0; m_mem = clCreateFromGLBuffer(context, flags, bufobj, &error); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new OpenGL buffer object as a copy of \p other. opengl_buffer(const opengl_buffer &other) : buffer(other) { } /// Copies the OpenGL buffer object from \p other. opengl_buffer& operator=(const opengl_buffer &other) { if(this != &other){ buffer::operator=(other); } return *this; } /// Destroys the OpenGL buffer object. ~opengl_buffer() { } /// Returns the OpenGL memory object ID. /// /// \see_opencl_ref{clGetGLObjectInfo} GLuint get_opengl_object() const { GLuint object = 0; clGetGLObjectInfo(m_mem, 0, &object); return object; } /// Returns the OpenGL memory object type. /// /// \see_opencl_ref{clGetGLObjectInfo} cl_gl_object_type get_opengl_type() const { cl_gl_object_type type; clGetGLObjectInfo(m_mem, &type, 0); return type; } }; namespace detail { // set_kernel_arg specialization for opengl_buffer template<> struct set_kernel_arg<opengl_buffer> : set_kernel_arg<memory_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP interop/vtk.hpp 0000644 00000001372 15125510617 0007547 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_HPP #define BOOST_COMPUTE_INTEROP_VTK_HPP #include <boost/compute/interop/vtk/bounds.hpp> #include <boost/compute/interop/vtk/data_array.hpp> #include <boost/compute/interop/vtk/matrix4x4.hpp> #include <boost/compute/interop/vtk/points.hpp> #endif // BOOST_COMPUTE_INTEROP_VTK_HPP interop/vtk/data_array.hpp 0000644 00000005165 15125510617 0011662 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP #define BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP #include <vtkDataArray.h> #include <vtkDataArrayTemplate.h> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/copy_n.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> namespace boost { namespace compute { /// Copies the values in \p data to \p buffer. 
template<class T> inline void vtk_copy_data_array_to_buffer(const vtkDataArray *data, buffer_iterator<T> buffer, command_queue &queue = system::default_queue()); /// \internal_ template<class T> inline void vtk_copy_data_array_to_buffer(const vtkDataArrayTemplate<T> *data, buffer_iterator<T> buffer, command_queue &queue = system::default_queue()) { vtkDataArrayTemplate<T> *data_ = const_cast<vtkDataArrayTemplate<T> *>(data); const T *data_ptr = static_cast<const T *>(data_->GetVoidPointer(0)); size_t data_size = data_->GetNumberOfComponents() * data_->GetNumberOfTuples(); ::boost::compute::copy_n(data_ptr, data_size, buffer, queue); } /// Copies the values in the range [\p first, \p last) to \p data. template<class T> inline void vtk_copy_buffer_to_data_array(buffer_iterator<T> first, buffer_iterator<T> last, vtkDataArray *data, command_queue &queue = system::default_queue()); /// \internal_ template<class T> inline void vtk_copy_buffer_to_data_array(buffer_iterator<T> first, buffer_iterator<T> last, vtkDataArrayTemplate<T> *data, command_queue &queue = system::default_queue()) { T *data_ptr = static_cast<T *>(data->GetVoidPointer(0)); ::boost::compute::copy(first, last, data_ptr, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP interop/vtk/points.hpp 0000644 00000003461 15125510617 0011064 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP #define BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP #include <vector> #include <vtkPoints.h> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> namespace boost { namespace compute { /// Copies \p points to \p buffer. /// /// For example, to copy from a \c vtkPoints object to a \c vector<float4_>: /// \code /// vtkPoints *points = ... /// vector<float4_> vector(points->GetNumberOfPoints(), context); /// vtk_copy_points_to_buffer(points, vector.begin(), queue); /// \endcode template<class PointType> inline void vtk_copy_points_to_buffer(const vtkPoints *points, buffer_iterator<PointType> buffer, command_queue &queue = system::default_queue()) { vtkPoints *points_ = const_cast<vtkPoints *>(points); // copy points to aligned buffer std::vector<PointType> tmp(points_->GetNumberOfPoints()); for(vtkIdType i = 0; i < points_->GetNumberOfPoints(); i++){ double *p = points_->GetPoint(i); tmp[i] = PointType(p[0], p[1], p[2], 1); } // copy data to device copy(tmp.begin(), tmp.end(), buffer, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP interop/vtk/matrix4x4.hpp 0000644 00000002415 15125510617 0011412 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP #define BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP #include <vtkMatrix4x4.h> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// Converts a \c vtkMatrix4x4 to a \c float16_. inline float16_ vtk_matrix4x4_to_float16(const vtkMatrix4x4 *matrix) { float16_ result; for(int i = 0; i < 4; i++){ for(int j = 0; j < 4; j++){ result[i*4+j] = matrix->GetElement(i, j); } } return result; } /// Converts a \c vtkMatrix4x4 to a \c double16_; inline double16_ vtk_matrix4x4_to_double16(const vtkMatrix4x4 *matrix) { double16_ result; std::memcpy(&result, matrix->Element, 16 * sizeof(double)); return result; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP interop/vtk/bounds.hpp 0000644 00000004006 15125510617 0011036 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP #define BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP #include <vector> #include <iterator> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy_n.hpp> #include <boost/compute/algorithm/reduce.hpp> #include <boost/compute/container/array.hpp> namespace boost { namespace compute { /// Calculates the bounds for the points in the range [\p first, \p last) and /// stores the result in \p bounds. /// /// For example, this can be used to implement the GetBounds() method for a /// vtkMapper subclass. 
template<class PointIterator> inline void vtk_compute_bounds(PointIterator first, PointIterator last, double bounds[6], command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<PointIterator>::value_type T; const context &context = queue.get_context(); // compute min and max point array<T, 2> extrema(context); reduce(first, last, extrema.begin() + 0, min<T>(), queue); reduce(first, last, extrema.begin() + 1, max<T>(), queue); // copy results to host buffer std::vector<T> buffer(2); copy_n(extrema.begin(), 2, buffer.begin(), queue); // copy to vtk-style bounds bounds[0] = buffer[0][0]; bounds[1] = buffer[1][0]; bounds[2] = buffer[0][1]; bounds[3] = buffer[1][1]; bounds[4] = buffer[0][2]; bounds[5] = buffer[1][2]; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP interop/opengl.hpp 0000644 00000001655 15125510617 0010233 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_HPP /// \file /// /// Meta-header to include all Boost.Compute OpenGL interop headers. 
#include <boost/compute/interop/opengl/acquire.hpp> #include <boost/compute/interop/opengl/context.hpp> #include <boost/compute/interop/opengl/opengl_buffer.hpp> #include <boost/compute/interop/opengl/opengl_renderbuffer.hpp> #include <boost/compute/interop/opengl/opengl_texture.hpp> #endif // BOOST_COMPUTE_INTEROP_OPENGL_HPP interop/qt.hpp 0000644 00000001215 15125510617 0007363 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_HPP #define BOOST_COMPUTE_INTEROP_QT_HPP #include <boost/compute/interop/qt/qtcore.hpp> #include <boost/compute/interop/qt/qtgui.hpp> #endif // BOOST_COMPUTE_INTEROP_QT_HPP interop/opencv/core.hpp 0000644 00000011211 15125510617 0011156 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP #include <opencv2/core/core.hpp> #include <boost/throw_exception.hpp> #include <boost/compute/algorithm/copy_n.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/image/image2d.hpp> #include <boost/compute/image/image_format.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> namespace boost { namespace compute { template<class T> inline void opencv_copy_mat_to_buffer(const cv::Mat &mat, buffer_iterator<T> buffer, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.isContinuous()); ::boost::compute::copy_n( reinterpret_cast<T *>(mat.data), mat.rows * mat.cols, buffer, queue ); } template<class T> inline void opencv_copy_buffer_to_mat(const buffer_iterator<T> buffer, cv::Mat &mat, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.isContinuous()); ::boost::compute::copy_n( buffer, mat.cols * mat.rows, reinterpret_cast<T *>(mat.data), queue ); } inline void opencv_copy_mat_to_image(const cv::Mat &mat, image2d &image, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.data != 0); BOOST_ASSERT(mat.isContinuous()); BOOST_ASSERT(image.get_context() == queue.get_context()); queue.enqueue_write_image(image, image.origin(), image.size(), mat.data); } inline void opencv_copy_image_to_mat(const image2d &image, cv::Mat &mat, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.isContinuous()); BOOST_ASSERT(image.get_context() == queue.get_context()); queue.enqueue_read_image(image, image.origin(), image.size(), mat.data); } inline image_format opencv_get_mat_image_format(const cv::Mat &mat) { switch(mat.type()){ case CV_8UC4: return image_format(CL_BGRA, CL_UNORM_INT8); case CV_16UC4: return image_format(CL_BGRA, CL_UNORM_INT16); case CV_32F: return image_format(CL_INTENSITY, CL_FLOAT); case CV_32FC4: return 
image_format(CL_RGBA, CL_FLOAT); case CV_8UC1: return image_format(CL_INTENSITY, CL_UNORM_INT8); } BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); } inline cv::Mat opencv_create_mat_with_image2d(const image2d &image, command_queue &queue = system::default_queue()) { BOOST_ASSERT(image.get_context() == queue.get_context()); cv::Mat mat; image_format format = image.get_format(); const cl_image_format *cl_image_format = format.get_format_ptr(); if(cl_image_format->image_channel_data_type == CL_UNORM_INT8 && cl_image_format->image_channel_order == CL_BGRA) { mat = cv::Mat(image.height(), image.width(), CV_8UC4); } else if(cl_image_format->image_channel_data_type == CL_UNORM_INT16 && cl_image_format->image_channel_order == CL_BGRA) { mat = cv::Mat(image.height(), image.width(), CV_16UC4); } else if(cl_image_format->image_channel_data_type == CL_FLOAT && cl_image_format->image_channel_order == CL_INTENSITY) { mat = cv::Mat(image.height(), image.width(), CV_32FC1); } else { mat = cv::Mat(image.height(), image.width(), CV_8UC1); } opencv_copy_image_to_mat(image, mat, queue); return mat; } inline image2d opencv_create_image2d_with_mat(const cv::Mat &mat, cl_mem_flags flags, command_queue &queue = system::default_queue()) { const context &context = queue.get_context(); const image_format format = opencv_get_mat_image_format(mat); image2d image(context, mat.cols, mat.rows, format, flags); opencv_copy_mat_to_image(mat, image, queue); return image; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP interop/opencv/ocl.hpp 0000644 00000002546 15125510617 0011016 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute 
for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP #include <opencv2/ocl/ocl.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/context.hpp> #include <boost/compute/command_queue.hpp> namespace boost { namespace compute { context opencv_ocl_get_context() { void *ocl_context = cv::ocl::getoclContext(); if(!ocl_context){ return context(); } return context(*(static_cast<cl_context *>(ocl_context))); } command_queue opencv_ocl_get_command_queue() { void *ocl_queue = cv::ocl::getoclCommandQueue(); if(!ocl_queue){ return command_queue(); } return command_queue(*(static_cast<cl_command_queue *>(ocl_queue))); } buffer opencv_ocl_get_buffer(const cv::ocl::oclMat &mat) { return buffer(reinterpret_cast<cl_mem>(mat.data)); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP interop/opencv/highgui.hpp 0000644 00000002043 15125510617 0011655 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP #include <opencv2/highgui/highgui.hpp> #include <boost/compute/interop/opencv/core.hpp> namespace boost { namespace compute { inline void opencv_imshow(const std::string &winname, const image2d &image, command_queue &queue = system::default_queue()) { const cv::Mat mat = opencv_create_mat_with_image2d(image, queue); cv::imshow(winname, mat); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP interop/qt/qimage.hpp 0000644 00000004175 15125510617 0010636 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP #define BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP #include <boost/throw_exception.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/image/image2d.hpp> #include <boost/compute/image/image_format.hpp> #include <boost/compute/utility/dim.hpp> #include <QImage> namespace boost { namespace compute { inline image_format qt_qimage_format_to_image_format(const QImage::Format &format) { if(format == QImage::Format_RGB32){ return image_format(image_format::bgra, image_format::unorm_int8); } BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); } inline QImage::Format qt_image_format_to_qimage_format(const image_format &format) { if(format == image_format(image_format::bgra, image_format::unorm_int8)){ return QImage::Format_RGB32; } return QImage::Format_Invalid; } inline image_format qt_qimage_get_format(const QImage &image) { return qt_qimage_format_to_image_format(image.format()); } inline void qt_copy_qimage_to_image2d(const QImage &qimage, image2d &image, command_queue &queue) { queue.enqueue_write_image(image, image.origin(), image.size(), qimage.constBits()); } inline void qt_copy_image2d_to_qimage(const image2d &image, QImage &qimage, command_queue &queue) { queue.enqueue_read_image( image, dim(0, 0), dim(qimage.width(), qimage.height()), qimage.bits() ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP interop/qt/qvector.hpp 0000644 00000002546 15125510617 0011056 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for 
// more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP
#define BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP

#include <boost/compute/detail/is_contiguous_iterator.hpp>

#include <QVector>

namespace boost {
namespace compute {
namespace detail {

// Marks QVector<T>::iterator as a contiguous iterator. The specialization is
// selected only when Iterator is exactly the iterator type of the QVector
// instantiated with Iterator::value_type.
template<class Iterator>
struct _is_contiguous_iterator<
    Iterator,
    typename boost::enable_if<
        typename boost::is_same<
            Iterator,
            typename QVector<typename Iterator::value_type>::iterator
        >::type
    >::type
> : public boost::true_type {};

// Same as above for QVector<T>::const_iterator.
template<class Iterator>
struct _is_contiguous_iterator<
    Iterator,
    typename boost::enable_if<
        typename boost::is_same<
            Iterator,
            typename QVector<typename Iterator::value_type>::const_iterator
        >::type
    >::type
> : public boost::true_type {};

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP
interop/qt/qpointf.hpp 0000644 00000001273 15125510617 0011047 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP #define BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP #include <QPointF> #include <boost/compute/type_traits/type_name.hpp> BOOST_COMPUTE_TYPE_NAME(QPointF, "float2") #endif // BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP interop/qt/qtgui.hpp 0000644 00000001161 15125510617 0010514 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP #define BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP #include <boost/compute/interop/qt/qimage.hpp> #endif // BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP interop/qt/qpoint.hpp 0000644 00000001264 15125510617 0010701 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP #define BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP #include <QPoint> #include <boost/compute/type_traits/type_name.hpp> BOOST_COMPUTE_TYPE_NAME(QPoint, "int2") #endif // BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP interop/qt/qtcore.hpp 0000644 00000001324 15125510617 0010661 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP #define BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP #include <boost/compute/interop/qt/qpoint.hpp> #include <boost/compute/interop/qt/qpointf.hpp> #include <boost/compute/interop/qt/qvector.hpp> #endif // BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP interop/opencv.hpp 0000644 00000001241 15125510617 0010230 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_HPP #include <boost/compute/interop/opencv/core.hpp> #include <boost/compute/interop/opencv/highgui.hpp> #endif // BOOST_COMPUTE_INTEROP_OPENCV_HPP memory/svm_ptr.hpp 0000644 00000010536 15125510617 0010267 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_SVM_PTR_HPP #define BOOST_COMPUTE_MEMORY_SVM_PTR_HPP #include <boost/type_traits.hpp> #include <boost/static_assert.hpp> #include <boost/assert.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/context.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for svm_ptr<T> template<class T> class svm_ptr; // svm functions require OpenCL 2.0 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace detail { template<class T, class IndexExpr> struct svm_ptr_index_expr { typedef T result_type; svm_ptr_index_expr(const svm_ptr<T> &svm_ptr, const IndexExpr &expr) : m_svm_ptr(svm_ptr), m_expr(expr) { } operator T() const { BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value, "Index expression must be integral"); BOOST_ASSERT(m_svm_ptr.get()); const context &context = m_svm_ptr.get_context(); const device &device = context.get_device(); command_queue queue(context, device); T value; T* ptr = static_cast<T*>(m_svm_ptr.get()) + 
static_cast<std::ptrdiff_t>(m_expr); queue.enqueue_svm_map(static_cast<void*>(ptr), sizeof(T), CL_MAP_READ); value = *(ptr); queue.enqueue_svm_unmap(static_cast<void*>(ptr)).wait(); return value; } const svm_ptr<T> &m_svm_ptr; IndexExpr m_expr; }; } // end detail namespace #endif template<class T> class svm_ptr { public: typedef T value_type; typedef std::ptrdiff_t difference_type; typedef T* pointer; typedef T& reference; typedef std::random_access_iterator_tag iterator_category; svm_ptr() : m_ptr(0) { } svm_ptr(void *ptr, const context &context) : m_ptr(static_cast<T*>(ptr)), m_context(context) { } svm_ptr(const svm_ptr<T> &other) : m_ptr(other.m_ptr), m_context(other.m_context) { } svm_ptr<T>& operator=(const svm_ptr<T> &other) { m_ptr = other.m_ptr; m_context = other.m_context; return *this; } ~svm_ptr() { } void* get() const { return m_ptr; } svm_ptr<T> operator+(difference_type n) { return svm_ptr<T>(m_ptr + n, m_context); } difference_type operator-(svm_ptr<T> other) { BOOST_ASSERT(other.m_context == m_context); return m_ptr - other.m_ptr; } const context& get_context() const { return m_context; } bool operator==(const svm_ptr<T>& other) const { return (other.m_context == m_context) && (m_ptr == other.m_ptr); } bool operator!=(const svm_ptr<T>& other) const { return (other.m_context != m_context) || (m_ptr != other.m_ptr); } // svm functions require OpenCL 2.0 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// \internal_ template<class Expr> detail::svm_ptr_index_expr<T, Expr> operator[](const Expr &expr) const { BOOST_ASSERT(m_ptr); return detail::svm_ptr_index_expr<T, Expr>(*this, expr); } #endif private: T *m_ptr; context m_context; }; namespace detail { /// \internal_ template<class T> struct set_kernel_arg<svm_ptr<T> > { void operator()(kernel &kernel_, size_t index, const svm_ptr<T> &ptr) { kernel_.set_arg_svm_ptr(index, ptr.get()); } }; } // end detail namespace /// \internal_ (is_device_iterator specialization for 
/// svm_ptr)
template<class T>
struct is_device_iterator<svm_ptr<T> > : boost::true_type {};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
memory/local_buffer.hpp 0000644 00000004314 15125510617 0011215 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
#define BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP

#include <boost/compute/cl.hpp>
#include <boost/compute/kernel.hpp>

namespace boost {
namespace compute {

/// \class local_buffer
/// \brief Represents a local memory buffer on the device.
///
/// The local_buffer class represents a block of local memory on a compute
/// device.
///
/// This class is most commonly used to set local memory arguments for compute
/// kernels:
/// \code
/// // set argument to a local buffer with storage for 32 float's
/// kernel.set_arg(0, local_buffer<float>(32));
/// \endcode
///
/// The object itself only records an element count; it is turned into a
/// kernel argument by the set_kernel_arg specialization below.
///
/// \see buffer, kernel
template<class T>
class local_buffer
{
public:
    /// Creates a local buffer object for \p size elements.
    local_buffer(const size_t size)
        : m_size(size)
    {
    }

    /// Creates a local buffer object as a copy of \p other.
    local_buffer(const local_buffer &other)
        : m_size(other.m_size)
    {
    }

    /// Copies \p other to \c *this.
    local_buffer& operator=(const local_buffer &other)
    {
        if(this != &other){
            m_size = other.m_size;
        }

        return *this;
    }

    /// Destroys the local memory object.
    ~local_buffer()
    {
    }

    /// Returns the number of elements in the local buffer.
    size_t size() const
    {
        return m_size;
    }

private:
    size_t m_size; // element count (not bytes)
};

namespace detail {

// set_kernel_arg specialization for local_buffer<T>
//
// Sets the argument with a size of size() * sizeof(T) bytes and a null
// value pointer.
template<class T>
struct set_kernel_arg<local_buffer<T> >
{
    void operator()(kernel &kernel_, size_t index, const local_buffer<T> &buffer)
    {
        kernel_.set_arg(index, buffer.size() * sizeof(T), 0);
    }
};

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
random/uniform_int_distribution.hpp 0000644 00000007016 15125510617 0013674 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP
#define BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP

#include <limits>

#include <boost/type_traits.hpp>
#include <boost/static_assert.hpp>

#include <boost/compute/command_queue.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/function.hpp>
#include <boost/compute/types/fundamental.hpp>
#include <boost/compute/algorithm/copy_if.hpp>
#include <boost/compute/algorithm/transform.hpp>

namespace boost {
namespace compute {

/// \class uniform_int_distribution
/// \brief Produces uniformly distributed random integers
///
/// The following example shows how to setup a uniform int distribution to
/// produce random integers 0 and 1.
///
/// \snippet test/test_uniform_int_distribution.cpp generate
///
template<class IntType = uint_>
class uniform_int_distribution
{
public:
    typedef IntType result_type;

    /// Creates a new uniform distribution producing numbers in the range
    /// [\p a, \p b].
explicit uniform_int_distribution(IntType a = 0, IntType b = (std::numeric_limits<IntType>::max)()) : m_a(a), m_b(b) { } /// Destroys the uniform_int_distribution object. ~uniform_int_distribution() { } /// Returns the minimum value of the distribution. result_type a() const { return m_a; } /// Returns the maximum value of the distribution. result_type b() const { return m_b; } /// Generates uniformily distributed integers and stores /// them to the range [\p first, \p last). template<class OutputIterator, class Generator> void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { size_t size = std::distance(first, last); typedef typename Generator::result_type g_result_type; vector<g_result_type> tmp(size, queue.get_context()); vector<g_result_type> tmp2(size, queue.get_context()); uint_ bound = ((uint_(-1))/(m_b-m_a+1))*(m_b-m_a+1); buffer_iterator<g_result_type> tmp2_iter; while(size>0) { generator.generate(tmp.begin(), tmp.begin() + size, queue); tmp2_iter = copy_if(tmp.begin(), tmp.begin() + size, tmp2.begin(), _1 <= bound, queue); size = std::distance(tmp2_iter, tmp2.end()); } BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const g_result_type x), { return LO + (x % (HI-LO+1)); }); scale_random.define("LO", boost::lexical_cast<std::string>(m_a)); scale_random.define("HI", boost::lexical_cast<std::string>(m_b)); transform(tmp2.begin(), tmp2.end(), first, scale_random, queue); } private: IntType m_a; IntType m_b; BOOST_STATIC_ASSERT_MSG( boost::is_integral<IntType>::value, "Template argument must be integral" ); }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP random/linear_congruential_engine.hpp 0000644 00000016512 15125510617 0014116 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See 
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP
#define BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP

#include <algorithm>

#include <boost/compute/types.hpp>
#include <boost/compute/buffer.hpp>
#include <boost/compute/kernel.hpp>
#include <boost/compute/context.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/transform.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/iterator/discard_iterator.hpp>
#include <boost/compute/utility/program_cache.hpp>

namespace boost {
namespace compute {

///
/// \class linear_congruential_engine
/// \brief 'Quick and Dirty' linear congruential engine
///
/// Quick and dirty linear congruential engine to generate low quality
/// random numbers very quickly. For uses in which good quality of random
/// numbers is required(Monte-Carlo Simulations), use other engines like
/// Mersenne Twister instead.
///
template<class T = uint_>
class linear_congruential_engine
{
public:
    typedef T result_type;
    static const T default_seed = 1;
    // multiplier; the same value is hard-coded in the OpenCL source built
    // by load_program()
    static const T a = 1099087573;
    // number of entries in the multiplicand table; the same value is
    // hard-coded in the OpenCL source built by load_program()
    static const size_t threads = 1024;

    /// Creates a new linear_congruential_engine and seeds it with \p value.
    ///
    /// The multiplicand buffer is allocated in the context of \p queue with
    /// room for \c threads values of \c result_type.
    explicit linear_congruential_engine(command_queue &queue,
                                        result_type value = default_seed)
        : m_context(queue.get_context()),
          m_multiplicands(m_context, threads * sizeof(result_type))
    {
        // setup program (must come first: generate_multiplicands() creates
        // its kernel from m_program)
        load_program();

        // seed state
        seed(value, queue);

        // generate multiplicands
        generate_multiplicands(queue);
    }

    /// Creates a new linear_congruential_engine object as a copy of \p other.
linear_congruential_engine(const linear_congruential_engine<T> &other) : m_context(other.m_context), m_program(other.m_program), m_seed(other.m_seed), m_multiplicands(other.m_multiplicands) { } /// Copies \p other to \c *this. linear_congruential_engine<T>& operator=(const linear_congruential_engine<T> &other) { if(this != &other){ m_context = other.m_context; m_program = other.m_program; m_seed = other.m_seed; m_multiplicands = other.m_multiplicands; } return *this; } /// Destroys the linear_congruential_engine object. ~linear_congruential_engine() { } /// Seeds the random number generator with \p value. /// /// \param value seed value for the random-number generator /// \param queue command queue to perform the operation /// /// If no seed value is provided, \c default_seed is used. void seed(result_type value, command_queue &queue) { (void) queue; m_seed = value; } /// \overload void seed(command_queue &queue) { seed(default_seed, queue); } /// Generates random numbers and stores them to the range [\p first, \p last). 
template<class OutputIterator> void generate(OutputIterator first, OutputIterator last, command_queue &queue) { size_t size = detail::iterator_range_size(first, last); kernel fill_kernel(m_program, "fill"); fill_kernel.set_arg(1, m_multiplicands); fill_kernel.set_arg(2, first.get_buffer()); size_t offset = 0; for(;;){ size_t count = 0; if(size > threads){ count = (std::min)(static_cast<size_t>(threads), size - offset); } else { count = size; } fill_kernel.set_arg(0, static_cast<const uint_>(m_seed)); fill_kernel.set_arg(3, static_cast<const uint_>(offset)); queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0); offset += count; if(offset >= size){ break; } update_seed(queue); } } /// \internal_ void generate(discard_iterator first, discard_iterator last, command_queue &queue) { (void) queue; size_t size = detail::iterator_range_size(first, last); uint_ max_mult = detail::read_single_value<T>(m_multiplicands, threads-1, queue); while(size >= threads) { m_seed *= max_mult; size -= threads; } m_seed *= detail::read_single_value<T>(m_multiplicands, size-1, queue); } /// Generates random numbers, transforms them with \p op, and then stores /// them to the range [\p first, \p last). template<class OutputIterator, class Function> void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) { vector<T> tmp(std::distance(first, last), queue.get_context()); generate(tmp.begin(), tmp.end(), queue); transform(tmp.begin(), tmp.end(), first, op, queue); } /// Generates \p z random numbers and discards them. 
void discard(size_t z, command_queue &queue) { generate(discard_iterator(0), discard_iterator(z), queue); } private: /// \internal_ /// Generates the multiplicands for each thread void generate_multiplicands(command_queue &queue) { kernel multiplicand_kernel = m_program.create_kernel("multiplicand"); multiplicand_kernel.set_arg(0, m_multiplicands); queue.enqueue_task(multiplicand_kernel); } /// \internal_ void update_seed(command_queue &queue) { m_seed *= detail::read_single_value<T>(m_multiplicands, threads-1, queue); } /// \internal_ void load_program() { boost::shared_ptr<program_cache> cache = program_cache::get_global_cache(m_context); std::string cache_key = std::string("__boost_linear_congruential_engine_") + type_name<T>(); const char source[] = "__kernel void multiplicand(__global uint *multiplicands)\n" "{\n" " uint a = 1099087573;\n" " multiplicands[0] = a;\n" " for(uint i = 1; i < 1024; i++){\n" " multiplicands[i] = a * multiplicands[i-1];\n" " }\n" "}\n" "__kernel void fill(const uint seed,\n" " __global uint *multiplicands,\n" " __global uint *result," " const uint offset)\n" "{\n" " const uint i = get_global_id(0);\n" " result[offset+i] = seed * multiplicands[i];\n" "}\n"; m_program = cache->get_or_build(cache_key, std::string(), source, m_context); } private: context m_context; program m_program; T m_seed; buffer m_multiplicands; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP random/bernoulli_distribution.hpp 0000644 00000005630 15125510617 0013336 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP #include <boost/assert.hpp> #include <boost/type_traits.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/function.hpp> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/literal.hpp> namespace boost { namespace compute { /// /// \class bernoulli_distribution /// \brief Produces random boolean values according to the following /// discrete probability function with parameter p : /// P(true/p) = p and P(false/p) = (1 - p) /// /// The following example shows how to setup a bernoulli distribution to /// produce random boolean values with parameter p = 0.25 /// /// \snippet test/test_bernoulli_distribution.cpp generate /// template<class RealType = float> class bernoulli_distribution { public: /// Creates a new bernoulli distribution bernoulli_distribution(RealType p = 0.5f) : m_p(p) { } /// Destroys the bernoulli_distribution object ~bernoulli_distribution() { } /// Returns the value of the parameter p RealType p() const { return m_p; } /// Generates bernoulli distributed booleans and stores /// them in the range [\p first, \p last). 
template<class OutputIterator, class Generator> void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { size_t count = detail::iterator_range_size(first, last); vector<uint_> tmp(count, queue.get_context()); generator.generate(tmp.begin(), tmp.end(), queue); BOOST_COMPUTE_FUNCTION(bool, scale_random, (const uint_ x), { return (convert_RealType(x) / MAX_RANDOM) < PARAM; }); scale_random.define("PARAM", detail::make_literal(m_p)); scale_random.define("MAX_RANDOM", "UINT_MAX"); scale_random.define( "convert_RealType", std::string("convert_") + type_name<RealType>() ); transform( tmp.begin(), tmp.end(), first, scale_random, queue ); } private: RealType m_p; BOOST_STATIC_ASSERT_MSG( boost::is_floating_point<RealType>::value, "Template argument must be a floating point type" ); }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP random/uniform_real_distribution.hpp 0000644 00000006622 15125510617 0014027 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP #include <boost/assert.hpp> #include <boost/type_traits.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/function.hpp> #include <boost/compute/detail/literal.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// \class uniform_real_distribution /// \brief Produces uniformly distributed random floating-point numbers. 
/// /// The following example shows how to setup a uniform real distribution to /// produce random \c float values between \c 1 and \c 100. /// /// \snippet test/test_uniform_real_distribution.cpp generate /// /// \see default_random_engine, normal_distribution template<class RealType = float> class uniform_real_distribution { public: typedef RealType result_type; /// Creates a new uniform distribution producing numbers in the range /// [\p a, \p b). /// Requires a < b uniform_real_distribution(RealType a = 0.f, RealType b = 1.f) : m_a(a), m_b(b) { BOOST_ASSERT(a < b); } /// Destroys the uniform_real_distribution object. ~uniform_real_distribution() { } /// Returns the minimum value of the distribution. result_type a() const { return m_a; } /// Returns the maximum value of the distribution. result_type b() const { return m_b; } /// Generates uniformly distributed floating-point numbers and stores /// them to the range [\p first, \p last). template<class OutputIterator, class Generator> void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { BOOST_COMPUTE_FUNCTION(RealType, scale_random, (const uint_ x), { return nextafter(LO + (convert_RealType(x) / MAX_RANDOM) * (HI - LO), (RealType) LO); }); scale_random.define("LO", detail::make_literal(m_a)); scale_random.define("HI", detail::make_literal(m_b)); scale_random.define("MAX_RANDOM", "UINT_MAX"); scale_random.define( "convert_RealType", std::string("convert_") + type_name<RealType>() ); scale_random.define("RealType", type_name<RealType>()); generator.generate( first, last, scale_random, queue ); } /// \internal_ (deprecated) template<class OutputIterator, class Generator> void fill(OutputIterator first, OutputIterator last, Generator &g, command_queue &queue) { generate(first, last, g, queue); } private: RealType m_a; RealType m_b; BOOST_STATIC_ASSERT_MSG( boost::is_floating_point<RealType>::value, "Template argument must be a floating point type" ); }; } // end 
compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP random/normal_distribution.hpp 0000644 00000011042 15125510617 0012625 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP
#define BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP

#include <limits>

#include <boost/assert.hpp>
#include <boost/type_traits.hpp>

#include <boost/compute/command_queue.hpp>
#include <boost/compute/function.hpp>
#include <boost/compute/types/fundamental.hpp>
#include <boost/compute/type_traits/make_vector_type.hpp>

namespace boost {
namespace compute {

/// \class normal_distribution
/// \brief Produces random, normally-distributed floating-point numbers.
///
/// The following example shows how to set up a normal distribution to
/// produce random \c float values centered at \c 5:
///
/// \snippet test/test_normal_distribution.cpp generate
///
/// \tparam RealType floating-point type of the generated values (enforced
///         by a static assertion in the class)
///
/// \see default_random_engine, uniform_real_distribution
template<class RealType = float>
class normal_distribution
{
public:
typedef RealType result_type;

/// Creates a new normal distribution producing numbers with the given
/// \p mean and \p stddev (defaults: mean 0, standard deviation 1).
normal_distribution(RealType mean = 0.f, RealType stddev = 1.f)
    : m_mean(mean),
      m_stddev(stddev)
{
}

/// Destroys the normal distribution object.
~normal_distribution()
{
}

/// Returns the mean value of the distribution.
result_type mean() const
{
    return m_mean;
}

/// Returns the standard-deviation of the distribution.
result_type stddev() const { return m_stddev; } /// Returns the minimum value of the distribution. result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return -std::numeric_limits<RealType>::infinity(); } /// Returns the maximum value of the distribution. result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const { return std::numeric_limits<RealType>::infinity(); } /// Generates normally-distributed floating-point numbers and stores /// them to the range [\p first, \p last). template<class OutputIterator, class Generator> void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { typedef typename make_vector_type<RealType, 2>::type RealType2; size_t count = detail::iterator_range_size(first, last); vector<uint_> tmp(count, queue.get_context()); generator.generate(tmp.begin(), tmp.end(), queue); BOOST_COMPUTE_FUNCTION(RealType2, box_muller, (const uint2_ x), { const RealType one = 1; const RealType two = 2; // Use nextafter to push values down into [0,1) range; without this, floating point rounding can // lead to have x1 = 1, but that would lead to taking the log of 0, which would result in negative // infinities; by pushing the values off 1 towards 0, we ensure this won't happen. 
const RealType x1 = nextafter(x.x / (RealType) UINT_MAX, (RealType) 0); const RealType x2 = x.y / (RealType) UINT_MAX; const RealType rho = sqrt(-two * log(one-x1)); const RealType z1 = rho * cos(two * M_PI_F * x2); const RealType z2 = rho * sin(two * M_PI_F * x2); return (RealType2)(MEAN, MEAN) + (RealType2)(z1, z2) * (RealType2)(STDDEV, STDDEV); }); box_muller.define("MEAN", boost::lexical_cast<std::string>(m_mean)); box_muller.define("STDDEV", boost::lexical_cast<std::string>(m_stddev)); box_muller.define("RealType", type_name<RealType>()); box_muller.define("RealType2", type_name<RealType2>()); transform( make_buffer_iterator<uint2_>(tmp.get_buffer(), 0), make_buffer_iterator<uint2_>(tmp.get_buffer(), count / 2), make_buffer_iterator<RealType2>(first.get_buffer(), 0), box_muller, queue ); } private: RealType m_mean; RealType m_stddev; BOOST_STATIC_ASSERT_MSG( boost::is_floating_point<RealType>::value, "Template argument must be a floating point type" ); }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP random/threefry_engine.hpp 0000644 00000033161 15125510617 0011721 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP #include <algorithm> #include <boost/compute/types.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/context.hpp> #include <boost/compute/program.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/utility/program_cache.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/iterator/discard_iterator.hpp> namespace boost { namespace compute { /// \class threefry_engine /// \brief Threefry pseudorandom number generator. template<class T = uint_> class threefry_engine { public: typedef T result_type; static const ulong_ default_seed = 0UL; /// Creates a new threefry_engine and seeds it with \p value. explicit threefry_engine(command_queue &queue, ulong_ value = default_seed) : m_key(value), m_counter(0), m_context(queue.get_context()) { // Load program load_program(); } /// Creates a new threefry_engine object as a copy of \p other. threefry_engine(const threefry_engine<T> &other) : m_key(other.m_key), m_counter(other.m_counter), m_context(other.m_context), m_program(other.m_program) { } /// Copies \p other to \c *this. threefry_engine<T>& operator=(const threefry_engine<T> &other) { if(this != &other){ m_key = other.m_key; m_counter = other.m_counter; m_context = other.m_context; m_program = other.m_program; } return *this; } /// Destroys the threefry_engine object. ~threefry_engine() { } /// Seeds the random number generator with \p value. /// /// \param value seed value for the random-number generator /// \param queue command queue to perform the operation /// /// If no seed value is provided, \c default_seed is used. 
/// Setting the seed replaces the key and restarts the counter at zero.
/// \p queue is unused: the state is kept on the host.
void seed(ulong_ value, command_queue &queue)
{
    (void) queue;
    m_key = value;
    // Reset counter
    m_counter = 0;
}

/// \overload
void seed(command_queue &queue)
{
    seed(default_seed, queue);
}

/// Generates random numbers and stores them to the range [\p first, \p last).
///
/// The "fill" kernel is launched with <tt>(size + 1) / 2</tt> work-items;
/// each work-item encrypts one counter value and writes up to two 32-bit
/// outputs. Afterwards the host counter is advanced by \c size.
///
/// \param first,last device output range; \p first must provide
///        \c get_buffer()
/// \param queue command queue on which the kernel is enqueued
// NOTE(review): an empty range enqueues a kernel with a global size of 0,
// which OpenCL versions before 2.1 reject - confirm whether callers can
// pass empty ranges.
template<class OutputIterator>
void generate(OutputIterator first, OutputIterator last, command_queue &queue)
{
    const size_t size = detail::iterator_range_size(first, last);

    kernel fill_kernel(m_program, "fill");
    fill_kernel.set_arg(0, first.get_buffer());
    fill_kernel.set_arg(1, static_cast<const uint_>(size));
    // The 64-bit key and counter are passed to the kernel's `uint2`
    // parameters.
    fill_kernel.set_arg(2, m_key);
    fill_kernel.set_arg(3, m_counter);

    queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0);

    discard(size, queue);
}

/// \internal_
/// Advances the counter by the length of [\p first, \p last) without
/// generating anything.
void generate(discard_iterator first, discard_iterator last, command_queue &queue)
{
    (void) queue;
    ulong_ offset = std::distance(first, last);
    m_counter += offset;
}

/// Generates random numbers, transforms them with \p op, and then stores
/// them to the range [\p first, \p last).
///
/// The raw values are first generated into a temporary device vector
/// allocated in the context of \p queue.
template<class OutputIterator, class Function>
void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
{
    vector<T> tmp(std::distance(first, last), queue.get_context());
    generate(tmp.begin(), tmp.end(), queue);
    ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue);
}

/// Generates \p z random numbers and discards them.
void discard(size_t z, command_queue &queue)
{
    generate(discard_iterator(0), discard_iterator(z), queue);
}

private:
/// \internal_
/// Fetches the engine's program from the context's global program cache,
/// building it from the OpenCL source below on a cache miss.
void load_program()
{
    boost::shared_ptr<program_cache> cache =
        program_cache::get_global_cache(m_context);
    std::string cache_key =
        std::string("__boost_threefry_engine_32x2");

    // Copyright 2010-2012, D. E. Shaw Research.
    // All rights reserved.

    // Redistribution and use in source and binary forms, with or without
    // modification, are permitted provided that the following conditions are
    // met:

    // * Redistributions of source code must retain the above copyright
    // notice, this list of conditions, and the following disclaimer.

    // * Redistributions in binary form must reproduce the above copyright
    // notice, this list of conditions, and the following disclaimer in the
    // documentation and/or other materials provided with the distribution.

    // * Neither the name of D. E. Shaw Research nor the names of its
    // contributors may be used to endorse or promote products derived from
    // this software without specific prior written permission.

    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    const char source[] =
        "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
        "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
        "enum r123_enum_threefry32x2 {\n"
        " R_32x2_0_0=13,\n"
        " R_32x2_1_0=15,\n"
        " R_32x2_2_0=26,\n"
        " R_32x2_3_0= 6,\n"
        " R_32x2_4_0=17,\n"
        " R_32x2_5_0=29,\n"
        " R_32x2_6_0=16,\n"
        " R_32x2_7_0=24\n"
        "};\n"
        "static uint RotL_32(uint x, uint N)\n"
        "{\n"
        " return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
        "}\n"
        "struct r123array2x32 {\n"
        " uint v[2];\n"
        "};\n"
        "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
        "typedef struct r123array2x32 threefry2x32_key_t;\n"
        "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
        "{\n"
        " threefry2x32_ctr_t X;\n"
        " uint ks[3];\n"
        " uint i; \n"
        " ks[2] = SKEIN_KS_PARITY_32;\n"
        " for (i=0;i < 2; i++) {\n"
        " ks[i] = k.v[i];\n"
        " X.v[i] = in.v[i];\n"
        " ks[2] ^= k.v[i];\n"
        " }\n"
        " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
        " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>3){\n"
        " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
        " X.v[1] += 1;\n"
        " }\n"
        " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>7){\n"
        " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
        " X.v[1] += 2;\n"
        " }\n"
        " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>11){\n"
        " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
        " X.v[1] += 3;\n"
        " }\n"
        " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>15){\n"
        " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
        " X.v[1] += 4;\n"
        " }\n"
        " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>19){\n"
        " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
        " X.v[1] += 5;\n"
        " }\n"
        " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>23){\n"
        " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
        " X.v[1] += 6;\n"
        " }\n"
        " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>27){\n"
        " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
        " X.v[1] += 7;\n"
        " }\n"
        " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
        " if(Nrounds>31){\n"
        " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
        " X.v[1] += 8;\n"
        " }\n"
        " return X;\n"
        "}\n"
        "__kernel void fill(__global uint * output,\n"
        " const uint output_size,\n"
        " const uint2 key,\n"
        " const uint2 counter)\n"
        "{\n"
        " uint gid = get_global_id(0);\n"
        " threefry2x32_ctr_t c;\n"
        " c.v[0] = counter.x + gid;\n"
        " c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n"
        "\n"
        " threefry2x32_key_t k = { {key.x, key.y} };\n"
        "\n"
        " threefry2x32_ctr_t result;\n"
        " result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n"
        "\n"
        " if(gid < output_size/2)\n"
        " {\n"
        " output[2 * gid] = result.v[0];\n"
        " output[2 * gid + 1] = result.v[1];\n"
        " }\n"
        " else if(gid < (output_size+1)/2)\n"
        " output[2 * gid] = result.v[0];\n"
        "}\n";

    m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
}

// Engine state
ulong_ m_key; // 2 x 32bit
ulong_ m_counter;
// OpenCL
context m_context;
program m_program;
};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP random/discrete_distribution.hpp 0000644 00000012425 15125510617 0013145 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
// // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP #include <numeric> #include <boost/config.hpp> #include <boost/type_traits.hpp> #include <boost/static_assert.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/function.hpp> #include <boost/compute/algorithm/accumulate.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/detail/literal.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// \class discrete_distribution /// \brief Produces random integers on the interval [0, n), where /// probability of each integer is given by the weight of the ith /// integer divided by the sum of all weights. /// /// The following example shows how to setup a discrete distribution to /// produce 0 and 1 with equal probability /// /// \snippet test/test_discrete_distribution.cpp generate /// template<class IntType = uint_> class discrete_distribution { public: typedef IntType result_type; /// Creates a new discrete distribution with a single weight p = { 1 }. /// This distribution produces only zeroes. discrete_distribution() : m_probabilities(1, double(1)), m_scanned_probabilities(1, double(1)) { } /// Creates a new discrete distribution with weights given by /// the range [\p first, \p last). 
template<class InputIterator> discrete_distribution(InputIterator first, InputIterator last) : m_probabilities(first, last), m_scanned_probabilities(std::distance(first, last)) { if(first != last) { // after this m_scanned_probabilities.back() is a sum of all // weights from the range [first, last) std::partial_sum(first, last, m_scanned_probabilities.begin()); std::vector<double>::iterator i = m_probabilities.begin(); std::vector<double>::iterator j = m_scanned_probabilities.begin(); for(; i != m_probabilities.end(); ++i, ++j) { // dividing each weight by sum of all weights to // get probabilities *i = *i / m_scanned_probabilities.back(); // dividing each partial sum of weights by sum of // all weights to get partial sums of probabilities *j = *j / m_scanned_probabilities.back(); } } else { m_probabilities.push_back(double(1)); m_scanned_probabilities.push_back(double(1)); } } /// Destroys the discrete_distribution object. ~discrete_distribution() { } /// Returns the probabilities ::std::vector<double> probabilities() const { return m_probabilities; } /// Returns the minimum potentially generated value. result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return result_type(0); } /// Returns the maximum potentially generated value. result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const { size_t type_max = static_cast<size_t>( (std::numeric_limits<result_type>::max)() ); if(m_probabilities.size() - 1 > type_max) { return (std::numeric_limits<result_type>::max)(); } return static_cast<result_type>(m_probabilities.size() - 1); } /// Generates uniformly distributed integers and stores /// them to the range [\p first, \p last). 
template<class OutputIterator, class Generator> void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { std::string source = "inline IntType scale_random(uint x)\n"; source = source + "{\n" + "float rno = convert_float(x) / UINT_MAX;\n"; for(size_t i = 0; i < m_scanned_probabilities.size() - 1; i++) { source = source + "if(rno <= " + detail::make_literal<float>(m_scanned_probabilities[i]) + ")\n" + " return " + detail::make_literal(i) + ";\n"; } source = source + "return " + detail::make_literal(m_scanned_probabilities.size() - 1) + ";\n" + "}\n"; BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const uint_ x), {}); scale_random.set_source(source); scale_random.define("IntType", type_name<IntType>()); generator.generate(first, last, scale_random, queue); } private: ::std::vector<double> m_probabilities; ::std::vector<double> m_scanned_probabilities; BOOST_STATIC_ASSERT_MSG( boost::is_integral<IntType>::value, "Template argument must be integral" ); }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP random/mersenne_twister_engine.hpp 0000644 00000020140 15125510617 0013457 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP #define BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP #include <algorithm> #include <boost/compute/types.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/context.hpp> #include <boost/compute/program.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/iterator/discard_iterator.hpp> #include <boost/compute/utility/program_cache.hpp> namespace boost { namespace compute { /// \class mersenne_twister_engine /// \brief Mersenne twister pseudorandom number generator. template<class T> class mersenne_twister_engine { public: typedef T result_type; static const T default_seed = 5489U; static const T n = 624; static const T m = 397; /// Creates a new mersenne_twister_engine and seeds it with \p value. explicit mersenne_twister_engine(command_queue &queue, result_type value = default_seed) : m_context(queue.get_context()), m_state_buffer(m_context, n * sizeof(result_type)) { // setup program load_program(); // seed state seed(value, queue); } /// Creates a new mersenne_twister_engine object as a copy of \p other. mersenne_twister_engine(const mersenne_twister_engine<T> &other) : m_context(other.m_context), m_state_index(other.m_state_index), m_program(other.m_program), m_state_buffer(other.m_state_buffer) { } /// Copies \p other to \c *this. mersenne_twister_engine<T>& operator=(const mersenne_twister_engine<T> &other) { if(this != &other){ m_context = other.m_context; m_state_index = other.m_state_index; m_program = other.m_program; m_state_buffer = other.m_state_buffer; } return *this; } /// Destroys the mersenne_twister_engine object. ~mersenne_twister_engine() { } /// Seeds the random number generator with \p value. 
/// /// \param value seed value for the random-number generator /// \param queue command queue to perform the operation /// /// If no seed value is provided, \c default_seed is used. void seed(result_type value, command_queue &queue) { kernel seed_kernel = m_program.create_kernel("seed"); seed_kernel.set_arg(0, value); seed_kernel.set_arg(1, m_state_buffer); queue.enqueue_task(seed_kernel); m_state_index = 0; } /// \overload void seed(command_queue &queue) { seed(default_seed, queue); } /// Generates random numbers and stores them to the range [\p first, \p last). template<class OutputIterator> void generate(OutputIterator first, OutputIterator last, command_queue &queue) { const size_t size = detail::iterator_range_size(first, last); kernel fill_kernel(m_program, "fill"); fill_kernel.set_arg(0, m_state_buffer); fill_kernel.set_arg(2, first.get_buffer()); size_t offset = 0; size_t &p = m_state_index; for(;;){ size_t count = 0; if(size > n){ count = (std::min)(static_cast<size_t>(n), size - offset); } else { count = size; } fill_kernel.set_arg(1, static_cast<const uint_>(p)); fill_kernel.set_arg(3, static_cast<const uint_>(offset)); queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0); p += count; offset += count; if(offset >= size){ break; } generate_state(queue); p = 0; } } /// \internal_ void generate(discard_iterator first, discard_iterator last, command_queue &queue) { (void) queue; m_state_index += std::distance(first, last); } /// Generates random numbers, transforms them with \p op, and then stores /// them to the range [\p first, \p last). template<class OutputIterator, class Function> void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) { vector<T> tmp(std::distance(first, last), queue.get_context()); generate(tmp.begin(), tmp.end(), queue); transform(tmp.begin(), tmp.end(), first, op, queue); } /// Generates \p z random numbers and discards them. 
void discard(size_t z, command_queue &queue) { generate(discard_iterator(0), discard_iterator(z), queue); } /// \internal_ (deprecated) template<class OutputIterator> void fill(OutputIterator first, OutputIterator last, command_queue &queue) { generate(first, last, queue); } private: /// \internal_ void generate_state(command_queue &queue) { kernel generate_state_kernel = m_program.create_kernel("generate_state"); generate_state_kernel.set_arg(0, m_state_buffer); queue.enqueue_task(generate_state_kernel); } /// \internal_ void load_program() { boost::shared_ptr<program_cache> cache = program_cache::get_global_cache(m_context); std::string cache_key = std::string("__boost_mersenne_twister_engine_") + type_name<T>(); const char source[] = "static uint twiddle(uint u, uint v)\n" "{\n" " return (((u & 0x80000000U) | (v & 0x7FFFFFFFU)) >> 1) ^\n" " ((v & 1U) ? 0x9908B0DFU : 0x0U);\n" "}\n" "__kernel void generate_state(__global uint *state)\n" "{\n" " const uint n = 624;\n" " const uint m = 397;\n" " for(uint i = 0; i < (n - m); i++)\n" " state[i] = state[i+m] ^ twiddle(state[i], state[i+1]);\n" " for(uint i = n - m; i < (n - 1); i++)\n" " state[i] = state[i+m-n] ^ twiddle(state[i], state[i+1]);\n" " state[n-1] = state[m-1] ^ twiddle(state[n-1], state[0]);\n" "}\n" "__kernel void seed(const uint s, __global uint *state)\n" "{\n" " const uint n = 624;\n" " state[0] = s & 0xFFFFFFFFU;\n" " for(uint i = 1; i < n; i++){\n" " state[i] = 1812433253U * (state[i-1] ^ (state[i-1] >> 30)) + i;\n" " state[i] &= 0xFFFFFFFFU;\n" " }\n" " generate_state(state);\n" "}\n" "static uint random_number(__global uint *state, const uint p)\n" "{\n" " uint x = state[p];\n" " x ^= (x >> 11);\n" " x ^= (x << 7) & 0x9D2C5680U;\n" " x ^= (x << 15) & 0xEFC60000U;\n" " return x ^ (x >> 18);\n" "}\n" "__kernel void fill(__global uint *state,\n" " const uint state_index,\n" " __global uint *vector,\n" " const uint offset)\n" "{\n" " const uint i = get_global_id(0);\n" " vector[offset+i] = 
random_number(state, state_index + i);\n" "}\n"; m_program = cache->get_or_build(cache_key, std::string(), source, m_context); } private: context m_context; size_t m_state_index; program m_program; buffer m_state_buffer; }; typedef mersenne_twister_engine<uint_> mt19937; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP random/default_random_engine.hpp 0000644 00000001441 15125510617 0013051 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP #define BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP #include <boost/compute/random/mersenne_twister_engine.hpp> namespace boost { namespace compute { typedef mt19937 default_random_engine; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP container/stack.hpp 0000644 00000003063 15125510617 0010351 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_STACK_HPP #define BOOST_COMPUTE_CONTAINER_STACK_HPP #include <boost/compute/container/vector.hpp> namespace boost { namespace compute { template<class T> class stack { public: typedef vector<T> container_type; typedef typename container_type::size_type size_type; typedef typename container_type::value_type value_type; stack() { } stack(const stack<T> &other) : m_vector(other.m_vector) { } stack<T>& operator=(const stack<T> &other) { if(this != &other){ m_vector = other.m_vector; } return *this; } ~stack() { } bool empty() const { return m_vector.empty(); } size_type size() const { return m_vector.size(); } value_type top() const { return m_vector.back(); } void push(const T &value) { m_vector.push_back(value); } void pop() { m_vector.pop_back(); } private: container_type m_vector; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_STACK_HPP container/basic_string.hpp 0000644 00000017256 15125510617 0011724 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP #define BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP #include <string> #include <cstring> #include <boost/compute/cl.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/algorithm/search.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <iosfwd> namespace boost { namespace compute { /// \class basic_string /// \brief A template for a dynamically-sized character sequence. /// /// The \c basic_string class provides a generic template for a dynamically- /// sized character sequence. This is most commonly used through the \c string /// typedef (for \c basic_string<char>). /// /// For example, to create a string on the device with its contents copied /// from a C-string on the host: /// \code /// boost::compute::string str("hello, world!"); /// \endcode /// /// \see \ref vector "vector<T>" template<class CharT, class Traits = std::char_traits<CharT> > class basic_string { public: typedef Traits traits_type; typedef typename Traits::char_type value_type; typedef size_t size_type; static const size_type npos = size_type(-1); typedef typename ::boost::compute::vector<CharT>::reference reference; typedef typename ::boost::compute::vector<CharT>::const_reference const_reference; typedef typename ::boost::compute::vector<CharT>::iterator iterator; typedef typename ::boost::compute::vector<CharT>::const_iterator const_iterator; typedef typename ::boost::compute::vector<CharT>::reverse_iterator reverse_iterator; typedef typename ::boost::compute::vector<CharT>::const_reverse_iterator const_reverse_iterator; basic_string() { } basic_string(size_type count, CharT ch) : m_data(count) { std::fill(m_data.begin(), m_data.end(), ch); } basic_string(const basic_string &other, size_type pos, size_type count = npos) : m_data(other.begin() + pos, 
other.begin() + (std::min)(other.size(), count)) { } basic_string(const char *s, size_type count) : m_data(s, s + count) { } basic_string(const char *s) : m_data(s, s + std::strlen(s)) { } template<class InputIterator> basic_string(InputIterator first, InputIterator last) : m_data(first, last) { } basic_string(const basic_string<CharT, Traits> &other) : m_data(other.m_data) { } basic_string<CharT, Traits>& operator=(const basic_string<CharT, Traits> &other) { if(this != &other){ m_data = other.m_data; } return *this; } ~basic_string() { } reference at(size_type pos) { return m_data.at(pos); } const_reference at(size_type pos) const { return m_data.at(pos); } reference operator[](size_type pos) { return m_data[pos]; } const_reference operator[](size_type pos) const { return m_data[pos]; } reference front() { return m_data.front(); } const_reference front() const { return m_data.front(); } reference back() { return m_data.back(); } const_reference back() const { return m_data.back(); } iterator begin() { return m_data.begin(); } const_iterator begin() const { return m_data.begin(); } const_iterator cbegin() const { return m_data.cbegin(); } iterator end() { return m_data.end(); } const_iterator end() const { return m_data.end(); } const_iterator cend() const { return m_data.cend(); } reverse_iterator rbegin() { return m_data.rbegin(); } const_reverse_iterator rbegin() const { return m_data.rbegin(); } const_reverse_iterator crbegin() const { return m_data.crbegin(); } reverse_iterator rend() { return m_data.rend(); } const_reverse_iterator rend() const { return m_data.rend(); } const_reverse_iterator crend() const { return m_data.crend(); } bool empty() const { return m_data.empty(); } size_type size() const { return m_data.size(); } size_type length() const { return m_data.size(); } size_type max_size() const { return m_data.max_size(); } void reserve(size_type size) { m_data.reserve(size); } size_type capacity() const { return m_data.capacity(); } void 
shrink_to_fit() { m_data.shrink_to_fit(); } void clear() { m_data.clear(); } void swap(basic_string<CharT, Traits> &other) { if(this != &other) { ::boost::compute::vector<CharT> temp_data(other.m_data); other.m_data = m_data; m_data = temp_data; } } basic_string<CharT, Traits> substr(size_type pos = 0, size_type count = npos) const { return basic_string<CharT, Traits>(*this, pos, count); } /// Finds the first character \p ch size_type find(CharT ch, size_type pos = 0) const { const_iterator iter = ::boost::compute::find(begin() + pos, end(), ch); if(iter == end()){ return npos; } else { return static_cast<size_type>(std::distance(begin(), iter)); } } /// Finds the first substring equal to \p str size_type find(basic_string& str, size_type pos = 0) const { const_iterator iter = ::boost::compute::search(begin() + pos, end(), str.begin(), str.end()); if(iter == end()){ return npos; } else { return static_cast<size_type>(std::distance(begin(), iter)); } } /// Finds the first substring equal to the character string /// pointed to by \p s. /// The length of the string is determined by the first null character. /// /// For example, the following code /// \snippet test/test_string.cpp string_find /// /// will return 5 as position. 
size_type find(const char* s, size_type pos = 0) const { basic_string str(s); const_iterator iter = ::boost::compute::search(begin() + pos, end(), str.begin(), str.end()); if(iter == end()){ return npos; } else { return static_cast<size_type>(std::distance(begin(), iter)); } } private: ::boost::compute::vector<CharT> m_data; }; template<class CharT, class Traits> std::ostream& operator<<(std::ostream& stream, boost::compute::basic_string<CharT, Traits>const& outStr) { command_queue queue = ::boost::compute::system::default_queue(); boost::compute::copy(outStr.begin(), outStr.end(), std::ostream_iterator<CharT>(stream), queue); return stream; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP container/string.hpp 0000644 00000001441 15125510617 0010550 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_STRING_HPP #define BOOST_COMPUTE_CONTAINER_STRING_HPP #include <boost/compute/types/fundamental.hpp> #include <boost/compute/container/basic_string.hpp> namespace boost { namespace compute { typedef basic_string<char_> string; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_STRING_HPP container/array.hpp 0000644 00000016766 15125510617 0010400 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_ARRAY_HPP #define BOOST_COMPUTE_CONTAINER_ARRAY_HPP #include <cstddef> #include <iterator> #include <exception> #include <boost/array.hpp> #include <boost/throw_exception.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/fill.hpp> #include <boost/compute/algorithm/swap_ranges.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/detail/capture_traits.hpp> #include <boost/compute/detail/buffer_value.hpp> namespace boost { namespace compute { /// \class array /// \brief A fixed-size container. /// /// The array container is very similar to the \ref vector container except /// its size is fixed at compile-time rather than being dynamically resizable /// at run-time. 
/// /// For example, to create a fixed-size array with eight values on the device: /// \code /// boost::compute::array<int, 8> values(context); /// \endcode /// /// The Boost.Compute \c array class provides a STL-like API and is modeled /// after the \c std::array class from the C++ standard library. /// /// \see \ref vector "vector<T>" template<class T, std::size_t N> class array { public: typedef T value_type; typedef std::size_t size_type; typedef ptrdiff_t difference_type; typedef detail::buffer_value<T> reference; typedef const detail::buffer_value<T> const_reference; typedef T* pointer; typedef const T* const_pointer; typedef buffer_iterator<T> iterator; typedef buffer_iterator<T> const_iterator; typedef std::reverse_iterator<iterator> reverse_iterator; typedef std::reverse_iterator<const_iterator> const_reverse_iterator; enum { static_size = N }; explicit array(const context &context = system::default_context()) : m_buffer(context, sizeof(T) * N) { } array(const array<T, N> &other) : m_buffer(other.m_buffer.get_context(), sizeof(T) * N) { command_queue queue = default_queue(); boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } array(const boost::array<T, N> &array, const context &context = system::default_context()) : m_buffer(context, sizeof(T) * N) { command_queue queue = default_queue(); boost::compute::copy(array.begin(), array.end(), begin(), queue); queue.finish(); } array(const array<T, N> &other, const command_queue &queue) : m_buffer(other.m_buffer.get_context(), sizeof(T) * N) { boost::compute::copy(other.begin(), other.end(), begin(), queue); } array<T, N>& operator=(const array<T, N> &other) { if(this != &other){ command_queue queue = default_queue(); boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } return *this; } array<T, N>& operator=(const boost::array<T, N> &array) { command_queue queue = default_queue(); boost::compute::copy(array.begin(), array.end(), begin(), queue); 
queue.finish(); return *this; } ~array() { } iterator begin() { return buffer_iterator<T>(m_buffer, 0); } const_iterator begin() const { return buffer_iterator<T>(m_buffer, 0); } const_iterator cbegin() const { return begin(); } iterator end() { return buffer_iterator<T>(m_buffer, N); } const_iterator end() const { return buffer_iterator<T>(m_buffer, N); } const_iterator cend() const { return end(); } reverse_iterator rbegin() { return reverse_iterator(end() - 1); } const_reverse_iterator rbegin() const { return reverse_iterator(end() - 1); } const_reverse_iterator crbegin() const { return rbegin(); } reverse_iterator rend() { return reverse_iterator(begin() - 1); } const_reverse_iterator rend() const { return reverse_iterator(begin() - 1); } const_reverse_iterator crend() const { return rend(); } size_type size() const { return N; } bool empty() const { return N == 0; } size_type max_size() const { return N; } reference operator[](size_type index) { return *(begin() + static_cast<difference_type>(index)); } const_reference operator[](size_type index) const { return *(begin() + static_cast<difference_type>(index)); } reference at(size_type index) { if(index >= N){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } const_reference at(size_type index) const { if(index >= N){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } reference front() { return *begin(); } const_reference front() const { return *begin(); } reference back() { return *(end() - static_cast<difference_type>(1)); } const_reference back() const { return *(end() - static_cast<difference_type>(1)); } void fill(const value_type &value, const command_queue &queue) { ::boost::compute::fill(begin(), end(), value, queue); } void swap(array<T, N> &other, const command_queue &queue) { ::boost::compute::swap_ranges(begin(), end(), other.begin(), queue); } void fill(const value_type &value) { command_queue queue = 
default_queue(); ::boost::compute::fill(begin(), end(), value, queue); queue.finish(); } void swap(array<T, N> &other) { command_queue queue = default_queue(); ::boost::compute::swap_ranges(begin(), end(), other.begin(), queue); queue.finish(); } const buffer& get_buffer() const { return m_buffer; } private: buffer m_buffer; command_queue default_queue() const { const context &context = m_buffer.get_context(); command_queue queue(context, context.get_device()); return queue; } }; namespace detail { // set_kernel_arg specialization for array<T, N> template<class T, std::size_t N> struct set_kernel_arg<array<T, N> > { void operator()(kernel &kernel_, size_t index, const array<T, N> &array) { kernel_.set_arg(index, array.get_buffer()); } }; // for capturing array<T, N> with BOOST_COMPUTE_CLOSURE() template<class T, size_t N> struct capture_traits<array<T, N> > { static std::string type_name() { return std::string("__global ") + ::boost::compute::type_name<T>() + "*"; } }; // meta_kernel streaming operator for array<T, N> template<class T, size_t N> meta_kernel& operator<<(meta_kernel &k, const array<T, N> &array) { return k << k.get_buffer_identifier<T>(array.get_buffer()); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_ARRAY_HPP container/flat_set.hpp 0000644 00000020247 15125510617 0011050 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP #define BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP #include <cstddef> #include <utility> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/algorithm/lower_bound.hpp> #include <boost/compute/algorithm/upper_bound.hpp> #include <boost/compute/container/vector.hpp> namespace boost { namespace compute { template<class T> class flat_set { public: typedef T key_type; typedef typename vector<T>::value_type value_type; typedef typename vector<T>::size_type size_type; typedef typename vector<T>::difference_type difference_type; typedef typename vector<T>::reference reference; typedef typename vector<T>::const_reference const_reference; typedef typename vector<T>::pointer pointer; typedef typename vector<T>::const_pointer const_pointer; typedef typename vector<T>::iterator iterator; typedef typename vector<T>::const_iterator const_iterator; typedef typename vector<T>::reverse_iterator reverse_iterator; typedef typename vector<T>::const_reverse_iterator const_reverse_iterator; explicit flat_set(const context &context = system::default_context()) : m_vector(context) { } flat_set(const flat_set<T> &other) : m_vector(other.m_vector) { } flat_set<T>& operator=(const flat_set<T> &other) { if(this != &other){ m_vector = other.m_vector; } return *this; } ~flat_set() { } iterator begin() { return m_vector.begin(); } const_iterator begin() const { return m_vector.begin(); } const_iterator cbegin() const { return m_vector.cbegin(); } iterator end() { return m_vector.end(); } const_iterator end() const { return m_vector.end(); } const_iterator cend() const { return m_vector.cend(); } reverse_iterator rbegin() { return m_vector.rbegin(); } const_reverse_iterator rbegin() const { return m_vector.rbegin(); } const_reverse_iterator crbegin() const { return m_vector.crbegin(); } reverse_iterator rend() { return m_vector.rend(); } const_reverse_iterator 
rend() const { return m_vector.rend(); } const_reverse_iterator crend() const { return m_vector.crend(); } size_type size() const { return m_vector.size(); } size_type max_size() const { return m_vector.max_size(); } bool empty() const { return m_vector.empty(); } size_type capacity() const { return m_vector.capacity(); } void reserve(size_type size, command_queue &queue) { m_vector.reserve(size, queue); } void reserve(size_type size) { command_queue queue = m_vector.default_queue(); reserve(size, queue); queue.finish(); } void shrink_to_fit() { m_vector.shrink_to_fit(); } void clear() { m_vector.clear(); } std::pair<iterator, bool> insert(const value_type &value, command_queue &queue) { iterator location = upper_bound(value, queue); if(location != begin()){ value_type current_value; ::boost::compute::copy_n(location - 1, 1, ¤t_value, queue); if(value == current_value){ return std::make_pair(location - 1, false); } } m_vector.insert(location, value, queue); return std::make_pair(location, true); } std::pair<iterator, bool> insert(const value_type &value) { command_queue queue = m_vector.default_queue(); std::pair<iterator, bool> result = insert(value, queue); queue.finish(); return result; } iterator erase(const const_iterator &position, command_queue &queue) { return erase(position, position + 1, queue); } iterator erase(const const_iterator &position) { command_queue queue = m_vector.default_queue(); iterator iter = erase(position, queue); queue.finish(); return iter; } iterator erase(const const_iterator &first, const const_iterator &last, command_queue &queue) { return m_vector.erase(first, last, queue); } iterator erase(const const_iterator &first, const const_iterator &last) { command_queue queue = m_vector.default_queue(); iterator iter = erase(first, last, queue); queue.finish(); return iter; } size_type erase(const key_type &value, command_queue &queue) { iterator position = find(value, queue); if(position == end()){ return 0; } else { erase(position, 
queue); return 1; } } size_type erase(const key_type &value) { command_queue queue = m_vector.default_queue(); size_type result = erase(value, queue); queue.finish(); return result; } iterator find(const key_type &value, command_queue &queue) { return ::boost::compute::find(begin(), end(), value, queue); } iterator find(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = find(value, queue); queue.finish(); return iter; } const_iterator find(const key_type &value, command_queue &queue) const { return ::boost::compute::find(begin(), end(), value, queue); } const_iterator find(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = find(value, queue); queue.finish(); return iter; } size_type count(const key_type &value, command_queue &queue) const { return find(value, queue) != end() ? 1 : 0; } size_type count(const key_type &value) const { command_queue queue = m_vector.default_queue(); size_type result = count(value, queue); queue.finish(); return result; } iterator lower_bound(const key_type &value, command_queue &queue) { return ::boost::compute::lower_bound(begin(), end(), value, queue); } iterator lower_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = lower_bound(value, queue); queue.finish(); return iter; } const_iterator lower_bound(const key_type &value, command_queue &queue) const { return ::boost::compute::lower_bound(begin(), end(), value, queue); } const_iterator lower_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = lower_bound(value, queue); queue.finish(); return iter; } iterator upper_bound(const key_type &value, command_queue &queue) { return ::boost::compute::upper_bound(begin(), end(), value, queue); } iterator upper_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = upper_bound(value, queue); queue.finish(); return iter; 
} const_iterator upper_bound(const key_type &value, command_queue &queue) const { return ::boost::compute::upper_bound(begin(), end(), value, queue); } const_iterator upper_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = upper_bound(value, queue); queue.finish(); return iter; } private: vector<T> m_vector; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP container/flat_map.hpp 0000644 00000025053 15125510617 0011032 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP #define BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP #include <cstddef> #include <utility> #include <exception> #include <boost/config.hpp> #include <boost/throw_exception.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/algorithm/lower_bound.hpp> #include <boost/compute/algorithm/upper_bound.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/functional/get.hpp> #include <boost/compute/iterator/transform_iterator.hpp> #include <boost/compute/types/pair.hpp> #include <boost/compute/detail/buffer_value.hpp> namespace boost { namespace compute { template<class Key, class T> class flat_map { public: typedef Key key_type; typedef T mapped_type; typedef typename ::boost::compute::vector<std::pair<Key, T> > vector_type; typedef typename vector_type::value_type value_type; typedef typename vector_type::size_type size_type; typedef typename vector_type::difference_type difference_type; 
typedef typename vector_type::reference reference; typedef typename vector_type::const_reference const_reference; typedef typename vector_type::pointer pointer; typedef typename vector_type::const_pointer const_pointer; typedef typename vector_type::iterator iterator; typedef typename vector_type::const_iterator const_iterator; typedef typename vector_type::reverse_iterator reverse_iterator; typedef typename vector_type::const_reverse_iterator const_reverse_iterator; explicit flat_map(const context &context = system::default_context()) : m_vector(context) { } flat_map(const flat_map<Key, T> &other) : m_vector(other.m_vector) { } flat_map<Key, T>& operator=(const flat_map<Key, T> &other) { if(this != &other){ m_vector = other.m_vector; } return *this; } ~flat_map() { } iterator begin() { return m_vector.begin(); } const_iterator begin() const { return m_vector.begin(); } const_iterator cbegin() const { return m_vector.cbegin(); } iterator end() { return m_vector.end(); } const_iterator end() const { return m_vector.end(); } const_iterator cend() const { return m_vector.cend(); } reverse_iterator rbegin() { return m_vector.rbegin(); } const_reverse_iterator rbegin() const { return m_vector.rbegin(); } const_reverse_iterator crbegin() const { return m_vector.crbegin(); } reverse_iterator rend() { return m_vector.rend(); } const_reverse_iterator rend() const { return m_vector.rend(); } const_reverse_iterator crend() const { return m_vector.crend(); } size_type size() const { return m_vector.size(); } size_type max_size() const { return m_vector.max_size(); } bool empty() const { return m_vector.empty(); } size_type capacity() const { return m_vector.capacity(); } void reserve(size_type size, command_queue &queue) { m_vector.reserve(size, queue); } void reserve(size_type size) { command_queue queue = m_vector.default_queue(); reserve(size, queue); queue.finish(); } void shrink_to_fit() { m_vector.shrink_to_fit(); } void clear() { m_vector.clear(); } std::pair<iterator, 
bool> insert(const value_type &value, command_queue &queue) { iterator location = upper_bound(value.first, queue); if(location != begin()){ value_type current_value; ::boost::compute::copy_n(location - 1, 1, ¤t_value, queue); if(value.first == current_value.first){ return std::make_pair(location - 1, false); } } m_vector.insert(location, value); return std::make_pair(location, true); } std::pair<iterator, bool> insert(const value_type &value) { command_queue queue = m_vector.default_queue(); std::pair<iterator, bool> result = insert(value, queue); queue.finish(); return result; } iterator erase(const const_iterator &position, command_queue &queue) { return erase(position, position + 1, queue); } iterator erase(const const_iterator &position) { command_queue queue = m_vector.default_queue(); iterator iter = erase(position, queue); queue.finish(); return iter; } iterator erase(const const_iterator &first, const const_iterator &last, command_queue &queue) { return m_vector.erase(first, last, queue); } iterator erase(const const_iterator &first, const const_iterator &last) { command_queue queue = m_vector.default_queue(); iterator iter = erase(first, last, queue); queue.finish(); return iter; } size_type erase(const key_type &value, command_queue &queue) { iterator position = find(value, queue); if(position == end()){ return 0; } else { erase(position, queue); return 1; } } iterator find(const key_type &value, command_queue &queue) { ::boost::compute::get<0> get_key; return ::boost::compute::find( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } iterator find(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = find(value, queue); queue.finish(); return iter; } const_iterator find(const key_type &value, command_queue &queue) const { ::boost::compute::get<0> get_key; return ::boost::compute::find( 
::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } const_iterator find(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = find(value, queue); queue.finish(); return iter; } size_type count(const key_type &value, command_queue &queue) const { return find(value, queue) != end() ? 1 : 0; } size_type count(const key_type &value) const { command_queue queue = m_vector.default_queue(); size_type result = count(value, queue); queue.finish(); return result; } iterator lower_bound(const key_type &value, command_queue &queue) { ::boost::compute::get<0> get_key; return ::boost::compute::lower_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } iterator lower_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = lower_bound(value, queue); queue.finish(); return iter; } const_iterator lower_bound(const key_type &value, command_queue &queue) const { ::boost::compute::get<0> get_key; return ::boost::compute::lower_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } const_iterator lower_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = lower_bound(value, queue); queue.finish(); return iter; } iterator upper_bound(const key_type &value, command_queue &queue) { ::boost::compute::get<0> get_key; return ::boost::compute::upper_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } iterator upper_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = upper_bound(value, queue); queue.finish(); return iter; } 
const_iterator upper_bound(const key_type &value, command_queue &queue) const { ::boost::compute::get<0> get_key; return ::boost::compute::upper_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } const_iterator upper_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = upper_bound(value, queue); queue.finish(); return iter; } const mapped_type at(const key_type &key) const { const_iterator iter = find(key); if(iter == end()){ BOOST_THROW_EXCEPTION(std::out_of_range("key not found")); } return value_type(*iter).second; } detail::buffer_value<mapped_type> operator[](const key_type &key) { iterator iter = find(key); if(iter == end()){ iter = insert(std::make_pair(key, mapped_type())).first; } size_t index = iter.get_index() * sizeof(value_type) + sizeof(key_type); return detail::buffer_value<mapped_type>(m_vector.get_buffer(), index); } private: ::boost::compute::vector<std::pair<Key, T> > m_vector; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP container/vector.hpp 0000644 00000056605 15125510617 0010560 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_VECTOR_HPP #define BOOST_COMPUTE_CONTAINER_VECTOR_HPP #include <vector> #include <cstddef> #include <iterator> #include <exception> #include <boost/throw_exception.hpp> #include <boost/compute/config.hpp> #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST #include <initializer_list> #endif #include <boost/compute/buffer.hpp> #include <boost/compute/device.hpp> #include <boost/compute/system.hpp> #include <boost/compute/context.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/copy_n.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/allocator/buffer_allocator.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/detail/capture_traits.hpp> #include <boost/compute/detail/buffer_value.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { /// \class vector /// \brief A resizable array of values. /// /// The vector<T> class stores a dynamic array of values. Internally, the data /// is stored in an OpenCL buffer object. /// /// The vector class is the prefered container for storing and accessing data /// on a compute device. In most cases it should be used instead of directly /// dealing with buffer objects. If the undelying buffer is needed, it can be /// accessed with the get_buffer() method. /// /// The internal storage is allocated in a specific OpenCL context which is /// passed as an argument to the constructor when the vector is created. 
/// /// For example, to create a vector on the device containing space for ten /// \c int values: /// \code /// boost::compute::vector<int> vec(10, context); /// \endcode /// /// Allocation and data transfer can also be performed in a single step: /// \code /// // values on the host /// int data[] = { 1, 2, 3, 4 }; /// /// // create a vector of size four and copy the values from data /// boost::compute::vector<int> vec(data, data + 4, queue); /// \endcode /// /// The Boost.Compute \c vector class provides a STL-like API and is modeled /// after the \c std::vector class from the C++ standard library. It can be /// used with any of the STL-like algorithms provided by Boost.Compute /// including \c copy(), \c transform(), and \c sort() (among many others). /// /// For example: /// \code /// // a vector on a compute device /// boost::compute::vector<float> vec = ... /// /// // copy data to the vector from a host std:vector /// boost::compute::copy(host_vec.begin(), host_vec.end(), vec.begin(), queue); /// /// // copy data from the vector to a host std::vector /// boost::compute::copy(vec.begin(), vec.end(), host_vec.begin(), queue); /// /// // sort the values in the vector /// boost::compute::sort(vec.begin(), vec.end(), queue); /// /// // calculate the sum of the values in the vector (also see reduce()) /// float sum = boost::compute::accumulate(vec.begin(), vec.end(), 0, queue); /// /// // reverse the values in the vector /// boost::compute::reverse(vec.begin(), vec.end(), queue); /// /// // fill the vector with ones /// boost::compute::fill(vec.begin(), vec.end(), 1, queue); /// \endcode /// /// \see \ref array "array<T, N>", buffer template<class T, class Alloc = buffer_allocator<T> > class vector { public: typedef T value_type; typedef Alloc allocator_type; typedef typename allocator_type::size_type size_type; typedef typename allocator_type::difference_type difference_type; typedef detail::buffer_value<T> reference; typedef const detail::buffer_value<T> 
const_reference; typedef typename allocator_type::pointer pointer; typedef typename allocator_type::const_pointer const_pointer; typedef buffer_iterator<T> iterator; typedef buffer_iterator<T> const_iterator; typedef std::reverse_iterator<iterator> reverse_iterator; typedef std::reverse_iterator<const_iterator> const_reverse_iterator; /// Creates an empty vector in \p context. explicit vector(const context &context = system::default_context()) : m_size(0), m_allocator(context) { m_data = m_allocator.allocate(_minimum_capacity()); } /// Creates a vector with space for \p count elements in \p context. /// /// Note that unlike \c std::vector's constructor, this will not initialize /// the values in the container. Either call the vector constructor which /// takes a value to initialize with or use the fill() algorithm to set /// the initial values. /// /// For example: /// \code /// // create a vector on the device with space for ten ints /// boost::compute::vector<int> vec(10, context); /// \endcode explicit vector(size_type count, const context &context = system::default_context()) : m_size(count), m_allocator(context) { m_data = m_allocator.allocate((std::max)(count, _minimum_capacity())); } /// Creates a vector with space for \p count elements and sets each equal /// to \p value. /// /// For example: /// \code /// // creates a vector with four values set to nine (e.g. [9, 9, 9, 9]). /// boost::compute::vector<int> vec(4, 9, queue); /// \endcode vector(size_type count, const T &value, command_queue &queue = system::default_queue()) : m_size(count), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(count, _minimum_capacity())); ::boost::compute::fill_n(begin(), count, value, queue); } /// Creates a vector with space for the values in the range [\p first, /// \p last) and copies them into the vector with \p queue. 
/// /// For example: /// \code /// // values on the host /// int data[] = { 1, 2, 3, 4 }; /// /// // create a vector of size four and copy the values from data /// boost::compute::vector<int> vec(data, data + 4, queue); /// \endcode template<class InputIterator> vector(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) : m_size(detail::iterator_range_size(first, last)), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); ::boost::compute::copy(first, last, begin(), queue); } /// Creates a new vector and copies the values from \p other. vector(const vector &other, command_queue &queue = system::default_queue()) : m_size(other.m_size), m_allocator(other.m_allocator) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); if(!other.empty()){ if(other.get_buffer().get_context() != queue.get_context()){ command_queue other_queue = other.default_queue(); ::boost::compute::copy(other.begin(), other.end(), begin(), other_queue); other_queue.finish(); } else { ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } } } /// Creates a new vector and copies the values from \p other. template<class OtherAlloc> vector(const vector<T, OtherAlloc> &other, command_queue &queue = system::default_queue()) : m_size(other.size()), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); if(!other.empty()){ ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } } /// Creates a new vector and copies the values from \p vector. 
template<class OtherAlloc> vector(const std::vector<T, OtherAlloc> &vector, command_queue &queue = system::default_queue()) : m_size(vector.size()), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue); } #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST vector(std::initializer_list<T> list, command_queue &queue = system::default_queue()) : m_size(list.size()), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); ::boost::compute::copy(list.begin(), list.end(), begin(), queue); } #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST vector& operator=(const vector &other) { if(this != &other){ command_queue queue = default_queue(); resize(other.size(), queue); ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } return *this; } template<class OtherAlloc> vector& operator=(const vector<T, OtherAlloc> &other) { command_queue queue = default_queue(); resize(other.size(), queue); ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); return *this; } template<class OtherAlloc> vector& operator=(const std::vector<T, OtherAlloc> &vector) { command_queue queue = default_queue(); resize(vector.size(), queue); ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue); queue.finish(); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new vector from \p other. vector(vector&& other) : m_data(std::move(other.m_data)), m_size(other.m_size), m_allocator(std::move(other.m_allocator)) { other.m_size = 0; } /// Move-assigns the data from \p other to \c *this. 
vector& operator=(vector&& other) { if(capacity() > 0){ m_allocator.deallocate(m_data, capacity()); } m_data = std::move(other.m_data); m_size = other.m_size; m_allocator = std::move(other.m_allocator); other.m_size = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the vector object. ~vector() { if(capacity() > 0){ m_allocator.deallocate(m_data, capacity()); } } iterator begin() { return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), 0); } const_iterator begin() const { return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), 0); } const_iterator cbegin() const { return begin(); } iterator end() { return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), m_size); } const_iterator end() const { return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), m_size); } const_iterator cend() const { return end(); } reverse_iterator rbegin() { return reverse_iterator(end() - 1); } const_reverse_iterator rbegin() const { return reverse_iterator(end() - 1); } const_reverse_iterator crbegin() const { return rbegin(); } reverse_iterator rend() { return reverse_iterator(begin() - 1); } const_reverse_iterator rend() const { return reverse_iterator(begin() - 1); } const_reverse_iterator crend() const { return rend(); } /// Returns the number of elements in the vector. size_type size() const { return m_size; } size_type max_size() const { return m_allocator.max_size(); } /// Resizes the vector to \p size. 
void resize(size_type size, command_queue &queue) { if(size <= capacity()){ m_size = size; } else { // allocate new buffer pointer new_data = m_allocator.allocate( static_cast<size_type>( static_cast<float>(size) * _growth_factor() ) ); if(capacity() > 0) { // copy old values to the new buffer ::boost::compute::copy(m_data, m_data + m_size, new_data, queue); // free old memory m_allocator.deallocate(m_data, capacity()); } // set new data and size m_data = new_data; m_size = size; } } /// \overload void resize(size_type size) { command_queue queue = default_queue(); resize(size, queue); queue.finish(); } /// Returns \c true if the vector is empty. bool empty() const { return m_size == 0; } /// Returns the capacity of the vector. size_type capacity() const { if(m_data == pointer()) // null pointer check { return 0; } return m_data.get_buffer().size() / sizeof(T); } void reserve(size_type size, command_queue &queue) { if(size > max_size()){ throw std::length_error("vector::reserve"); } if(capacity() < size){ // allocate new buffer pointer new_data = m_allocator.allocate( static_cast<size_type>( static_cast<float>(size) * _growth_factor() ) ); if(capacity() > 0) { // copy old values to the new buffer ::boost::compute::copy(m_data, m_data + m_size, new_data, queue); // free old memory m_allocator.deallocate(m_data, capacity()); } // set new data m_data = new_data; } } void reserve(size_type size) { command_queue queue = default_queue(); reserve(size, queue); queue.finish(); } void shrink_to_fit(command_queue &queue) { pointer old_data = m_data; m_data = pointer(); // null pointer if(m_size > 0) { // allocate new buffer m_data = m_allocator.allocate(m_size); // copy old values to the new buffer ::boost::compute::copy(old_data, old_data + m_size, m_data, queue); } if(capacity() > 0) { // free old memory m_allocator.deallocate(old_data, capacity()); } } void shrink_to_fit() { command_queue queue = default_queue(); shrink_to_fit(queue); queue.finish(); } reference 
operator[](size_type index) { return *(begin() + static_cast<difference_type>(index)); } const_reference operator[](size_type index) const { return *(begin() + static_cast<difference_type>(index)); } reference at(size_type index) { if(index >= size()){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } const_reference at(size_type index) const { if(index >= size()){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } reference front() { return *begin(); } const_reference front() const { return *begin(); } reference back() { return *(end() - static_cast<difference_type>(1)); } const_reference back() const { return *(end() - static_cast<difference_type>(1)); } template<class InputIterator> void assign(InputIterator first, InputIterator last, command_queue &queue) { // resize vector for new contents resize(detail::iterator_range_size(first, last), queue); // copy values into the vector ::boost::compute::copy(first, last, begin(), queue); } template<class InputIterator> void assign(InputIterator first, InputIterator last) { command_queue queue = default_queue(); assign(first, last, queue); queue.finish(); } void assign(size_type n, const T &value, command_queue &queue) { // resize vector for new contents resize(n, queue); // fill vector with value ::boost::compute::fill_n(begin(), n, value, queue); } void assign(size_type n, const T &value) { command_queue queue = default_queue(); assign(n, value, queue); queue.finish(); } /// Inserts \p value at the end of the vector (resizing if neccessary). /// /// Note that calling \c push_back() to insert data values one at a time /// is inefficient as there is a non-trivial overhead in performing a data /// transfer to the device. It is usually better to store a set of values /// on the host (for example, in a \c std::vector) and then transfer them /// in bulk using the \c insert() method or the copy() algorithm. 
void push_back(const T &value, command_queue &queue) { insert(end(), value, queue); } /// \overload void push_back(const T &value) { command_queue queue = default_queue(); push_back(value, queue); queue.finish(); } void pop_back(command_queue &queue) { resize(size() - 1, queue); } void pop_back() { command_queue queue = default_queue(); pop_back(queue); queue.finish(); } iterator insert(iterator position, const T &value, command_queue &queue) { if(position == end()){ resize(m_size + 1, queue); position = begin() + position.get_index(); ::boost::compute::copy_n(&value, 1, position, queue); } else { ::boost::compute::vector<T, Alloc> tmp(position, end(), queue); resize(m_size + 1, queue); position = begin() + position.get_index(); ::boost::compute::copy_n(&value, 1, position, queue); ::boost::compute::copy(tmp.begin(), tmp.end(), position + 1, queue); } return position + 1; } iterator insert(iterator position, const T &value) { command_queue queue = default_queue(); iterator iter = insert(position, value, queue); queue.finish(); return iter; } void insert(iterator position, size_type count, const T &value, command_queue &queue) { ::boost::compute::vector<T, Alloc> tmp(position, end(), queue); resize(size() + count, queue); position = begin() + position.get_index(); ::boost::compute::fill_n(position, count, value, queue); ::boost::compute::copy( tmp.begin(), tmp.end(), position + static_cast<difference_type>(count), queue ); } void insert(iterator position, size_type count, const T &value) { command_queue queue = default_queue(); insert(position, count, value, queue); queue.finish(); } /// Inserts the values in the range [\p first, \p last) into the vector at /// \p position using \p queue. 
template<class InputIterator> void insert(iterator position, InputIterator first, InputIterator last, command_queue &queue) { ::boost::compute::vector<T, Alloc> tmp(position, end(), queue); size_type count = detail::iterator_range_size(first, last); resize(size() + count, queue); position = begin() + position.get_index(); ::boost::compute::copy(first, last, position, queue); ::boost::compute::copy( tmp.begin(), tmp.end(), position + static_cast<difference_type>(count), queue ); } /// \overload template<class InputIterator> void insert(iterator position, InputIterator first, InputIterator last) { command_queue queue = default_queue(); insert(position, first, last, queue); queue.finish(); } iterator erase(iterator position, command_queue &queue) { return erase(position, position + 1, queue); } iterator erase(iterator position) { command_queue queue = default_queue(); iterator iter = erase(position, queue); queue.finish(); return iter; } iterator erase(iterator first, iterator last, command_queue &queue) { if(last != end()){ ::boost::compute::vector<T, Alloc> tmp(last, end(), queue); ::boost::compute::copy(tmp.begin(), tmp.end(), first, queue); } difference_type count = std::distance(first, last); resize(size() - static_cast<size_type>(count), queue); return begin() + first.get_index() + count; } iterator erase(iterator first, iterator last) { command_queue queue = default_queue(); iterator iter = erase(first, last, queue); queue.finish(); return iter; } /// Swaps the contents of \c *this with \p other. void swap(vector &other) { std::swap(m_data, other.m_data); std::swap(m_size, other.m_size); std::swap(m_allocator, other.m_allocator); } /// Removes all elements from the vector. void clear() { m_size = 0; } allocator_type get_allocator() const { return m_allocator; } /// Returns the underlying buffer. 
const buffer& get_buffer() const { return m_data.get_buffer(); } /// \internal_ /// /// Returns a command queue usable to issue commands for the vector's /// memory buffer. This is used when a member function is called without /// specifying an existing command queue to use. command_queue default_queue() const { const context &context = m_allocator.get_context(); command_queue queue(context, context.get_device()); return queue; } private: /// \internal_ BOOST_CONSTEXPR size_type _minimum_capacity() const { return 4; } /// \internal_ BOOST_CONSTEXPR float _growth_factor() const { return 1.5; } private: pointer m_data; size_type m_size; allocator_type m_allocator; }; namespace detail { // set_kernel_arg specialization for vector<T> template<class T, class Alloc> struct set_kernel_arg<vector<T, Alloc> > { void operator()(kernel &kernel_, size_t index, const vector<T, Alloc> &vector) { kernel_.set_arg(index, vector.get_buffer()); } }; // for capturing vector<T> with BOOST_COMPUTE_CLOSURE() template<class T, class Alloc> struct capture_traits<vector<T, Alloc> > { static std::string type_name() { return std::string("__global ") + ::boost::compute::type_name<T>() + "*"; } }; // meta_kernel streaming operator for vector<T> template<class T, class Alloc> meta_kernel& operator<<(meta_kernel &k, const vector<T, Alloc> &vector) { return k << k.get_buffer_identifier<T>(vector.get_buffer()); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_VECTOR_HPP container/mapped_view.hpp 0000644 00000015376 15125510617 0011556 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP #define BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP #include <cstddef> #include <exception> #include <boost/config.hpp> #include <boost/throw_exception.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/system.hpp> #include <boost/compute/context.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> namespace boost { namespace compute { /// \class mapped_view /// \brief A mapped view of host memory. /// /// The mapped_view class simplifies mapping host-memory to a compute /// device. This allows for host-allocated memory to be used with the /// Boost.Compute algorithms. /// /// The following example shows how to map a simple C-array containing /// data on the host to the device and run the reduce() algorithm to /// calculate the sum: /// /// \snippet test/test_mapped_view.cpp reduce /// /// \see buffer template<class T> class mapped_view { public: typedef T value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef buffer_iterator<T> iterator; typedef buffer_iterator<T> const_iterator; /// Creates a null mapped_view object. mapped_view() { m_mapped_ptr = 0; } /// Creates a mapped_view for \p host_ptr with \p n elements. After /// constructing a mapped_view the data is available for use by a /// compute device. Use the \p unmap() method to make the updated data /// available to the host. mapped_view(T *host_ptr, size_type n, const context &context = system::default_context()) : m_buffer(_make_mapped_buffer(host_ptr, n, context)) { m_mapped_ptr = 0; } /// Creates a read-only mapped_view for \p host_ptr with \p n elements. /// After constructing a mapped_view the data is available for use by a /// compute device. Use the \p unmap() method to make the updated data /// available to the host. 
mapped_view(const T *host_ptr, size_type n, const context &context = system::default_context()) : m_buffer(_make_mapped_buffer(host_ptr, n, context)) { m_mapped_ptr = 0; } /// Creates a copy of \p other. mapped_view(const mapped_view<T> &other) : m_buffer(other.m_buffer) { m_mapped_ptr = 0; } /// Copies the mapped buffer from \p other. mapped_view<T>& operator=(const mapped_view<T> &other) { if(this != &other){ m_buffer = other.m_buffer; m_mapped_ptr = 0; } return *this; } /// Destroys the mapped_view object. ~mapped_view() { } /// Returns an iterator to the first element in the mapped_view. iterator begin() { return ::boost::compute::make_buffer_iterator<T>(m_buffer, 0); } /// Returns a const_iterator to the first element in the mapped_view. const_iterator begin() const { return ::boost::compute::make_buffer_iterator<T>(m_buffer, 0); } /// Returns a const_iterator to the first element in the mapped_view. const_iterator cbegin() const { return begin(); } /// Returns an iterator to one past the last element in the mapped_view. iterator end() { return ::boost::compute::make_buffer_iterator<T>(m_buffer, size()); } /// Returns a const_iterator to one past the last element in the mapped_view. const_iterator end() const { return ::boost::compute::make_buffer_iterator<T>(m_buffer, size()); } /// Returns a const_iterator to one past the last element in the mapped_view. const_iterator cend() const { return end(); } /// Returns the number of elements in the mapped_view. size_type size() const { return m_buffer.size() / sizeof(T); } /// Returns the host data pointer. T* get_host_ptr() { return static_cast<T *>(m_buffer.get_info<void *>(CL_MEM_HOST_PTR)); } /// Returns the host data pointer. const T* get_host_ptr() const { return static_cast<T *>(m_buffer.get_info<void *>(CL_MEM_HOST_PTR)); } /// Resizes the mapped_view to \p size elements. 
void resize(size_type size) { T *old_ptr = get_host_ptr(); m_buffer = _make_mapped_buffer(old_ptr, size, m_buffer.get_context()); } /// Returns \c true if the mapped_view is empty. bool empty() const { return size() == 0; } /// Returns the mapped buffer. const buffer& get_buffer() const { return m_buffer; } /// Maps the buffer into the host address space. /// /// \see_opencl_ref{clEnqueueMapBuffer} void map(cl_map_flags flags, command_queue &queue) { BOOST_ASSERT(m_mapped_ptr == 0); m_mapped_ptr = queue.enqueue_map_buffer( m_buffer, flags, 0, m_buffer.size() ); } /// Maps the buffer into the host address space for reading and writing. /// /// Equivalent to: /// \code /// map(CL_MAP_READ | CL_MAP_WRITE, queue); /// \endcode void map(command_queue &queue) { map(CL_MAP_READ | CL_MAP_WRITE, queue); } /// Unmaps the buffer from the host address space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} void unmap(command_queue &queue) { BOOST_ASSERT(m_mapped_ptr != 0); queue.enqueue_unmap_buffer(m_buffer, m_mapped_ptr); m_mapped_ptr = 0; } private: /// \internal_ static buffer _make_mapped_buffer(T *host_ptr, size_t n, const context &context) { return buffer( context, n * sizeof(T), buffer::read_write | buffer::use_host_ptr, host_ptr ); } /// \internal_ static buffer _make_mapped_buffer(const T *host_ptr, size_t n, const context &context) { return buffer( context, n * sizeof(T), buffer::read_only | buffer::use_host_ptr, const_cast<void *>(static_cast<const void *>(host_ptr)) ); } private: buffer m_buffer; void *m_mapped_ptr; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP container/detail/scalar.hpp 0000644 00000002743 15125510617 0011757 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP #define BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP #include <boost/compute/buffer.hpp> #include <boost/compute/event.hpp> #include <boost/compute/detail/read_write_single_value.hpp> namespace boost { namespace compute { namespace detail { // scalar<T> provides a trivial "container" that stores a // single value in a memory buffer on a compute device template<class T> class scalar { public: typedef T value_type; scalar(const context &context) : m_buffer(context, sizeof(T)) { } ~scalar() { } T read(command_queue &queue) const { return read_single_value<T>(m_buffer, 0, queue); } event write(const T &value, command_queue &queue) { return write_single_value<T>(value, m_buffer, 0, queue); } const buffer& get_buffer() const { return m_buffer; } private: buffer m_buffer; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP container/valarray.hpp 0000644 00000040500 15125510617 0011062 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_VALARRAY_HPP #define BOOST_COMPUTE_CONTAINER_VALARRAY_HPP #include <cstddef> #include <valarray> #include <boost/static_assert.hpp> #include <boost/type_traits.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/fill.hpp> #include <boost/compute/algorithm/max_element.hpp> #include <boost/compute/algorithm/min_element.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/algorithm/accumulate.hpp> #include <boost/compute/detail/buffer_value.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/functional/bind.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits.hpp> namespace boost { namespace compute { template<class T> class valarray { public: explicit valarray(const context &context = system::default_context()) : m_buffer(context, 0) { } explicit valarray(size_t size, const context &context = system::default_context()) : m_buffer(context, size * sizeof(T)) { } valarray(const T &value, size_t size, const context &context = system::default_context()) : m_buffer(context, size * sizeof(T)) { fill(begin(), end(), value); } valarray(const T *values, size_t size, const context &context = system::default_context()) : m_buffer(context, size * sizeof(T)) { copy(values, values + size, begin()); } valarray(const valarray<T> &other) : m_buffer(other.m_buffer.get_context(), other.size() * sizeof(T)) { copy(other.begin(), other.end(), begin()); } valarray(const std::valarray<T> &valarray, const context &context = system::default_context()) : m_buffer(context, valarray.size() * sizeof(T)) { copy(&valarray[0], &valarray[valarray.size()], begin()); } valarray<T>& operator=(const valarray<T> &other) { if(this != &other){ // change to other's OpenCL context m_buffer = buffer(other.m_buffer.get_context(), other.size() * 
sizeof(T)); copy(other.begin(), other.end(), begin()); } return *this; } valarray<T>& operator=(const std::valarray<T> &valarray) { m_buffer = buffer(m_buffer.get_context(), valarray.size() * sizeof(T)); copy(&valarray[0], &valarray[valarray.size()], begin()); return *this; } valarray<T>& operator*=(const T&); valarray<T>& operator/=(const T&); valarray<T>& operator%=(const T& val); valarray<T> operator+() const { // This operator can be used with any type. valarray<T> result(size()); copy(begin(), end(), result.begin()); return result; } valarray<T> operator-() const { BOOST_STATIC_ASSERT_MSG( is_fundamental<T>::value, "This operator can be used with all OpenCL built-in scalar" " and vector types" ); valarray<T> result(size()); BOOST_COMPUTE_FUNCTION(T, unary_minus, (T x), { return -x; }); transform(begin(), end(), result.begin(), unary_minus); return result; } valarray<T> operator~() const { BOOST_STATIC_ASSERT_MSG( is_fundamental<T>::value && !is_floating_point<typename scalar_type<T>::type>::value, "This operator can be used with all OpenCL built-in scalar" " and vector types except the built-in scalar and vector float types" ); valarray<T> result(size()); BOOST_COMPUTE_FUNCTION(T, bitwise_not, (T x), { return ~x; }); transform(begin(), end(), result.begin(), bitwise_not); return result; } /// In OpenCL there cannot be memory buffer with bool type, for /// this reason return type is valarray<char> instead of valarray<bool>. /// 1 means true, 0 means false. 
valarray<char> operator!() const { BOOST_STATIC_ASSERT_MSG( is_fundamental<T>::value, "This operator can be used with all OpenCL built-in scalar" " and vector types" ); valarray<char> result(size()); BOOST_COMPUTE_FUNCTION(char, logical_not, (T x), { return !x; }); transform(begin(), end(), &result[0], logical_not); return result; } valarray<T>& operator+=(const T&); valarray<T>& operator-=(const T&); valarray<T>& operator^=(const T&); valarray<T>& operator&=(const T&); valarray<T>& operator|=(const T&); valarray<T>& operator<<=(const T&); valarray<T>& operator>>=(const T&); valarray<T>& operator*=(const valarray<T>&); valarray<T>& operator/=(const valarray<T>&); valarray<T>& operator%=(const valarray<T>&); valarray<T>& operator+=(const valarray<T>&); valarray<T>& operator-=(const valarray<T>&); valarray<T>& operator^=(const valarray<T>&); valarray<T>& operator&=(const valarray<T>&); valarray<T>& operator|=(const valarray<T>&); valarray<T>& operator<<=(const valarray<T>&); valarray<T>& operator>>=(const valarray<T>&); ~valarray() { } size_t size() const { return m_buffer.size() / sizeof(T); } void resize(size_t size, T value = T()) { m_buffer = buffer(m_buffer.get_context(), size * sizeof(T)); fill(begin(), end(), value); } detail::buffer_value<T> operator[](size_t index) { return *(begin() + static_cast<ptrdiff_t>(index)); } const detail::buffer_value<T> operator[](size_t index) const { return *(begin() + static_cast<ptrdiff_t>(index)); } T (min)() const { return *(boost::compute::min_element(begin(), end())); } T (max)() const { return *(boost::compute::max_element(begin(), end())); } T sum() const { return boost::compute::accumulate(begin(), end(), T(0)); } template<class UnaryFunction> valarray<T> apply(UnaryFunction function) const { valarray<T> result(size()); transform(begin(), end(), result.begin(), function); return result; } const buffer& get_buffer() const { return m_buffer; } private: buffer_iterator<T> begin() const { return 
buffer_iterator<T>(m_buffer, 0); } buffer_iterator<T> end() const { return buffer_iterator<T>(m_buffer, size()); } private: buffer m_buffer; }; /// \internal_ #define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, assert) \ template<class T> \ inline valarray<T>& \ valarray<T>::operator op##=(const T& val) \ { \ assert \ transform(begin(), end(), begin(), \ ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \ return *this; \ } \ \ template<class T> \ inline valarray<T>& \ valarray<T>::operator op##=(const valarray<T> &rhs) \ { \ assert \ transform(begin(), end(), rhs.begin(), begin(), op_name<T>()); \ return *this; \ } /// \internal_ #define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(op, op_name) \ BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental<T>::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types" \ ); \ ) /// \internal_ /// For some operators class T can't be floating point type. /// See OpenCL specification, operators chapter. 
#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(op, op_name) \ BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental<T>::value && \ !is_floating_point<typename scalar_type<T>::type>::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types except the built-in scalar and vector float types" \ ); \ ) // defining operators BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(+, plus) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(-, minus) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(*, multiplies) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(/, divides) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(^, bit_xor) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(&, bit_and) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(|, bit_or) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(<<, shift_left) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(>>, shift_right) // The remainder (%) operates on // integer scalar and integer vector data types only. // See OpenCL specification. 
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(%, modulus,
    BOOST_STATIC_ASSERT_MSG(
        is_integral<typename scalar_type<T>::type>::value,
        "This operator can be used only with OpenCL built-in integer types"
    );
)

// The generator macros are only needed for the instantiations above.
#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY
#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP

#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT

// Generates three free function templates for `operator op`:
//   valarray op valarray, scalar op valarray, and valarray op scalar.
// `assert` is pasted verbatim as the first statement of each body. The
// result is sized from the valarray operand (lhs when both operands are
// valarrays); the sizes of lhs and rhs are not compared against each other.
/// \internal_
/// Macro for defining binary operators for valarray
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, assert) \
    template<class T> \
    valarray<T> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
    { \
        assert \
        valarray<T> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<T>(result.get_buffer(), 0), \
                  op_name<T>()); \
        return result; \
    } \
    \
    template<class T> \
    valarray<T> operator op (const T& val, const valarray<T>& rhs) \
    { \
        assert \
        valarray<T> result(rhs.size()); \
        transform(buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<T>(rhs.get_buffer(), rhs.size()), \
                  buffer_iterator<T>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
        return result; \
    } \
    \
    template<class T> \
    valarray<T> operator op (const valarray<T>& lhs, const T& val) \
    { \
        assert \
        valarray<T> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<T>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
        return result; \
    }

// Variant whose static assertion only requires is_fundamental<T>.
/// \internal_
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
    )

/// \internal_
/// For some operators class T can't be floating point type.
/// See OpenCL specification, operators chapter.
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value && \
            !is_floating_point<typename scalar_type<T>::type>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types except the built-in scalar and vector float types" \
        ); \
    )

// defining binary operators for valarray
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(+, plus)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(-, minus)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(*, multiplies)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(/, divides)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(^, bit_xor)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(&, bit_and)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(|, bit_or)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(<<, shift_left)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(>>, shift_right)

#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY
#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP

#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR

/// \internal_
/// Macro for defining valarray comparison operators.
/// For return type valarray<char> is used instead of valarray<bool> because
/// in OpenCL there cannot be memory buffer with bool type.
///
/// Note it's also used for defining binary logical operators (&&, ||)
///
/// Three overloads are generated per operator: valarray op valarray,
/// scalar op valarray and valarray op scalar. Each one statically asserts
/// is_fundamental<T> and writes its result into a valarray<char> sized from
/// the valarray operand (lhs when both operands are valarrays).
#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name) \
    template<class T> \
    valarray<char> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
    { \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
        valarray<char> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<char>(result.get_buffer(), 0), \
                  op_name<T>()); \
        return result; \
    } \
    \
    template<class T> \
    valarray<char> operator op (const T& val, const valarray<T>& rhs) \
    { \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
        valarray<char> result(rhs.size()); \
        transform(buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<T>(rhs.get_buffer(), rhs.size()), \
                  buffer_iterator<char>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
        return result; \
    } \
    \
    template<class T> \
    valarray<char> operator op (const valarray<T>& lhs, const T& val) \
    { \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
        valarray<char> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<char>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
        return result; \
    }

BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(==, equal_to)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(!=, not_equal_to)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>, greater)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<, less)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>=, greater_equal)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<=, less_equal)

/// \internal_
/// Macro for defining binary logical operators for valarray.
///
/// For return type valarray<char> is used instead of valarray<bool> because
/// in OpenCL there cannot be memory buffer with bool type.
/// 1 means true, 0 means false.
///
/// This is a plain alias: it forwards to the comparison-operator macro.
#define BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name)

BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(&&, logical_and)
BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(||, logical_or)

#undef BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR

#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_CONTAINER_VALARRAY_HPP
container/dynamic_bitset.hpp 0000644 00000015304 15125510617 0012243 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP
#define BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP

#include <boost/compute/lambda.hpp>
#include <boost/compute/algorithm/any_of.hpp>
#include <boost/compute/algorithm/fill.hpp>
#include <boost/compute/algorithm/transform_reduce.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/functional/integer.hpp>
#include <boost/compute/types/fundamental.hpp>

namespace boost {
namespace compute {

/// \class dynamic_bitset
/// \brief The dynamic_bitset class contains a resizable bit array.
/// /// For example, to create a dynamic-bitset with space for 1000 bits on the /// device: /// \code /// boost::compute::dynamic_bitset<> bits(1000, queue); /// \endcode /// /// The Boost.Compute \c dynamic_bitset class provides a STL-like API and is /// modeled after the \c boost::dynamic_bitset class from Boost. /// /// \see \ref vector "vector<T>" template<class Block = ulong_, class Alloc = buffer_allocator<Block> > class dynamic_bitset { public: typedef Block block_type; typedef Alloc allocator_type; typedef vector<Block, Alloc> container_type; typedef typename container_type::size_type size_type; BOOST_STATIC_CONSTANT(size_type, bits_per_block = sizeof(block_type) * CHAR_BIT); BOOST_STATIC_CONSTANT(size_type, npos = static_cast<size_type>(-1)); /// Creates a new dynamic bitset with storage for \p size bits. Initializes /// all bits to zero. dynamic_bitset(size_type size, command_queue &queue) : m_bits(size / sizeof(block_type), queue.get_context()), m_size(size) { // initialize all bits to zero reset(queue); } /// Creates a new dynamic bitset as a copy of \p other. dynamic_bitset(const dynamic_bitset &other) : m_bits(other.m_bits), m_size(other.m_size) { } /// Copies the data from \p other to \c *this. dynamic_bitset& operator=(const dynamic_bitset &other) { if(this != &other){ m_bits = other.m_bits; m_size = other.m_size; } return *this; } /// Destroys the dynamic bitset. ~dynamic_bitset() { } /// Returns the size of the dynamic bitset. size_type size() const { return m_size; } /// Returns the number of blocks to store the bits in the dynamic bitset. size_type num_blocks() const { return m_bits.size(); } /// Returns the maximum possible size for the dynamic bitset. size_type max_size() const { return m_bits.max_size() * bits_per_block; } /// Returns \c true if the dynamic bitset is empty (i.e. \c size() == \c 0). bool empty() const { return size() == 0; } /// Returns the number of set bits (i.e. '1') in the bitset. 
size_type count(command_queue &queue) const { ulong_ count = 0; transform_reduce( m_bits.begin(), m_bits.end(), &count, popcount<block_type>(), plus<ulong_>(), queue ); return static_cast<size_type>(count); } /// Resizes the bitset to contain \p num_bits. If the new size is greater /// than the current size the new bits are set to zero. void resize(size_type num_bits, command_queue &queue) { // resize bits const size_type current_block_count = m_bits.size(); m_bits.resize(num_bits * bits_per_block, queue); // fill new block with zeros (if new blocks were added) const size_type new_block_count = m_bits.size(); if(new_block_count > current_block_count){ fill_n( m_bits.begin() + current_block_count, new_block_count - current_block_count, block_type(0), queue ); } // store new size m_size = num_bits; } /// Sets the bit at position \p n to \c true. void set(size_type n, command_queue &queue) { set(n, true, queue); } /// Sets the bit at position \p n to \p value. void set(size_type n, bool value, command_queue &queue) { const size_type bit = n % bits_per_block; const size_type block = n / bits_per_block; // load current block block_type block_value; copy_n(m_bits.begin() + block, 1, &block_value, queue); // update block value if(value){ block_value |= (size_type(1) << bit); } else { block_value &= ~(size_type(1) << bit); } // store new block copy_n(&block_value, 1, m_bits.begin() + block, queue); } /// Returns \c true if the bit at position \p n is set (i.e. '1'). bool test(size_type n, command_queue &queue) { const size_type bit = n % (sizeof(block_type) * CHAR_BIT); const size_type block = n / (sizeof(block_type) * CHAR_BIT); block_type block_value; copy_n(m_bits.begin() + block, 1, &block_value, queue); return block_value & (size_type(1) << bit); } /// Flips the value of the bit at position \p n. void flip(size_type n, command_queue &queue) { set(n, !test(n, queue), queue); } /// Returns \c true if any bit in the bitset is set (i.e. '1'). 
bool any(command_queue &queue) const { return any_of( m_bits.begin(), m_bits.end(), lambda::_1 != block_type(0), queue ); } /// Returns \c true if all of the bits in the bitset are set to zero. bool none(command_queue &queue) const { return !any(queue); } /// Sets all of the bits in the bitset to zero. void reset(command_queue &queue) { fill(m_bits.begin(), m_bits.end(), block_type(0), queue); } /// Sets the bit at position \p n to zero. void reset(size_type n, command_queue &queue) { set(n, false, queue); } /// Empties the bitset (e.g. \c resize(0)). void clear() { m_bits.clear(); } /// Returns the allocator used to allocate storage for the bitset. allocator_type get_allocator() const { return m_bits.get_allocator(); } private: container_type m_bits; size_type m_size; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP user_event.hpp 0000644 00000004556 15125510617 0007451 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_USER_EVENT_HPP #define BOOST_COMPUTE_USER_EVENT_HPP #include <boost/compute/event.hpp> #include <boost/compute/context.hpp> namespace boost { namespace compute { #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// \class user_event /// \brief An user-created event. /// /// \opencl_version_warning{1,1} /// /// \see event class user_event : public event { public: /// Creates a new user-event object. 
/// /// \see_opencl_ref{clCreateUserEvent} explicit user_event(const context &context) { cl_int error; m_event = clCreateUserEvent(context.get(), &error); if(!m_event){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new user-event from \p other. user_event(const user_event &other) : event(other) { } /// Copies the user-event from \p other to \c *this. user_event& operator=(const user_event &other) { event::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new user event object from \p other. user_event(user_event&& other) BOOST_NOEXCEPT : event(std::move(other)) { } /// Move-assigns the user event from \p other to \c *this. user_event& operator=(user_event&& other) BOOST_NOEXCEPT { event::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Sets the execution status for the user-event. /// /// \see_opencl_ref{clSetUserEventStatus} void set_status(cl_int execution_status) { cl_int ret = clSetUserEventStatus(m_event, execution_status); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } }; #endif // BOOST_COMPUTE_CL_VERSION_1_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EVENT_HPP functional.hpp 0000644 00000002477 15125510617 0007434 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_HPP #define BOOST_COMPUTE_FUNCTIONAL_HPP /// \file /// /// Meta-header to include all Boost.Compute functional headers. 
#include <boost/compute/functional/as.hpp> #include <boost/compute/functional/atomic.hpp> #include <boost/compute/functional/common.hpp> #include <boost/compute/functional/convert.hpp> #include <boost/compute/functional/field.hpp> #include <boost/compute/functional/geometry.hpp> #include <boost/compute/functional/get.hpp> #include <boost/compute/functional/hash.hpp> #include <boost/compute/functional/identity.hpp> #include <boost/compute/functional/integer.hpp> #include <boost/compute/functional/logical.hpp> #include <boost/compute/functional/math.hpp> #include <boost/compute/functional/operator.hpp> #include <boost/compute/functional/popcount.hpp> #include <boost/compute/functional/relational.hpp> #endif // BOOST_COMPUTE_FUNCTIONAL_HPP types/fundamental.hpp 0000644 00000013532 15125510617 0010726 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP
#define BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP

#include <cstring>
#include <ostream>

#include <boost/preprocessor/cat.hpp>
#include <boost/preprocessor/comma.hpp>
#include <boost/preprocessor/repetition.hpp>
#include <boost/preprocessor/stringize.hpp>

#include <boost/compute/cl.hpp>

namespace boost {
namespace compute {

// scalar data types
// (aliases of the cl_* scalar types, named with a trailing underscore)
typedef cl_char char_;
typedef cl_uchar uchar_;
typedef cl_short short_;
typedef cl_ushort ushort_;
typedef cl_int int_;
typedef cl_uint uint_;
typedef cl_long long_;
typedef cl_ulong ulong_;
typedef cl_float float_;
typedef cl_double double_;

// converts uchar to ::boost::compute::uchar_
#define BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar) \
    BOOST_PP_CAT(::boost::compute::scalar, _)

// converts float, 4 to ::boost::compute::float4_
#define BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) \
    BOOST_PP_CAT(BOOST_PP_CAT(::boost::compute::scalar, size), _)

namespace detail {

// specialized vector_type base classes that provide the
// (x,y), (x,y,z,w), (s0..s7), (s0..sf) accessors
template<class Scalar, size_t N>
class vector_type_desc;

// Two components; operator[] indexes relative to the address of x.
template<class Scalar>
class vector_type_desc<Scalar, 2>
{
public:
    Scalar x, y;

    Scalar& operator[](size_t i)
    {
        return (&x)[i];
    }

    const Scalar operator[](size_t i) const
    {
        return (&x)[i];
    }
};

// Four components: adds z, w to the two-component base and inherits its
// operator[].
// NOTE(review): indexing 2 and 3 through the inherited operator[] relies on
// z, w being laid out directly after x, y - confirm for the targeted
// compilers.
template<class Scalar>
class vector_type_desc<Scalar, 4> : public vector_type_desc<Scalar, 2>
{
public:
    Scalar z, w;
};

// Eight components; operator[] indexes relative to the address of s0.
template<class Scalar>
class vector_type_desc<Scalar, 8>
{
public:
    Scalar s0, s1, s2, s3, s4, s5, s6, s7;

    Scalar& operator[](size_t i)
    {
        return (&s0)[i];
    }

    const Scalar operator[](size_t i) const
    {
        return (&s0)[i];
    }
};

// Sixteen components: adds s8..sf to the eight-component base and inherits
// its operator[] (same layout caveat as the four-component case).
template<class Scalar>
class vector_type_desc<Scalar, 16> : public vector_type_desc<Scalar, 8>
{
public:
    Scalar s8, s9, sa, sb, sc, sd, se, sf;
};

} // end detail namespace

// vector data types
// Generic N-component vector built on the accessor base class above. Copy,
// assignment and equality operate on the first sizeof(Scalar) * N bytes of
// the object.
template<class Scalar, size_t N>
class vector_type : public detail::vector_type_desc<Scalar, N>
{
    typedef detail::vector_type_desc<Scalar, N> base_type;
public:
    typedef Scalar scalar_type;

    // Value-initializes the base and checks at compile time that the object
    // is exactly N scalars in size.
    vector_type()
        : base_type()
    {
        BOOST_STATIC_ASSERT(sizeof(Scalar) * N == sizeof(vector_type<Scalar, N>));
    }

    // Sets every component to scalar.
    explicit vector_type(const Scalar scalar)
    {
        for(size_t i = 0; i < N; i++)
            (*this)[i] = scalar;
    }

    vector_type(const vector_type<Scalar, N> &other)
    {
        std::memcpy(this, &other, sizeof(Scalar) * N);
    }

    vector_type<Scalar, N>&
    operator=(const vector_type<Scalar, N> &other)
    {
        std::memcpy(this, &other, sizeof(Scalar) * N);
        return *this;
    }

    // Number of components (the template argument N).
    size_t size() const
    {
        return N;
    }

    // Byte-wise comparison of the N scalars.
    bool operator==(const vector_type<Scalar, N> &other) const
    {
        return std::memcmp(this, &other, sizeof(Scalar) * N) == 0;
    }

    bool operator!=(const vector_type<Scalar, N> &other) const
    {
        return !(*this == other);
    }
};

// Expands to one constructor parameter "scalar_type arg<i>", preceded by a
// comma for every i except 0.
#define BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION(z, i, _) \
    BOOST_PP_COMMA_IF(i) scalar_type BOOST_PP_CAT(arg, i)
// Expands to the full list of `size` constructor parameters.
#define BOOST_COMPUTE_VECTOR_TYPE_DECLARE_CTOR_ARGS(scalar, size) \
    BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _)
// Expands to the assignment of parameter arg<i> to component i.
#define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG(z, i, _) \
    (*this)[i] = BOOST_PP_CAT(arg, i);
// Expands to the assignment of the single parameter arg to component i.
#define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG(z, i, _) \
    (*this)[i] = arg;

// Declares class_name as a vector_type<cl_scalar, size> with a default
// constructor, an explicit single-value constructor that assigns the value
// to every component, and a constructor taking one argument per component.
#define BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(cl_scalar, size, class_name) \
    class class_name : public vector_type<cl_scalar, size> \
    { \
    public: \
        class_name() { } \
        explicit class_name( scalar_type arg ) \
        { \
            BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG, _) \
        } \
        class_name( \
            BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _) \
        ) \
        { \
            BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG, _) \
        } \
    };

// Declares the class <scalar><size>_ (e.g. float4_) together with a stream
// inserter that prints it as "<scalar><size>(v0, v1, ...)".
#define BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, size) \
    BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(BOOST_PP_CAT(cl_, scalar), \
                                            size, \
                                            BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)) \
    \
    inline std::ostream& operator<<( \
        std::ostream &s, \
        const BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) &v) \
    { \
        s << BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, size)) << "("; \
        for(size_t i = 0; i < size; i++){\
            s << v[i]; \
            if(i != size - 1){\
                s << ", "; \
            } \
        } \
        s << ")"; \
        return s; \
    }

// Declares the 2-, 4-, 8- and 16-component vector types for one scalar.
// NOTE: the last line of this macro ends with a line continuation, so the
// blank line that follows it is what terminates the definition - keep it.
#define BOOST_COMPUTE_DECLARE_VECTOR_TYPES(scalar) \
    BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 2) \
    BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 4) \
    BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 8) \
    BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 16) \

BOOST_COMPUTE_DECLARE_VECTOR_TYPES(char)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uchar)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(short)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ushort)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(int)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uint)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(long)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ulong)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(float)
BOOST_COMPUTE_DECLARE_VECTOR_TYPES(double)

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP
types/builtin.hpp 0000644 00000001055 15125510617 0010073 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// // deprecated, use <boost/compute/types/fundamental.hpp> instead #include <boost/compute/types/fundamental.hpp> types/struct.hpp 0000644 00000013121 15125510617 0007746 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_STRUCT_HPP #define BOOST_COMPUTE_TYPES_STRUCT_HPP #include <sstream> #include <boost/static_assert.hpp> #include <boost/preprocessor/expr_if.hpp> #include <boost/preprocessor/stringize.hpp> #include <boost/preprocessor/seq/fold_left.hpp> #include <boost/preprocessor/seq/for_each.hpp> #include <boost/preprocessor/seq/transform.hpp> #include <boost/compute/type_traits/type_definition.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/variadic_macros.hpp> namespace boost { namespace compute { namespace detail { template<class Struct, class T> inline std::string adapt_struct_insert_member(T Struct::*, const char *name) { std::stringstream s; s << " " << type_name<T>() << " " << name << ";\n"; return s.str(); } template<class Struct, class T, int N> inline std::string adapt_struct_insert_member(T (Struct::*)[N], const char *name) { std::stringstream s; s << " " << type_name<T>() << " " << name << "[" << N << "]" << ";\n"; return s.str(); } } // end detail namespace } // end compute namespace } // end boost namespace /// \internal_ #define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER(r, type, member) \ << ::boost::compute::detail::adapt_struct_insert_member( \ 
&type::member, BOOST_PP_STRINGIZE(member) \ ) /// \internal_ #define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER(r, data, i, elem) \ BOOST_PP_EXPR_IF(i, << ", ") << data.elem /// \internal_ #define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE(s, struct_, member_) \ sizeof(((struct_ *)0)->member_) /// \internal_ #define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD(s, x, y) (x+y) /// \internal_ #define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_) \ BOOST_PP_SEQ_FOLD_LEFT( \ BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD, \ 0, \ BOOST_PP_SEQ_TRANSFORM( \ BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE, struct_, members_ \ ) \ ) /// \internal_ /// /// Returns true if struct_ contains no internal padding bytes (i.e. it is /// packed). members_ is a sequence of the names of the struct members. #define BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(struct_, members_) \ (sizeof(struct_) == BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_)) /// The BOOST_COMPUTE_ADAPT_STRUCT() macro makes a C++ struct/class available /// to OpenCL kernels. /// /// \param type The C++ type. /// \param name The OpenCL name. /// \param members A tuple of the struct's members. /// /// For example, to adapt a 2D particle struct with position (x, y) and /// velocity (dx, dy): /// \code /// // c++ struct definition /// struct Particle /// { /// float x, y; /// float dx, dy; /// }; /// /// // adapt struct for OpenCL /// BOOST_COMPUTE_ADAPT_STRUCT(Particle, Particle, (x, y, dx, dy)) /// \endcode /// /// After adapting the struct it can be used in Boost.Compute containers /// and with Boost.Compute algorithms: /// \code /// // create vector of particles /// boost::compute::vector<Particle> particles = ... 
///
/// // function to compare particles by their x-coordinate
/// BOOST_COMPUTE_FUNCTION(bool, sort_by_x, (Particle a, Particle b),
/// {
///     return a.x < b.x;
/// });
///
/// // sort particles by their x-coordinate
/// boost::compute::sort(
///     particles.begin(), particles.end(), sort_by_x, queue
/// );
/// \endcode
///
/// Due to differences in struct padding between the host compiler and the
/// device compiler, the \c BOOST_COMPUTE_ADAPT_STRUCT() macro requires that
/// the adapted struct is packed (i.e. no padding bytes between members).
///
/// The expansion consists of: a static assertion that the struct is packed,
/// a BOOST_COMPUTE_TYPE_NAME() registration, a type_definition<type>()
/// specialization returning the packed OpenCL typedef, an
/// inject_type_impl<type> specialization that adds that typedef to a
/// meta_kernel, and a meta_kernel stream operator that writes a value of
/// the struct as a "(name){member, ...}" literal.
///
/// \see type_name()
#define BOOST_COMPUTE_ADAPT_STRUCT(type, name, members) \
    BOOST_STATIC_ASSERT_MSG( \
        BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(type, BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members)), \
        "BOOST_COMPUTE_ADAPT_STRUCT() does not support structs with internal padding." \
    ); \
    BOOST_COMPUTE_TYPE_NAME(type, name) \
    namespace boost { namespace compute { \
    template<> \
    inline std::string type_definition<type>() \
    { \
        std::stringstream declaration; \
        declaration << "typedef struct __attribute__((packed)) {\n" \
                    BOOST_PP_SEQ_FOR_EACH( \
                        BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER, \
                        type, \
                        BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \
                    ) \
                    << "} " << type_name<type>() << ";\n"; \
        return declaration.str(); \
    } \
    namespace detail { \
    template<> \
    struct inject_type_impl<type> \
    { \
        void operator()(meta_kernel &kernel) \
        { \
            kernel.add_type_declaration<type>(type_definition<type>()); \
        } \
    }; \
    inline meta_kernel& operator<<(meta_kernel &k, type s) \
    { \
        return k << "(" << #name << "){" \
               BOOST_PP_SEQ_FOR_EACH_I( \
                   BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER, \
                   s, \
                   BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \
               ) \
               << "}"; \
    } \
    }}}

#endif // BOOST_COMPUTE_TYPES_STRUCT_HPP
types/size_t.hpp 0000644 00000003314 15125510617 0007722 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2017 Denis Demidov <dennis.demidov@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

// size_t and ptrdiff_t need special treatment on OSX since those are not
// typedefs for ulong and long here:
// (the platform test and the include guard share a single #if, so the
// whole header is empty on every other platform)
#if defined(__APPLE__) && !defined(BOOST_COMPUTE_TYPES_SIZE_T_HPP)
#define BOOST_COMPUTE_TYPES_SIZE_T_HPP

#include <sstream>

#include <boost/mpl/if.hpp>

#include <boost/compute/type_traits/is_fundamental.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/detail/meta_kernel.hpp>

namespace boost {
namespace compute {

// Mark size_t and ptrdiff_t as fundamental types.
template <> struct is_fundamental<size_t> : boost::true_type {};
template <> struct is_fundamental<ptrdiff_t> : boost::true_type {};

namespace detail {

// size_t takes the type name of cl_uint when both have the same size and
// of cl_ulong otherwise.
template <> struct type_name_trait<size_t>
    : type_name_trait<
        boost::mpl::if_c<sizeof(size_t) == sizeof(cl_uint), cl_uint, cl_ulong>::type
    >
{};

// ptrdiff_t takes the type name of cl_int when both have the same size and
// of cl_long otherwise.
template <> struct type_name_trait<ptrdiff_t>
    : type_name_trait<
        boost::mpl::if_c<sizeof(ptrdiff_t) == sizeof(cl_int), cl_int, cl_long>::type
    >
{};

// Writes v into the kernel source as its decimal text representation.
inline meta_kernel& operator<<(meta_kernel &k, size_t v) {
    std::ostringstream s;
    s << v;
    return k << s.str();
}

// Writes v into the kernel source as its decimal text representation.
inline meta_kernel& operator<<(meta_kernel &k, ptrdiff_t v) {
    std::ostringstream s;
    s << v;
    return k << s.str();
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif
types/pair.hpp 0000644 00000006151 15125510617 0007362 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_PAIR_HPP #define BOOST_COMPUTE_TYPES_PAIR_HPP #include <string> #include <utility> #include <boost/compute/functional/get.hpp> #include <boost/compute/type_traits/type_definition.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/meta_kernel.hpp> namespace boost { namespace compute { namespace detail { // meta_kernel operator for std::pair literals template<class T1, class T2> inline meta_kernel& operator<<(meta_kernel &kernel, const std::pair<T1, T2> &x) { kernel << "(" << type_name<std::pair<T1, T2> >() << ")" << "{" << kernel.make_lit(x.first) << ", " << kernel.make_lit(x.second) << "}"; return kernel; } // inject_type() specialization for std::pair template<class T1, class T2> struct inject_type_impl<std::pair<T1, T2> > { void operator()(meta_kernel &kernel) { typedef std::pair<T1, T2> pair_type; kernel.inject_type<T1>(); kernel.inject_type<T2>(); kernel.add_type_declaration<pair_type>(type_definition<pair_type>()); } }; // get<N>() result type specialization for std::pair<> template<class T1, class T2> struct get_result_type<0, std::pair<T1, T2> > { typedef T1 type; }; template<class T1, class T2> struct get_result_type<1, std::pair<T1, T2> > { typedef T2 type; }; // get<N>() specialization for std::pair<> template<size_t N, class Arg, class T1, class T2> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_get<N, Arg, std::pair<T1, T2> > &expr) { kernel.inject_type<std::pair<T1, T2> >(); return kernel << expr.m_arg << (N == 0 ? 
".first" : ".second"); } } // end detail namespace namespace detail { // type_name() specialization for std::pair template<class T1, class T2> struct type_name_trait<std::pair<T1, T2> > { static const char* value() { static std::string name = std::string("_pair_") + type_name<T1>() + "_" + type_name<T2>() + "_t"; return name.c_str(); } }; // type_definition() specialization for std::pair template<class T1, class T2> struct type_definition_trait<std::pair<T1, T2> > { static std::string value() { typedef std::pair<T1, T2> pair_type; std::stringstream declaration; declaration << "typedef struct {\n" << " " << type_name<T1>() << " first;\n" << " " << type_name<T2>() << " second;\n" << "} " << type_name<pair_type>() << ";\n"; return declaration.str(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_PAIR_HPP types/tuple.hpp 0000644 00000021143 15125510617 0007556 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_TUPLE_HPP #define BOOST_COMPUTE_TYPES_TUPLE_HPP #include <string> #include <utility> #include <boost/preprocessor/enum.hpp> #include <boost/preprocessor/expr_if.hpp> #include <boost/preprocessor/repetition.hpp> #include <boost/tuple/tuple.hpp> #include <boost/compute/config.hpp> #include <boost/compute/functional/get.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/meta_kernel.hpp> #ifndef BOOST_COMPUTE_NO_STD_TUPLE #include <tuple> #endif namespace boost { namespace compute { namespace detail { // meta_kernel operators for boost::tuple literals #define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ BOOST_PP_EXPR_IF(n, << ", ") \ << kernel.make_lit(boost::get<n>(x)) #define BOOST_COMPUTE_PRINT_TUPLE(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class T)> \ inline meta_kernel& \ operator<<(meta_kernel &kernel, \ const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &x) \ { \ return kernel \ << "(" \ << type_name<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> >() \ << ")" \ << "{" \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ << "}"; \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TUPLE, ~) #undef BOOST_COMPUTE_PRINT_TUPLE #undef BOOST_COMPUTE_PRINT_ELEM // inject_type() specializations for boost::tuple #define BOOST_COMPUTE_INJECT_TYPE(z, n, unused) \ kernel.inject_type<T ## n>(); #define BOOST_COMPUTE_INJECT_DECL(z, n, unused) \ << " " << type_name<T ## n>() << " v" #n ";\n" #define BOOST_COMPUTE_INJECT_IMPL(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class T)> \ struct inject_type_impl<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ { \ void operator()(meta_kernel &kernel) \ { \ typedef boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> tuple_type; \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_TYPE, ~) \ std::stringstream declaration; \ declaration << "typedef struct {\n" \ BOOST_PP_REPEAT(n, 
BOOST_COMPUTE_INJECT_DECL, ~) \ << "} " << type_name<tuple_type>() << ";\n"; \ kernel.add_type_declaration<tuple_type>(declaration.str()); \ } \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_INJECT_IMPL, ~) #undef BOOST_COMPUTE_INJECT_IMPL #undef BOOST_COMPUTE_INJECT_DECL #undef BOOST_COMPUTE_INJECT_TYPE #ifdef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES // type_name() specializations for boost::tuple (without variadic templates) #define BOOST_COMPUTE_PRINT_TYPE(z, n, unused) \ + type_name<T ## n>() + "_" #define BOOST_COMPUTE_PRINT_TYPE_NAME(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class T)> \ struct type_name_trait<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ { \ static const char* value() \ { \ static std::string name = \ std::string("boost_tuple_") \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_TYPE, ~) \ "t"; \ return name.c_str(); \ } \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TYPE_NAME, ~) #undef BOOST_COMPUTE_PRINT_TYPE_NAME #undef BOOST_COMPUTE_PRINT_TYPE #else template<size_t N, class T, class... Rest> struct write_tuple_type_names { void operator()(std::ostream &os) { os << type_name<T>() << "_"; write_tuple_type_names<N-1, Rest...>()(os); } }; template<class T, class... Rest> struct write_tuple_type_names<1, T, Rest...> { void operator()(std::ostream &os) { os << type_name<T>(); } }; // type_name<> specialization for boost::tuple<...> (with variadic templates) template<class... T> struct type_name_trait<boost::tuple<T...>> { static const char* value() { static std::string str = make_type_name(); return str.c_str(); } static std::string make_type_name() { typedef typename boost::tuple<T...> tuple_type; std::stringstream s; s << "boost_tuple_"; write_tuple_type_names< boost::tuples::length<tuple_type>::value, T... >()(s); s << "_t"; return s.str(); } }; #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #ifndef BOOST_COMPUTE_NO_STD_TUPLE // type_name<> specialization for std::tuple<T...> template<class... 
T> struct type_name_trait<std::tuple<T...>> { static const char* value() { static std::string str = make_type_name(); return str.c_str(); } static std::string make_type_name() { typedef typename std::tuple<T...> tuple_type; std::stringstream s; s << "std_tuple_"; write_tuple_type_names< std::tuple_size<tuple_type>::value, T... >()(s); s << "_t"; return s.str(); } }; #endif // BOOST_COMPUTE_NO_STD_TUPLE // get<N>() result type specialization for boost::tuple<> #define BOOST_COMPUTE_GET_RESULT_TYPE(z, n, unused) \ template<size_t N, BOOST_PP_ENUM_PARAMS(n, class T)> \ struct get_result_type<N, boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ { \ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \ typedef typename boost::tuples::element<N, T>::type type; \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_RESULT_TYPE, ~) #undef BOOST_COMPUTE_GET_RESULT_TYPE // get<N>() specialization for boost::tuple<> #define BOOST_COMPUTE_GET_N(z, n, unused) \ template<size_t N, class Arg, BOOST_PP_ENUM_PARAMS(n, class T)> \ inline meta_kernel& operator<<(meta_kernel &kernel, \ const invoked_get<N, Arg, boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > &expr) \ { \ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \ BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<T>::value)); \ kernel.inject_type<T>(); \ return kernel << expr.m_arg << ".v" << int_(N); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_N, ~) #undef BOOST_COMPUTE_GET_N } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_TUPLE_HPP types/complex.hpp 0000644 00000011452 15125510617 0010076 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_COMPLEX_HPP #define BOOST_COMPUTE_TYPES_COMPLEX_HPP #include <complex> #include <boost/compute/functional.hpp> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/type_traits/make_vector_type.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/meta_kernel.hpp> namespace boost { namespace compute { namespace detail { template<class T> meta_kernel& operator<<(meta_kernel &kernel, const std::complex<T> &x) { typedef typename std::complex<T> value_type; kernel << "(" << type_name<value_type>() << ")" << "(" << x.real() << ", " << x.imag() << ")"; return kernel; } // get<N>() result type specialization for std::complex<> template<size_t N, class T> struct get_result_type<N, std::complex<T> > { typedef T type; }; // get<N>() specialization for std::complex<> template<size_t N, class Arg, class T> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_get<N, Arg, std::complex<T> > &expr) { BOOST_STATIC_ASSERT(N < 2); return kernel << expr.m_arg << (N == 0 ? 
".x" : ".y"); } } // end detail namespace // returns the real component of a complex<T> template<class T> struct real { typedef T result_type; template<class Arg> detail::invoked_get<0, Arg, std::complex<T> > operator()(const Arg &x) const { return detail::invoked_get<0, Arg, std::complex<T> >(x); } }; // returns the imaginary component of a complex<T> template<class T> struct imag { typedef T result_type; template<class Arg> detail::invoked_get<1, Arg, std::complex<T> > operator()(const Arg &x) const { return detail::invoked_get<1, Arg, std::complex<T> >(x); } }; namespace detail { template<class Arg1, class Arg2, class T> struct invoked_complex_multiplies { typedef typename std::complex<T> result_type; invoked_complex_multiplies(const Arg1 &x, const Arg2 &y) : m_x(x), m_y(y) { } Arg1 m_x; Arg2 m_y; }; template<class Arg1, class Arg2, class T> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_complex_multiplies<Arg1, Arg2, T> &expr) { typedef typename std::complex<T> value_type; kernel << "(" << type_name<value_type>() << ")" << "(" << expr.m_x << ".x*" << expr.m_y << ".x-" << expr.m_x << ".y*" << expr.m_y << ".y," << expr.m_x << ".y*" << expr.m_y << ".x+" << expr.m_x << ".x*" << expr.m_y << ".y" << ")"; return kernel; } template<class Arg, class T> struct invoked_complex_conj { typedef typename std::complex<T> result_type; invoked_complex_conj(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; template<class Arg, class T> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_complex_conj<Arg, T> &expr) { typedef typename std::complex<T> value_type; kernel << "(" << type_name<value_type>() << ")" << "(" << expr.m_arg << ".x" << ", -" << expr.m_arg << ".y" << ")"; return kernel; } } // end detail namespace // specialization for multiplies<T> template<class T> class multiplies<std::complex<T> > : public function<std::complex<T> (std::complex<T>, std::complex<T>)> { public: multiplies() : function< std::complex<T> (std::complex<T>, 
std::complex<T>) >("complex_multiplies") { } template<class Arg1, class Arg2> detail::invoked_complex_multiplies<Arg1, Arg2, T> operator()(const Arg1 &x, const Arg2 &y) const { return detail::invoked_complex_multiplies<Arg1, Arg2, T>(x, y); } }; // returns the complex conjugate of a complex<T> template<class T> struct conj { typedef typename std::complex<T> result_type; template<class Arg> detail::invoked_complex_conj<Arg, T> operator()(const Arg &x) const { return detail::invoked_complex_conj<Arg, T>(x); } }; namespace detail { // type_name() specialization for std::complex template<class T> struct type_name_trait<std::complex<T> > { static const char* value() { typedef typename make_vector_type<T, 2>::type vector_type; return type_name<vector_type>(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_COMPLEX_HPP system.hpp 0000644 00000022254 15125510617 0006611 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_SYSTEM_HPP #define BOOST_COMPUTE_SYSTEM_HPP #include <string> #include <vector> #include <cstdlib> #include <boost/throw_exception.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/device.hpp> #include <boost/compute/context.hpp> #include <boost/compute/platform.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/getenv.hpp> #include <boost/compute/exception/no_device_found.hpp> namespace boost { namespace compute { /// \class system /// \brief Provides access to platforms and devices on the system. 
/// /// The system class contains a set of static functions which provide access to /// the OpenCL platforms and compute devices on the host system. /// /// The default_device() convenience method automatically selects and returns /// the "best" compute device for the system following a set of heuristics and /// environment variables. This simplifies setup of the OpenCL enviornment. /// /// \see platform, device, context class system { public: /// Returns the default compute device for the system. /// /// The default device is selected based on a set of heuristics and can be /// influenced using one of the following environment variables: /// /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE - /// name of the compute device (e.g. "GTX TITAN") /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE_TYPE /// type of the compute device (e.g. "GPU" or "CPU") /// \li \c BOOST_COMPUTE_DEFAULT_PLATFORM - /// name of the platform (e.g. "NVIDIA CUDA") /// \li \c BOOST_COMPUTE_DEFAULT_VENDOR - /// name of the device vendor (e.g. "NVIDIA") /// \li \c BOOST_COMPUTE_DEFAULT_ENFORCE - /// If this is set to "1", then throw a no_device_found() exception /// if any of the above environment variables is set, but a matching /// device was not found. /// /// The default device is determined once on the first time this function /// is called. Calling this function multiple times will always result in /// the same device being returned. /// /// If no OpenCL device is found on the system, a no_device_found exception /// is thrown. /// /// For example, to print the name of the default compute device on the /// system: /// \code /// // get the default compute device /// boost::compute::device device = boost::compute::system::default_device(); /// /// // print the name of the device /// std::cout << "default device: " << device.name() << std::endl; /// \endcode static device default_device() { static device default_device = find_default_device(); return default_device; } /// Returns the device with \p name. 
/// /// \throws no_device_found if no device with \p name is found. static device find_device(const std::string &name) { const std::vector<device> devices = system::devices(); for(size_t i = 0; i < devices.size(); i++){ const device& device = devices[i]; if(device.name() == name){ return device; } } BOOST_THROW_EXCEPTION(no_device_found()); } /// Returns a vector containing all of the compute devices on /// the system. /// /// For example, to print out the name of each OpenCL-capable device /// available on the system: /// \code /// for(const auto &device : boost::compute::system::devices()){ /// std::cout << device.name() << std::endl; /// } /// \endcode static std::vector<device> devices() { std::vector<device> devices; const std::vector<platform> platforms = system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ const std::vector<device> platform_devices = platforms[i].devices(); devices.insert( devices.end(), platform_devices.begin(), platform_devices.end() ); } return devices; } /// Returns the number of compute devices on the system. static size_t device_count() { size_t count = 0; const std::vector<platform> platforms = system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ count += platforms[i].device_count(); } return count; } /// Returns the default context for the system. /// /// The default context is created for the default device on the system /// (as returned by default_device()). /// /// The default context is created once on the first time this function is /// called. Calling this function multiple times will always result in the /// same context object being returned. static context default_context() { static context default_context(default_device()); return default_context; } /// Returns the default command queue for the system. 
static command_queue& default_queue() { static command_queue queue(default_context(), default_device()); return queue; } /// Blocks until all outstanding computations on the default /// command queue are complete. /// /// This is equivalent to: /// \code /// system::default_queue().finish(); /// \endcode static void finish() { default_queue().finish(); } /// Returns a vector containing each of the OpenCL platforms on the system. /// /// For example, to print out the name of each OpenCL platform present on /// the system: /// \code /// for(const auto &platform : boost::compute::system::platforms()){ /// std::cout << platform.name() << std::endl; /// } /// \endcode static std::vector<platform> platforms() { cl_uint count = 0; clGetPlatformIDs(0, 0, &count); std::vector<platform> platforms; if(count > 0) { std::vector<cl_platform_id> platform_ids(count); clGetPlatformIDs(count, &platform_ids[0], 0); for(size_t i = 0; i < platform_ids.size(); i++){ platforms.push_back(platform(platform_ids[i])); } } return platforms; } /// Returns the number of compute platforms on the system. 
static size_t platform_count() { cl_uint count = 0; clGetPlatformIDs(0, 0, &count); return static_cast<size_t>(count); } private: /// \internal_ static device find_default_device() { // get a list of all devices on the system const std::vector<device> devices_ = devices(); if(devices_.empty()){ BOOST_THROW_EXCEPTION(no_device_found()); } // check for device from environment variable const char *name = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE"); const char *type = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE_TYPE"); const char *platform = detail::getenv("BOOST_COMPUTE_DEFAULT_PLATFORM"); const char *vendor = detail::getenv("BOOST_COMPUTE_DEFAULT_VENDOR"); const char *enforce = detail::getenv("BOOST_COMPUTE_DEFAULT_ENFORCE"); if(name || type || platform || vendor){ for(size_t i = 0; i < devices_.size(); i++){ const device& device = devices_[i]; if (name && !matches(device.name(), name)) continue; if (type && matches(std::string("GPU"), type)) if (!(device.type() & device::gpu)) continue; if (type && matches(std::string("CPU"), type)) if (!(device.type() & device::cpu)) continue; if (platform && !matches(device.platform().name(), platform)) continue; if (vendor && !matches(device.vendor(), vendor)) continue; return device; } if(enforce && enforce[0] == '1') BOOST_THROW_EXCEPTION(no_device_found()); } // find the first gpu device for(size_t i = 0; i < devices_.size(); i++){ const device& device = devices_[i]; if(device.type() & device::gpu){ return device; } } // find the first cpu device for(size_t i = 0; i < devices_.size(); i++){ const device& device = devices_[i]; if(device.type() & device::cpu){ return device; } } // return the first device found return devices_[0]; } /// \internal_ static bool matches(const std::string &str, const std::string &pattern) { return str.find(pattern) != std::string::npos; } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_SYSTEM_HPP type_traits/scalar_type.hpp 0000644 00000004476 15125510617 0012150 0 
ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP #include <boost/preprocessor/cat.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// Meta-function returning the scalar type for a vector type. /// /// For example, /// \code /// scalar_type<float4_>::type == float /// \endcode template<class Vector> struct scalar_type { /// \internal_ typedef void type; }; /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \ template<> \ struct scalar_type<BOOST_PP_CAT(scalar, _)> \ { \ typedef BOOST_PP_CAT(scalar, _) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, size) \ template<> \ struct scalar_type<BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)> \ { \ typedef BOOST_PP_CAT(scalar, _) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(scalar) \ BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 16) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(char) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uchar) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(short) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ushort) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(int) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uint) 
BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(long) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ulong) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(float) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP type_traits/type_definition.hpp 0000644 00000002155 15125510617 0013023 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP #define BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP #include <string> namespace boost { namespace compute { namespace detail { template<class T> struct type_definition_trait { static std::string value() { return std::string(); } }; } // end detail namespace /// Returns the OpenCL type definition for \c T. /// /// \return a string containing the type definition for \c T /// /// \see type_name<T>() template<class T> inline std::string type_definition() { return detail::type_definition_trait<T>::value(); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP type_traits/is_vector_type.hpp 0000644 00000002121 15125510617 0012661 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP #include <boost/mpl/bool.hpp> #include <boost/compute/type_traits/vector_size.hpp> namespace boost { namespace compute { /// Meta-function returning \c true if \p T is a vector type. /// /// For example, /// \code /// is_vector_type<int>::value == false /// is_vector_type<float4_>::value == true /// \endcode /// /// \see make_vector_type, vector_size template<class T> struct is_vector_type : boost::mpl::bool_<vector_size<T>::value != 1> { }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP type_traits/vector_size.hpp 0000644 00000004221 15125510617 0012162 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP #include <boost/preprocessor/cat.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// Meta-function returning the size (number of components) of a vector type /// \p T. For scalar types this function returns \c 1. 
/// /// For example, /// \code /// vector_size<float>::value == 1 /// vector_size<float4_>::value == 4 /// \endcode template<class T> struct vector_size { /// \internal_ BOOST_STATIC_CONSTANT(size_t, value = 1); }; /// \internal_ #define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, size) \ template<> \ struct vector_size<BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)> \ { \ BOOST_STATIC_CONSTANT(size_t, value = size); \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(scalar) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 16) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(char) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uchar) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(short) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ushort) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(int) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uint) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(long) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ulong) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(float) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP type_traits/common_type.hpp 0000644 00000004226 15125510617 0012164 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP #include <boost/type_traits/common_type.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, size) \ template<> \ struct common_type<BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size), \ BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar)> \ { \ typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \ }; \ template<> \ struct common_type<BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar), \ BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size)> \ { \ typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(scalar) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 2) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 4) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 8) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 16) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(char) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uchar) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(short) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ushort) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(int) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uint) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(long) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ulong) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(float) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(double) } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP type_traits/is_device_iterator.hpp 0000644 00000002507 15125510617 0013476 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, 
Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP #define BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP #include <boost/type_traits/integral_constant.hpp> namespace boost { namespace compute { /// Meta-function returning \c true if \c Iterator is a device-iterator. /// /// By default, this function returns false. Device iterator types (such as /// buffer_iterator) should specialize this trait and return \c true. /// /// For example: /// \code /// is_device_iterator<buffer_iterator<int>>::value == true /// is_device_iterator<std::vector<int>::iterator>::value == false /// \endcode template<class Iterator> struct is_device_iterator : boost::false_type {}; /// \internal_ template<class Iterator> struct is_device_iterator<const Iterator> : is_device_iterator<Iterator> {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP type_traits/is_fundamental.hpp 0000644 00000004776 15125510617 0012636 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP #define BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// Meta-function returning \c true if \p T is a fundamental (i.e. /// built-in) type. 
/// /// For example, /// \code /// is_fundamental<float>::value == true /// is_fundamental<std::pair<int, float>>::value == false /// \endcode template<class T> struct is_fundamental : public boost::false_type {}; /// \internal_ #define BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(type) \ template<> struct is_fundamental<BOOST_PP_CAT(type, _)> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 2))> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 4))> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 8))> : boost::true_type {}; \ template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 16))> : boost::true_type {}; BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(char) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uchar) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(short) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ushort) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(int) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uint) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(long) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ulong) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(float) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(double) #undef BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP type_traits/detail/capture_traits.hpp 0000644 00000001677 15125510617 0014135 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz 
<kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP #define BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP #include <boost/compute/type_traits/type_name.hpp> namespace boost { namespace compute { namespace detail { template<class T> struct capture_traits { static std::string type_name() { return ::boost::compute::type_name<T>(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP type_traits/result_of.hpp 0000644 00000002440 15125510617 0011631 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP #define BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP #include <boost/utility/result_of.hpp> namespace boost { namespace compute { /// Returns the result of \c Function when called with \c Args. /// /// For example, /// \code /// // int + int = int /// result_of<plus(int, int)>::type == int /// \endcode template<class Signature> struct result_of { // the default implementation uses the TR1-style result_of protocol. 
note // that we explicitly do *not* use the C++11 decltype operator as we want // the result type as it would be on an OpenCL device, not the actual C++ // type resulting from "invoking" the function on the host. typedef typename ::boost::tr1_result_of<Signature>::type type; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP type_traits/type_name.hpp 0000644 00000007175 15125510617 0011622 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP #define BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP #include <boost/preprocessor/cat.hpp> #include <boost/preprocessor/stringize.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { namespace detail { template<class T> struct type_name_trait; /// \internal_ #define BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(type) \ template<> \ struct type_name_trait<BOOST_PP_CAT(type, _)> \ { \ static const char* value() \ { \ return BOOST_PP_STRINGIZE(type); \ } \ }; /// \internal_ #define BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, n) \ template<> \ struct type_name_trait<BOOST_PP_CAT(BOOST_PP_CAT(scalar, n), _)> \ { \ static const char* value() \ { \ return BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, n)); \ } \ }; /// \internal_ #define BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(scalar) \ BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(scalar) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 4) \ 
BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 16) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(char) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uchar) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(short) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ushort) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(int) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uint) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(long) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ulong) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(float) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(double) /// \internal_ #define BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(type) \ template<> \ struct type_name_trait<type> \ { \ static const char* value() \ { \ return #type; \ } \ }; BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(bool) BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(char) BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(void) } // end detail namespace /// Returns the OpenCL type name for the type \c T as a string. /// /// \return a string containing the type name for \c T /// /// For example: /// \code /// type_name<float>() == "float" /// type_name<float4_>() == "float4" /// \endcode /// /// \see type_definition<T>() template<class T> inline const char* type_name() { return detail::type_name_trait<T>::value(); } } // end compute namespace } // end boost namespace /// Registers the OpenCL type for the C++ \p type to \p name. /// /// For example, the following will allow Eigen's \c Vector2f type /// to be used with Boost.Compute algorithms and containers as the /// built-in \c float2 type. /// \code /// BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2) /// \endcode /// /// This macro should be invoked in the global namespace. 
/// /// \see type_name() #define BOOST_COMPUTE_TYPE_NAME(type, name) \ namespace boost { namespace compute { \ template<> \ inline const char* type_name<type>() \ { \ return #name; \ }}} #endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP type_traits/make_vector_type.hpp 0000644 00000004450 15125510617 0013172 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP #include <boost/preprocessor/cat.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// Meta-function which returns a vector type for \p Scalar with \p Size. 
/// /// For example, /// \code /// make_vector_type<int, 2>::type == int2_ /// make_vector_type<float, 4>::type == float4_ /// \endcode /// /// \see is_vector_type template<class Scalar, size_t Size> struct make_vector_type { }; /// \internal_ template<class Scalar> struct make_vector_type<Scalar, 1> { typedef Scalar type; }; /// \internal_ #define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, size) \ template<> \ struct make_vector_type<BOOST_PP_CAT(scalar, _), size> \ { \ typedef BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(scalar) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 16) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(char) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uchar) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(short) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ushort) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(int) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uint) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(long) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ulong) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(float) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP svm.hpp 0000644 00000003661 15125510617 0006073 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_SVM_HPP #define BOOST_COMPUTE_SVM_HPP #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/memory/svm_ptr.hpp> // svm functions require OpenCL 2.0 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace boost { namespace compute { /// Allocates a shared virtual memory (SVM) buffer. // /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clSVMAlloc} /// /// \see svm_free() template<class T> inline svm_ptr<T> svm_alloc(const context &context, size_t size, cl_svm_mem_flags flags = CL_MEM_READ_WRITE, unsigned int alignment = 0) { svm_ptr<T> ptr( clSVMAlloc(context.get(), flags, size * sizeof(T), alignment), context ); if(!ptr.get()){ BOOST_THROW_EXCEPTION(opencl_error(CL_MEM_OBJECT_ALLOCATION_FAILURE)); } return ptr; } /// Deallocates a shared virtual memory (SVM) buffer. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clSVMFree} /// /// \see svm_alloc(), command_queue::enqueue_svm_free() template<class T> inline void svm_free(svm_ptr<T> ptr) { clSVMFree(ptr.get_context(), ptr.get()); } /// \overload template<class T> inline void svm_free(const context &context, svm_ptr<T> ptr) { clSVMFree(context.get(), ptr.get()); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CL_VERSION_2_0 #endif // BOOST_COMPUTE_PIPE_HPP cl.hpp 0000644 00000003131 15125510617 0005654 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CL_HPP #define BOOST_COMPUTE_CL_HPP #include <boost/compute/detail/cl_versions.hpp> #if defined(__APPLE__) #include <OpenCL/cl.h> #else #include <CL/cl.h> #endif // select what OpenCL core API versions to use #if defined(CL_VERSION_1_0) # define BOOST_COMPUTE_CL_VERSION_1_0 #endif #if defined(CL_VERSION_1_1) # if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 101 # define BOOST_COMPUTE_CL_VERSION_1_1 # endif #endif #if defined(CL_VERSION_1_2) # if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 102 # define BOOST_COMPUTE_CL_VERSION_1_2 # endif #endif #if defined(CL_VERSION_2_0) # if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 200 # define BOOST_COMPUTE_CL_VERSION_2_0 # endif #endif #if defined(CL_VERSION_2_1) # if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 201 # define BOOST_COMPUTE_CL_VERSION_2_1 # endif #endif #if defined(CL_VERSION_2_2) # if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 202 # define BOOST_COMPUTE_CL_VERSION_2_2 # endif #endif #endif // BOOST_COMPUTE_CL_HPP image_sampler.hpp 0000644 00000001061 15125510617 0010063 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// // deprecated, use <boost/compute/image/image_sampler.hpp> instead #include <boost/compute/image/image_sampler.hpp> experimental/clamp_range.hpp 0000644 00000002674 15125510617 0012236 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP #define BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP #include <iterator> #include <boost/compute/lambda.hpp> #include <boost/compute/algorithm/transform.hpp> namespace boost { namespace compute { namespace experimental { template<class InputIterator, class OutputIterator> inline OutputIterator clamp_range(InputIterator first, InputIterator last, OutputIterator result, typename std::iterator_traits<InputIterator>::value_type lo, typename std::iterator_traits<InputIterator>::value_type hi, command_queue &queue) { using ::boost::compute::lambda::_1; using ::boost::compute::lambda::_2; using ::boost::compute::lambda::clamp; return ::boost::compute::transform( first, last, result, clamp(_1, lo, hi), queue ); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP experimental/malloc.hpp 0000644 00000002651 15125510617 0011230 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP #define BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP #include <boost/compute/buffer.hpp> #include <boost/compute/system.hpp> #include <boost/compute/context.hpp> #include <boost/compute/detail/device_ptr.hpp> namespace boost { namespace compute { namespace experimental { // bring device_ptr into the experimental namespace using detail::device_ptr; template<class T> inline device_ptr<T> malloc(std::size_t size, const context &context = system::default_context()) { buffer buf(context, size * sizeof(T)); clRetainMemObject(buf.get()); return device_ptr<T>(buf); } inline device_ptr<char> malloc(std::size_t size, const context &context = system::default_context()) { return malloc<char>(size, context); } template<class T> inline void free(device_ptr<T> &ptr) { clReleaseMemObject(ptr.get_buffer().get()); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP experimental/sort_by_transform.hpp 0000644 00000003705 15125510617 0013536 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP #define BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP #include <iterator> #include <boost/compute/algorithm/sort_by_key.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/result_of.hpp> namespace boost { namespace compute { namespace experimental { template<class Iterator, class Transform, class Compare> inline void sort_by_transform(Iterator first, Iterator last, Transform transform, Compare compare, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<Iterator>::value_type value_type; typedef typename boost::compute::result_of<Transform(value_type)>::type key_type; size_t n = detail::iterator_range_size(first, last); if(n < 2){ return; } const context &context = queue.get_context(); ::boost::compute::vector<key_type> keys(n, context); ::boost::compute::transform( first, last, keys.begin(), transform, queue ); ::boost::compute::sort_by_key( keys.begin(), keys.end(), first, compare, queue ); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP experimental/tabulate.hpp 0000644 00000002470 15125510617 0011561 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP #define BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP #include <iterator> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/iterator/counting_iterator.hpp> namespace boost { namespace compute { namespace experimental { template<class Iterator, class UnaryFunction> inline void tabulate(Iterator first, Iterator last, UnaryFunction function, command_queue &queue) { size_t n = detail::iterator_range_size(first, last); ::boost::compute::transform( ::boost::compute::make_counting_iterator<int>(0), ::boost::compute::make_counting_iterator<int>(n), first, function, queue ); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP device.hpp 0000644 00000051323 15125510617 0006523 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DEVICE_HPP #define BOOST_COMPUTE_DEVICE_HPP #include <algorithm> #include <string> #include <vector> #include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/classification.hpp> #include <boost/compute/config.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/detail/duration.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/detail/assert_cl_success.hpp> namespace boost { namespace compute { class platform; /// \class device /// \brief A compute device. 
///
/// Typical compute devices include GPUs and multi-core CPUs. A list
/// of all compute devices available on a platform can be obtained
/// via the platform::devices() method.
///
/// The default compute device for the system can be obtained with
/// the system::default_device() method. For example:
///
/// \snippet test/test_device.cpp default_gpu
///
/// \see platform, context, command_queue
class device
{
public:
    /// Short names for the corresponding CL_DEVICE_TYPE_* constants.
    enum type {
        cpu = CL_DEVICE_TYPE_CPU,
        gpu = CL_DEVICE_TYPE_GPU,
        accelerator = CL_DEVICE_TYPE_ACCELERATOR
    };

    /// Creates a null device object.
    device()
        : m_id(0)
    {
    }

    /// Creates a new device object for \p id. If \p retain is \c true,
    /// the reference count for the device will be incremented.
    explicit device(cl_device_id id, bool retain = true)
        : m_id(id)
    {
        // clRetainDevice() is only called for sub-devices (see
        // is_subdevice()); without OpenCL 1.2 support \p retain is unused.
        #ifdef BOOST_COMPUTE_CL_VERSION_1_2
        if(m_id && retain && is_subdevice()){
            clRetainDevice(m_id);
        }
        #else
        (void) retain;
        #endif
    }

    /// Creates a new device object as a copy of \p other.
    device(const device &other)
        : m_id(other.m_id)
    {
        #ifdef BOOST_COMPUTE_CL_VERSION_1_2
        if(m_id && is_subdevice()){
            clRetainDevice(m_id);
        }
        #endif
    }

    /// Copies the device from \p other to \c *this.
    device& operator=(const device &other)
    {
        if(this != &other){
            // drop the reference held on the current id (sub-devices only)
            #ifdef BOOST_COMPUTE_CL_VERSION_1_2
            if(m_id && is_subdevice()){
                clReleaseDevice(m_id);
            }
            #endif

            m_id = other.m_id;

            // take a reference on the newly assigned id (sub-devices only)
            #ifdef BOOST_COMPUTE_CL_VERSION_1_2
            if(m_id && is_subdevice()){
                clRetainDevice(m_id);
            }
            #endif
        }

        return *this;
    }

    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
    /// Move-constructs a new device object from \p other.
    ///
    /// \p other is left as a null device.
    device(device&& other) BOOST_NOEXCEPT
        : m_id(other.m_id)
    {
        other.m_id = 0;
    }

    /// Move-assigns the device from \p other to \c *this.
    ///
    /// \p other is left as a null device.
    device& operator=(device&& other) BOOST_NOEXCEPT
    {
        // drop the reference held on the current id (sub-devices only)
        #ifdef BOOST_COMPUTE_CL_VERSION_1_2
        if(m_id && is_subdevice()){
            clReleaseDevice(m_id);
        }
        #endif

        m_id = other.m_id;
        other.m_id = 0;

        return *this;
    }
    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES

    /// Destroys the device object.
    ~device()
    {
        #ifdef BOOST_COMPUTE_CL_VERSION_1_2
        if(m_id && is_subdevice()){
            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
                clReleaseDevice(m_id)
            );
        }
        #endif
    }

    /// Returns the ID of the device.
    cl_device_id id() const
    {
        return m_id;
    }

    /// Returns a reference to the underlying OpenCL device id.
    cl_device_id& get() const
    {
        // note: hands out a mutable reference to m_id from a const method
        return const_cast<cl_device_id&>(m_id);
    }

    /// Returns the type of the device.
    cl_device_type type() const
    {
        return get_info<cl_device_type>(CL_DEVICE_TYPE);
    }

    // the two declarations below differ only in how the return type is
    // spelled; the qualified name is used for the real build.
    #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
    /// Returns the platform for the device.
    platform platform() const;
    #else
    boost::compute::platform platform() const;
    #endif

    /// Returns the name of the device.
    std::string name() const
    {
        return get_info<std::string>(CL_DEVICE_NAME);
    }

    /// Returns the name of the vendor for the device.
    std::string vendor() const
    {
        return get_info<std::string>(CL_DEVICE_VENDOR);
    }

    /// Returns the device profile string.
    std::string profile() const
    {
        return get_info<std::string>(CL_DEVICE_PROFILE);
    }

    /// Returns the device version string.
    std::string version() const
    {
        return get_info<std::string>(CL_DEVICE_VERSION);
    }

    /// Returns the driver version string.
    std::string driver_version() const
    {
        return get_info<std::string>(CL_DRIVER_VERSION);
    }

    /// Returns a list of extensions supported by the device.
    std::vector<std::string> extensions() const
    {
        std::string extensions_string =
            get_info<std::string>(CL_DEVICE_EXTENSIONS);
        std::vector<std::string> extensions_vector;
        // split on tabs and spaces, merging runs of adjacent separators
        boost::split(extensions_vector,
                     extensions_string,
                     boost::is_any_of("\t "),
                     boost::token_compress_on);
        return extensions_vector;
    }

    /// Returns \c true if the device supports the extension with
    /// \p name.
    bool supports_extension(const std::string &name) const
    {
        // exact string match against the entries returned by extensions()
        const std::vector<std::string> extensions = this->extensions();

        return std::find(
            extensions.begin(), extensions.end(), name) != extensions.end();
    }

    /// Returns the number of address bits.
    uint_ address_bits() const
    {
        return get_info<uint_>(CL_DEVICE_ADDRESS_BITS);
    }

    /// Returns the global memory size in bytes.
    ulong_ global_memory_size() const
    {
        return get_info<ulong_>(CL_DEVICE_GLOBAL_MEM_SIZE);
    }

    /// Returns the local memory size in bytes.
    ulong_ local_memory_size() const
    {
        return get_info<ulong_>(CL_DEVICE_LOCAL_MEM_SIZE);
    }

    /// Returns the clock frequency for the device's compute units.
    uint_ clock_frequency() const
    {
        return get_info<uint_>(CL_DEVICE_MAX_CLOCK_FREQUENCY);
    }

    /// Returns the number of compute units in the device.
    uint_ compute_units() const
    {
        return get_info<uint_>(CL_DEVICE_MAX_COMPUTE_UNITS);
    }

    /// \internal_
    ulong_ max_memory_alloc_size() const
    {
        return get_info<ulong_>(CL_DEVICE_MAX_MEM_ALLOC_SIZE);
    }

    /// \internal_
    size_t max_work_group_size() const
    {
        return get_info<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
    }

    /// \internal_
    uint_ max_work_item_dimensions() const
    {
        return get_info<uint_>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
    }

    /// Returns the preferred vector width for type \c T.
    ///
    /// This generic version returns 0; explicit specializations for
    /// individual types are defined after the class.
    template<class T>
    uint_ preferred_vector_width() const
    {
        return 0;
    }

    /// Returns the profiling timer resolution in nanoseconds.
    size_t profiling_timer_resolution() const
    {
        return get_info<size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION);
    }

    /// Returns \c true if the device is a sub-device.
    ///
    /// A device counts as a sub-device when its CL_DEVICE_PARENT_DEVICE is
    /// non-null. Always \c false when built without OpenCL 1.2 support.
    bool is_subdevice() const
    {
    #if defined(BOOST_COMPUTE_CL_VERSION_1_2)
        try {
            return get_info<cl_device_id>(CL_DEVICE_PARENT_DEVICE) != 0;
        }
        catch(opencl_error&){
            // the get_info() call above will throw if the device's opencl version
            // is less than 1.2 (in which case it can't be a sub-device).
            return false;
        }
    #else
        return false;
    #endif
    }

    /// Returns information about the device.
/// /// For example, to get the number of compute units: /// \code /// device.get_info<cl_uint>(CL_DEVICE_MAX_COMPUTE_UNITS); /// \endcode /// /// Alternatively, the template-specialized version can be used which /// automatically determines the result type: /// \code /// device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); /// \endcode /// /// \see_opencl_ref{clGetDeviceInfo} template<class T> T get_info(cl_device_info info) const { return detail::get_object_info<T>(clGetDeviceInfo, m_id, info); } /// \overload template<int Enum> typename detail::get_object_info_type<device, Enum>::type get_info() const; #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Partitions the device into multiple sub-devices according to /// \p properties. /// /// \opencl_version_warning{1,2} std::vector<device> partition(const cl_device_partition_property *properties) const { // get sub-device count uint_ count = 0; int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // get sub-device ids std::vector<cl_device_id> ids(count); ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // convert ids to device objects std::vector<device> devices(count); for(size_t i = 0; i < count; i++){ devices[i] = device(ids[i], false); } return devices; } /// \opencl_version_warning{1,2} std::vector<device> partition_equally(size_t count) const { cl_device_partition_property properties[] = { CL_DEVICE_PARTITION_EQUALLY, static_cast<cl_device_partition_property>(count), 0 }; return partition(properties); } /// \opencl_version_warning{1,2} std::vector<device> partition_by_counts(const std::vector<size_t> &counts) const { std::vector<cl_device_partition_property> properties; properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS); for(size_t i = 0; i < counts.size(); i++){ properties.push_back( 
static_cast<cl_device_partition_property>(counts[i])); } properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS_LIST_END); properties.push_back(0); return partition(&properties[0]); } /// \opencl_version_warning{1,2} std::vector<device> partition_by_affinity_domain(cl_device_affinity_domain domain) const { cl_device_partition_property properties[] = { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, static_cast<cl_device_partition_property>(domain), 0 }; return partition(properties); } #endif // BOOST_COMPUTE_CL_VERSION_1_2 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns the current value of the host clock as seen by device /// in nanoseconds. /// /// \see_opencl21_ref{clGetHostTimer} /// /// \opencl_version_warning{2,1} ulong_ get_host_timer() const { ulong_ host_timestamp = 0; cl_int ret = clGetHostTimer(m_id, &host_timestamp); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return host_timestamp; } /// Returns a reasonably synchronized pair of timestamps from the device timer /// and the host timer as seen by device in nanoseconds. The first of returned /// std::pair is a device timer timestamp, the second is a host timer timestamp. /// /// \see_opencl21_ref{clGetDeviceAndHostTimer} /// /// \opencl_version_warning{2,1} std::pair<ulong_, ulong_> get_device_and_host_timer() const { ulong_ host_timestamp; ulong_ device_timestamp; cl_int ret = clGetDeviceAndHostTimer( m_id, &device_timestamp, &host_timestamp ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return std::make_pair( device_timestamp, host_timestamp ); } #if !defined(BOOST_COMPUTE_NO_HDR_CHRONO) || !defined(BOOST_COMPUTE_NO_BOOST_CHRONO) /// Returns the current value of the host clock as seen by device /// as duration. 
/// /// For example, to print the current value of the host clock as seen by device /// in milliseconds: /// \code /// std::cout << device.get_host_timer<std::chrono::milliseconds>().count() << " ms"; /// \endcode /// /// \see_opencl21_ref{clGetHostTimer} /// /// \opencl_version_warning{2,1} template<class Duration> Duration get_host_timer() const { const ulong_ nanoseconds = this->get_host_timer(); return detail::make_duration_from_nanoseconds(Duration(), nanoseconds); } /// Returns a reasonably synchronized pair of timestamps from the device timer /// and the host timer as seen by device as a std::pair<Duration, Duration> value. /// The first of returned std::pair is a device timer timestamp, the second is /// a host timer timestamp. /// /// \see_opencl21_ref{clGetDeviceAndHostTimer} /// /// \opencl_version_warning{2,1} template<class Duration> std::pair<Duration, Duration> get_device_and_host_timer() const { const std::pair<ulong_, ulong_> timestamps = this->get_device_and_host_timer(); return std::make_pair( detail::make_duration_from_nanoseconds(Duration(), timestamps.first), detail::make_duration_from_nanoseconds(Duration(), timestamps.second) ); } #endif // !defined(BOOST_COMPUTE_NO_HDR_CHRONO) || !defined(BOOST_COMPUTE_NO_BOOST_CHRONO) #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Returns \c true if the device is the same at \p other. bool operator==(const device &other) const { return m_id == other.m_id; } /// Returns \c true if the device is different from \p other. bool operator!=(const device &other) const { return m_id != other.m_id; } /// Returns \c true if the device OpenCL version is major.minor /// or newer; otherwise returns \c false. bool check_version(int major, int minor) const { std::stringstream stream; stream << version(); int actual_major, actual_minor; stream.ignore(7); // 'OpenCL ' stream >> actual_major; stream.ignore(1); // '.' 
stream >> actual_minor; return actual_major > major || (actual_major == major && actual_minor >= minor); } private: cl_device_id m_id; }; /// \internal_ template<> inline uint_ device::preferred_vector_width<short_>() const { return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT); } /// \internal_ template<> inline uint_ device::preferred_vector_width<int_>() const { return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT); } /// \internal_ template<> inline uint_ device::preferred_vector_width<long_>() const { return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG); } /// \internal_ template<> inline uint_ device::preferred_vector_width<float_>() const { return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT); } /// \internal_ template<> inline uint_ device::preferred_vector_width<double_>() const { return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE); } /// \internal_ define get_info() specializations for device BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_uint, CL_DEVICE_ADDRESS_BITS)) ((bool, CL_DEVICE_AVAILABLE)) ((bool, CL_DEVICE_COMPILER_AVAILABLE)) ((bool, CL_DEVICE_ENDIAN_LITTLE)) ((bool, CL_DEVICE_ERROR_CORRECTION_SUPPORT)) ((cl_device_exec_capabilities, CL_DEVICE_EXECUTION_CAPABILITIES)) ((std::string, CL_DEVICE_EXTENSIONS)) ((cl_ulong, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE)) ((cl_device_mem_cache_type, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE)) ((cl_uint, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE)) ((cl_ulong, CL_DEVICE_GLOBAL_MEM_SIZE)) ((bool, CL_DEVICE_IMAGE_SUPPORT)) ((size_t, CL_DEVICE_IMAGE2D_MAX_HEIGHT)) ((size_t, CL_DEVICE_IMAGE2D_MAX_WIDTH)) ((size_t, CL_DEVICE_IMAGE3D_MAX_DEPTH)) ((size_t, CL_DEVICE_IMAGE3D_MAX_HEIGHT)) ((size_t, CL_DEVICE_IMAGE3D_MAX_WIDTH)) ((cl_ulong, CL_DEVICE_LOCAL_MEM_SIZE)) ((cl_device_local_mem_type, CL_DEVICE_LOCAL_MEM_TYPE)) ((cl_uint, CL_DEVICE_MAX_CLOCK_FREQUENCY)) ((cl_uint, CL_DEVICE_MAX_COMPUTE_UNITS)) ((cl_uint, CL_DEVICE_MAX_CONSTANT_ARGS)) ((cl_ulong, 
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE)) ((cl_ulong, CL_DEVICE_MAX_MEM_ALLOC_SIZE)) ((size_t, CL_DEVICE_MAX_PARAMETER_SIZE)) ((cl_uint, CL_DEVICE_MAX_READ_IMAGE_ARGS)) ((cl_uint, CL_DEVICE_MAX_SAMPLERS)) ((size_t, CL_DEVICE_MAX_WORK_GROUP_SIZE)) ((cl_uint, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)) ((std::vector<size_t>, CL_DEVICE_MAX_WORK_ITEM_SIZES)) ((cl_uint, CL_DEVICE_MAX_WRITE_IMAGE_ARGS)) ((cl_uint, CL_DEVICE_MEM_BASE_ADDR_ALIGN)) ((cl_uint, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE)) ((std::string, CL_DEVICE_NAME)) ((cl_platform_id, CL_DEVICE_PLATFORM)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE)) ((std::string, CL_DEVICE_PROFILE)) ((size_t, CL_DEVICE_PROFILING_TIMER_RESOLUTION)) ((cl_command_queue_properties, CL_DEVICE_QUEUE_PROPERTIES)) ((cl_device_fp_config, CL_DEVICE_SINGLE_FP_CONFIG)) ((cl_device_type, CL_DEVICE_TYPE)) ((std::string, CL_DEVICE_VENDOR)) ((cl_uint, CL_DEVICE_VENDOR_ID)) ((std::string, CL_DEVICE_VERSION)) ((std::string, CL_DRIVER_VERSION)) ) #ifdef CL_DEVICE_DOUBLE_FP_CONFIG BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_device_fp_config, CL_DEVICE_DOUBLE_FP_CONFIG)) ) #endif #ifdef CL_DEVICE_HALF_FP_CONFIG BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_device_fp_config, CL_DEVICE_HALF_FP_CONFIG)) ) #endif #ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((bool, CL_DEVICE_HOST_UNIFIED_MEMORY)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE)) ((std::string, 
CL_DEVICE_OPENCL_C_VERSION)) ) #endif // BOOST_COMPUTE_CL_VERSION_1_1 #ifdef BOOST_COMPUTE_CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((std::string, CL_DEVICE_BUILT_IN_KERNELS)) ((bool, CL_DEVICE_LINKER_AVAILABLE)) ((cl_device_id, CL_DEVICE_PARENT_DEVICE)) ((cl_uint, CL_DEVICE_PARTITION_MAX_SUB_DEVICES)) ((cl_device_partition_property, CL_DEVICE_PARTITION_PROPERTIES)) ((cl_device_affinity_domain, CL_DEVICE_PARTITION_AFFINITY_DOMAIN)) ((cl_device_partition_property, CL_DEVICE_PARTITION_TYPE)) ((size_t, CL_DEVICE_PRINTF_BUFFER_SIZE)) ((bool, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC)) ((cl_uint, CL_DEVICE_REFERENCE_COUNT)) ) #endif // BOOST_COMPUTE_CL_VERSION_1_2 #ifdef BOOST_COMPUTE_CL_VERSION_2_0 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((size_t, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE)) ((size_t, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE)) ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_EVENTS)) ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_QUEUES)) ((cl_uint, CL_DEVICE_MAX_PIPE_ARGS)) ((cl_uint, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS)) ((cl_uint, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS)) ((cl_uint, CL_DEVICE_PIPE_MAX_PACKET_SIZE)) ((cl_uint, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT)) ((cl_uint, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT)) ((cl_uint, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT)) ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE)) ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE)) ((cl_command_queue_properties, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES)) ((cl_device_svm_capabilities, CL_DEVICE_SVM_CAPABILITIES)) ((cl_uint, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT)) ((cl_uint, CL_DEVICE_IMAGE_PITCH_ALIGNMENT)) ) #endif // BOOST_COMPUTE_CL_VERSION_2_0 #ifdef BOOST_COMPUTE_CL_VERSION_2_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((std::string, CL_DEVICE_IL_VERSION)) ((cl_uint, CL_DEVICE_MAX_NUM_SUB_GROUPS)) ((bool, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS)) ) #endif // BOOST_COMPUTE_CL_VERSION_2_1 } // end 
compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DEVICE_HPP config.hpp 0000644 00000004467 15125510617 0006540 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONFIG_HPP #define BOOST_COMPUTE_CONFIG_HPP #include <boost/config.hpp> #include <boost/version.hpp> #include <boost/compute/cl.hpp> // check for minimum required boost version #if BOOST_VERSION < 105400 #error Boost.Compute requires Boost version 1.54 or later #endif // the BOOST_COMPUTE_NO_VARIADIC_TEMPLATES macro is defined // if the compiler does not *fully* support variadic templates #if defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES) || \ (defined(__GNUC__) && !defined(__clang__) && \ __GNUC__ == 4 && __GNUC_MINOR__ <= 6) #define BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #endif // BOOST_NO_CXX11_VARIADIC_TEMPLATES // the BOOST_COMPUTE_NO_STD_TUPLE macro is defined if the // compiler/stdlib does not support std::tuple #if defined(BOOST_NO_CXX11_HDR_TUPLE) || \ defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) #define BOOST_COMPUTE_NO_STD_TUPLE #endif // BOOST_NO_CXX11_HDR_TUPLE // defines BOOST_COMPUTE_CL_CALLBACK to the value of CL_CALLBACK // if it is defined (it was added in OpenCL 1.1). 
this is used to // annotate certain callback functions registered with OpenCL #ifdef CL_CALLBACK # define BOOST_COMPUTE_CL_CALLBACK CL_CALLBACK #else # define BOOST_COMPUTE_CL_CALLBACK #endif // Maximum number of iterators acceptable for make_zip_iterator #ifndef BOOST_COMPUTE_MAX_ARITY // should be no more than max boost::tuple size (10 by default) # define BOOST_COMPUTE_MAX_ARITY 10 #endif #if !defined(BOOST_COMPUTE_DOXYGEN_INVOKED) && \ defined(BOOST_NO_CXX11_RVALUE_REFERENCES) # define BOOST_COMPUTE_NO_RVALUE_REFERENCES #endif // BOOST_NO_CXX11_RVALUE_REFERENCES #if defined(BOOST_NO_CXX11_HDR_INITIALIZER_LIST) # define BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST #endif // BOOST_NO_CXX11_HDR_INITIALIZER_LIST #if defined(BOOST_NO_CXX11_HDR_CHRONO) # define BOOST_COMPUTE_NO_HDR_CHRONO #endif // BOOST_NO_CXX11_HDR_CHRONO #endif // BOOST_COMPUTE_CONFIG_HPP functional/get.hpp 0000644 00000003533 15125510617 0010205 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_GET_HPP #define BOOST_COMPUTE_FUNCTIONAL_GET_HPP #include <cstddef> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/type_traits/scalar_type.hpp> namespace boost { namespace compute { namespace detail { // meta-function returning the result type for get<N>() template<size_t N, class Arg> struct get_result_type { typedef typename scalar_type<Arg>::type type; }; template<size_t N, class Arg, class T> struct invoked_get { typedef typename get_result_type<N, T>::type result_type; invoked_get(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// Returns the \c N'th element of an aggregate type (e.g. scalarN, /// pair, tuple, etc.). /// /// \see \ref field "field<T>" template<size_t N> struct get { /// \internal_ template<class> struct result; /// \internal_ template<class F, class Arg> struct result<F(Arg)> { typedef typename detail::get_result_type<N, Arg>::type type; }; template<class Arg> detail::invoked_get< N, Arg, typename boost::remove_cv<typename Arg::result_type>::type > operator()(const Arg &arg) const { typedef typename boost::remove_cv<typename Arg::result_type>::type T; return detail::invoked_get<N, Arg, T>(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_GET_HPP functional/relational.hpp 0000644 00000003677 15125510617 0011571 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP #define BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP #include <boost/compute/functional/detail/macros.hpp> namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnotequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreater, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreaterequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isless, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessgreater, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isfinite, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isinf, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnan, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnormal, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isordered, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isunordered, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(signbit, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(any, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(all, int (T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP functional/math.hpp 0000644 00000010271 15125510617 0010354 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_MATH_HPP #define BOOST_COMPUTE_FUNCTIONAL_MATH_HPP #include <boost/compute/functional/detail/macros.hpp> namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acos, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acosh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acospi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asin, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2pi, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cbrt, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ceil, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(copysign, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cos, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cosh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cospi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erf, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erfc, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp2, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp10, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(expm1, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fabs, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fdim, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(floor, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fma, T (T, T, T), class T) 
BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmax, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmin, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmod, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hypot, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ilogb, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(lgamma, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log2, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log10, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log1p, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(logb, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(mad, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(nextafter, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pow, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pown, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(powr, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(remainder, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rint, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rootn, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(round, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rsqrt, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sin, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sqrt, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tan, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tgamma, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(trunc, T (T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_MATH_HPP 
functional/logical.hpp 0000644 00000011163 15125510617 0011036 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP #define BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP namespace boost { namespace compute { namespace detail { template<class Predicate, class Expr> class invoked_unary_negate_function { public: typedef int result_type; invoked_unary_negate_function(const Predicate &pred, const Expr &expr) : m_pred(pred), m_expr(expr) { } Predicate pred() const { return m_pred; } Expr expr() const { return m_expr; } private: Predicate m_pred; Expr m_expr; }; template<class Predicate, class Expr1, class Expr2> class invoked_binary_negate_function { public: typedef int result_type; invoked_binary_negate_function(const Predicate &pred, const Expr1 &expr1, const Expr2 &expr2) : m_pred(pred), m_expr1(expr1), m_expr2(expr2) { } Predicate pred() const { return m_pred; } Expr1 expr1() const { return m_expr1; } Expr2 expr2() const { return m_expr2; } private: Predicate m_pred; Expr1 m_expr1; Expr2 m_expr2; }; } // end detail namespace /// \internal_ template<class Arg, class Result> struct unary_function { typedef Arg argument_type; typedef Result result_type; }; /// \internal_ template<class Arg1, class Arg2, class Result> struct binary_function { typedef Arg1 first_argument_type; typedef Arg2 second_argument_type; typedef Result result_type; }; /// \internal_ template<class Arg1, class Arg2, class Arg3, class Result> struct ternary_function { typedef Arg1 first_argument_type; typedef Arg2 second_argument_type; typedef Arg3 third_argument_type; 
typedef Result result_type; }; /// The unary_negate function adaptor negates a unary function. /// /// \see not1() template<class Predicate> class unary_negate : public unary_function<void, int> { public: explicit unary_negate(Predicate pred) : m_pred(pred) { } /// \internal_ template<class Arg> detail::invoked_unary_negate_function<Predicate, Arg> operator()(const Arg &arg) const { return detail::invoked_unary_negate_function< Predicate, Arg >(m_pred, arg); } private: Predicate m_pred; }; /// The binnary_negate function adaptor negates a binary function. /// /// \see not2() template<class Predicate> class binary_negate : public binary_function<void, void, int> { public: explicit binary_negate(Predicate pred) : m_pred(pred) { } /// \internal_ template<class Arg1, class Arg2> detail::invoked_binary_negate_function<Predicate, Arg1, Arg2> operator()(const Arg1 &arg1, const Arg2 &arg2) const { return detail::invoked_binary_negate_function< Predicate, Arg1, Arg2 >(m_pred, arg1, arg2); } private: Predicate m_pred; }; /// Returns a unary_negate adaptor around \p predicate. /// /// \param predicate the unary function to wrap /// /// \return a unary_negate wrapper around \p predicate template<class Predicate> inline unary_negate<Predicate> not1(const Predicate &predicate) { return unary_negate<Predicate>(predicate); } /// Returns a binary_negate adaptor around \p predicate. /// /// \param predicate the binary function to wrap /// /// \return a binary_negate wrapper around \p predicate template<class Predicate> inline binary_negate<Predicate> not2(const Predicate &predicate) { return binary_negate<Predicate>(predicate); } /// The logical_not function negates its argument and returns it. 
/// /// \see not1(), not2() template<class T> struct logical_not : public unary_function<T, int> { /// \internal_ template<class Expr> detail::invoked_function<int, boost::tuple<Expr> > operator()(const Expr &expr) const { return detail::invoked_function<int, boost::tuple<Expr> >( "!", std::string(), boost::make_tuple(expr) ); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP functional/as.hpp 0000644 00000002260 15125510617 0010025 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_AS_HPP #define BOOST_COMPUTE_FUNCTIONAL_AS_HPP namespace boost { namespace compute { namespace detail { template<class T, class Arg> struct invoked_as { invoked_as(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// The \ref as function converts its argument to type \c T (similar to /// reinterpret_cast<T>). 
/// /// \see \ref convert "convert<T>" template<class T> struct as { typedef T result_type; /// \internal_ template<class Arg> detail::invoked_as<T, Arg> operator()(const Arg &arg) const { return detail::invoked_as<T, Arg>(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_AS_HPP functional/identity.hpp 0000644 00000003021 15125510617 0011247 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP #define BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP namespace boost { namespace compute { namespace detail { template<class T, class Arg> struct invoked_identity { typedef T result_type; invoked_identity(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// Identity function which simply returns its input. /// /// For example, to directly copy values using the transform() algorithm: /// \code /// transform(input.begin(), input.end(), output.begin(), identity<int>(), queue); /// \endcode /// /// \see \ref as "as<T>", \ref convert "convert<T>" template<class T> class identity { public: /// Identity function result type. typedef T result_type; /// Creates a new identity function. 
identity() { } /// \internal_ template<class Arg> detail::invoked_identity<T, Arg> operator()(const Arg &arg) const { return detail::invoked_identity<T, Arg>(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP functional/bind.hpp 0000644 00000016076 15125510617 0010350 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_BIND_HPP #define BOOST_COMPUTE_FUNCTIONAL_BIND_HPP #include <boost/mpl/int.hpp> #include <boost/tuple/tuple.hpp> #include <boost/type_traits/conditional.hpp> #include <boost/compute/config.hpp> #include <boost/compute/detail/meta_kernel.hpp> namespace boost { namespace compute { namespace placeholders { /// \internal_ template<int I> struct placeholder : boost::integral_constant<int, I> { placeholder() { } }; placeholder<0> const _1; placeholder<1> const _2; } // end placeholders namespace /// Meta-function returning \c true if \c T is a placeholder type. 
template<class T>
struct is_placeholder : boost::false_type
{
};

/// \internal_
template<int I>
struct is_placeholder<placeholders::placeholder<I> > : boost::true_type
{
};

namespace detail {

// Expression object created when a bound_function is called. It stores
// copies of the wrapped function, the tuple of bound arguments (values
// and/or placeholders) and the tuple of call-time arguments.
template<class Function, class BoundArgs, class Args>
struct invoked_bound_function
{
    invoked_bound_function(Function f, BoundArgs bound_args, Args args)
        : m_function(f),
          m_bound_args(bound_args),
          m_args(args)
    {
    }

    // meta-function returning true if the N'th argument is a placeholder
    template<int N>
    struct is_placeholder_arg
    {
        typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;

        typedef typename is_placeholder<nth_bound_arg>::type type;
        static const bool value = is_placeholder<nth_bound_arg>::value;
    };

    // maps a bound argument type to the type it has when invoked:
    // non-placeholders map to themselves ...
    template<class Arg>
    struct get_arg_type
    {
        typedef Arg type;
    };

    // ... while placeholder<I> maps to the I'th element type of Args
    template<int I>
    struct get_arg_type<placeholders::placeholder<I> >
    {
        typedef typename boost::tuples::element<I, Args>::type type;
    };

    // meta-function returning the type of the N'th argument when invoked
    template<int N>
    struct get_nth_arg_type
    {
        typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;

        typedef typename get_arg_type<nth_bound_arg>::type type;
    };

    // selected when the N'th bound argument is a placeholder: returns the
    // call-time argument the placeholder's value indexes
    template<int N>
    typename get_nth_arg_type<N>::type get_nth_arg(
        typename boost::enable_if_c<is_placeholder_arg<N>::value>::type* = 0
    ) const
    {
        typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;

        return boost::get<nth_bound_arg::value>(m_args);
    }

    // selected when the N'th bound argument is not a placeholder: returns
    // the stored bound value
    template<int N>
    typename get_nth_arg_type<N>::type get_nth_arg(
        typename boost::disable_if_c<is_placeholder_arg<N>::value>::type* = 0
    ) const
    {
        return boost::get<N>(m_bound_args);
    }

    Function m_function;
    BoundArgs m_bound_args;
    Args m_args;
};

// Streams the invocation of a bound function with exactly one bound
// argument into the kernel.
template<class Function, class BoundArgs, class Args>
inline meta_kernel& apply_invoked_bound_function(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr,
    typename boost::enable_if_c<
        boost::tuples::length<BoundArgs>::value == 1
    >::type* = 0
)
{
    return k << expr.m_function(expr.template get_nth_arg<0>());
}

// Same as above for exactly two bound arguments.
template<class Function, class BoundArgs, class Args>
inline meta_kernel& apply_invoked_bound_function(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr,
    typename boost::enable_if_c<
        boost::tuples::length<BoundArgs>::value == 2
    >::type* = 0
)
{
    return k << expr.m_function(expr.template get_nth_arg<0>(),
                                expr.template get_nth_arg<1>());
}

// Same as above for exactly three bound arguments.
template<class Function, class BoundArgs, class Args>
inline meta_kernel& apply_invoked_bound_function(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr,
    typename boost::enable_if_c<
        boost::tuples::length<BoundArgs>::value == 3
    >::type* = 0
)
{
    return k << expr.m_function(expr.template get_nth_arg<0>(),
                                expr.template get_nth_arg<1>(),
                                expr.template get_nth_arg<2>());
}

// Stream operator: forwards to the apply_invoked_bound_function() overload
// matching the number of bound arguments (only 1, 2 and 3 are provided).
template<class Function, class BoundArgs, class Args>
inline meta_kernel& operator<<(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr
)
{
    return apply_invoked_bound_function(k, expr);
}

// Function object returned by bind(). Calling it with one or two arguments
// yields an invoked_bound_function holding copies of the function, the
// bound arguments and the call arguments.
template<class Function, class BoundArgs>
struct bound_function
{
    typedef int result_type;

    bound_function(Function f, BoundArgs args)
        : m_function(f),
          m_args(args)
    {
    }

    // one call argument
    template<class Arg1>
    detail::invoked_bound_function<
        Function,
        BoundArgs,
        boost::tuple<Arg1>
    >
    operator()(const Arg1 &arg1) const
    {
        return detail::invoked_bound_function<
                   Function,
                   BoundArgs,
                   boost::tuple<Arg1>
               >(m_function, m_args, boost::make_tuple(arg1));
    }

    // two call arguments
    template<class Arg1, class Arg2>
    detail::invoked_bound_function<
        Function,
        BoundArgs,
        boost::tuple<Arg1, Arg2>
    >
    operator()(const Arg1 &arg1, const Arg2 &arg2) const
    {
        return detail::invoked_bound_function<
                   Function,
                   BoundArgs,
                   boost::tuple<Arg1, Arg2>
               >(m_function, m_args, boost::make_tuple(arg1, arg2));
    }

    Function m_function;
    BoundArgs m_args;
};

} // end detail namespace

#if !defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Returns a function wrapper which invokes \p f with \p args when called.
///
/// For example, to generate a unary function object which returns \c true
/// when its argument is less than \c 7:
/// \code
/// using boost::compute::less;
/// using boost::compute::placeholders::_1;
///
/// auto less_than_seven = boost::compute::bind(less<int>(), _1, 7);
/// \endcode
template<class F, class... Args>
inline detail::bound_function<F, boost::tuple<Args...> >
bind(F f, Args... args)
{
    typedef typename boost::tuple<Args...> ArgsTuple;

    return detail::bound_function<F, ArgsTuple>(f, boost::make_tuple(args...));
}
#else
// fallback overloads for compilers without variadic templates:
// one, two or three bound arguments
template<class F, class A1>
inline detail::bound_function<F, boost::tuple<A1> >
bind(F f, A1 a1)
{
    typedef typename boost::tuple<A1> Args;

    return detail::bound_function<F, Args>(f, boost::make_tuple(a1));
}

template<class F, class A1, class A2>
inline detail::bound_function<F, boost::tuple<A1, A2> >
bind(F f, A1 a1, A2 a2)
{
    typedef typename boost::tuple<A1, A2> Args;

    return detail::bound_function<F, Args>(f, boost::make_tuple(a1, a2));
}

template<class F, class A1, class A2, class A3>
inline detail::bound_function<F, boost::tuple<A1, A2, A3> >
bind(F f, A1 a1, A2 a2, A3 a3)
{
    typedef typename boost::tuple<A1, A2, A3> Args;

    return detail::bound_function<F, Args>(f, boost::make_tuple(a1, a2, a3));
}
#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_FUNCTIONAL_BIND_HPP
functional/convert.hpp 0000644 00000002316 15125510617 0011104 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP
#define BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP

namespace boost {
namespace compute {
namespace detail {

// Expression node produced by convert<T>: keeps a copy of the argument that
// is to be converted to T.
template<class T, class Arg>
struct invoked_convert
{
    invoked_convert(const Arg &value)
        : m_arg(value)
    {
    }

    Arg m_arg;
};

} // end detail namespace

/// The \ref convert function converts its argument to type \c T (similar to
/// static_cast<T>).
///
/// \see \ref as "as<T>"
template<class T>
struct convert
{
    typedef T result_type;

    /// \internal_
    template<class Arg>
    detail::invoked_convert<T, Arg> operator()(const Arg &arg) const
    {
        typedef detail::invoked_convert<T, Arg> invoked_type;

        return invoked_type(arg);
    }
};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP
// functional/detail/macros.hpp 0000644 00000002350 15125510617 0012150 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP #define BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP #include <boost/preprocessor/cat.hpp> #include <boost/preprocessor/stringize.hpp> #include <boost/compute/function.hpp> #define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(name, signature, template_args) \ template<template_args> \ class name : public function<signature> \ { \ public: \ (name)() : function<signature>(BOOST_PP_STRINGIZE(name)) { } \ }; #define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(name, signature, template_args) \ template<template_args> \ class BOOST_PP_CAT(name, _) : public function<signature> \ { \ public: \ BOOST_PP_CAT(name, _)() : function<signature>(BOOST_PP_STRINGIZE(name)) { } \ }; #endif // BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP functional/detail/nvidia_popcount.hpp 0000644 00000002312 15125510617 0014063 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP #define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP #include <boost/compute/function.hpp> namespace boost { namespace compute { namespace detail { template<class T> class nvidia_popcount : public function<T(T)> { public: nvidia_popcount() : function<T(T)>("nvidia_popcount") { this->set_source( "inline uint nvidia_popcount(const uint x)\n" "{\n" " uint count;\n" " asm(\"popc.b32 %0, %1;\" : \"=r\"(count) : \"r\"(x));\n" " return count;\n" "}\n" ); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP functional/detail/unpack.hpp 0000644 00000010156 15125510617 0012150 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP
#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP

#include <boost/compute/functional/get.hpp>
#include <boost/compute/type_traits/is_vector_type.hpp>
#include <boost/compute/type_traits/result_of.hpp>
#include <boost/compute/type_traits/vector_size.hpp>
#include <boost/compute/detail/meta_kernel.hpp>

namespace boost {
namespace compute {
namespace detail {

// Expression node created by unpacked<Function>::operator(): stores the
// function and the single aggregate argument whose Arity elements will be
// passed to it individually.
template<class Function, class Arg, size_t Arity>
struct invoked_unpacked
{
    invoked_unpacked(const Function &f, const Arg &arg)
        : m_function(f),
          m_arg(arg)
    {
    }

    Function m_function;
    Arg m_arg;
};

// Generic declaration only; definitions are provided for Arity 1, 2 and 3.
template<class Function, class Arg, size_t Arity>
inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, Arity> &expr);

// Arity 1: calls the function with element 0 of the argument.
template<class Function, class Arg>
inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 1> &expr)
{
    return k << expr.m_function(get<0>()(expr.m_arg));
}

// Arity 2: calls the function with elements 0 and 1 of the argument.
template<class Function, class Arg>
inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 2> &expr)
{
    return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg));
}

// Arity 3: calls the function with elements 0, 1 and 2 of the argument.
template<class Function, class Arg>
inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 3> &expr)
{
    return k << expr.m_function(get<0>()(expr.m_arg),
                                get<1>()(expr.m_arg),
                                get<2>()(expr.m_arg));
}

// Adapter turning an N-ary function into a unary one that takes a single
// aggregate (tuple or vector type) and passes its elements as separate
// arguments.
template<class Function>
struct unpacked
{
    // Number of elements in the aggregate T: the tuple length by default ...
    template<class T, class Enable = void>
    struct aggregate_length
    {
        BOOST_STATIC_CONSTANT(size_t, value = boost::tuples::length<T>::value);
    };

    // ... or vector_size<T> when is_vector_type<T> holds.
    template<class T>
    struct aggregate_length<T, typename enable_if<is_vector_type<T> >::type>
    {
        BOOST_STATIC_CONSTANT(size_t, value = vector_size<T>::value);
    };

    // Result type of Function when called with the elements of TupleArg;
    // only sizes 1, 2 and 3 are specialized.
    template<class TupleArg, size_t TupleSize>
    struct result_impl {};

    template<class TupleArg>
    struct result_impl<TupleArg, 1>
    {
        typedef typename detail::get_result_type<0, TupleArg>::type T1;

        typedef typename boost::compute::result_of<Function(T1)>::type type;
    };

    template<class TupleArg>
    struct result_impl<TupleArg, 2>
    {
        typedef typename detail::get_result_type<0, TupleArg>::type T1;
        typedef typename detail::get_result_type<1, TupleArg>::type T2;

        typedef typename boost::compute::result_of<Function(T1, T2)>::type type;
    };

    template<class TupleArg>
    struct result_impl<TupleArg, 3>
    {
        typedef typename detail::get_result_type<0, TupleArg>::type T1;
        typedef typename detail::get_result_type<1, TupleArg>::type T2;
        typedef typename detail::get_result_type<2, TupleArg>::type T3;

        typedef typename boost::compute::result_of<Function(T1, T2, T3)>::type type;
    };

    // result_of-style protocol: result<This(Arg)>::type names the result of
    // invoking the adapter with an aggregate of type Arg.
    template<class Signature>
    struct result {};

    template<class This, class Arg>
    struct result<This(Arg)>
    {
        typedef typename result_impl<Arg, aggregate_length<Arg>::value>::type type;
    };

    unpacked(const Function &f)
        : m_function(f)
    {
    }

    // The arity is derived from Arg::result_type, i.e. from the type the
    // argument expression evaluates to rather than from Arg itself.
    template<class Arg>
    detail::invoked_unpacked<
        Function, Arg, aggregate_length<typename Arg::result_type>::value
    >
    operator()(const Arg &arg) const
    {
        return detail::invoked_unpacked<
                   Function,
                   Arg,
                   aggregate_length<typename Arg::result_type>::value
               >(m_function, arg);
    }

    Function m_function;
};

// Convenience factory deducing Function.
template<class Function>
inline unpacked<Function> unpack(const Function &f)
{
    return unpacked<Function>(f);
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP
// functional/detail/nvidia_ballot.hpp 0000644 00000002633 15125510617 0013477 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP #define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP #include <boost/compute/function.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { namespace detail { template<class T> class nvidia_ballot : public function<uint_(T)> { public: nvidia_ballot() : function<uint_(T)>("nvidia_ballot") { this->set_source( "inline uint nvidia_ballot(const uint x)\n" "{\n" " uint result;\n" " asm volatile(\n" " \"setp.ne.u32 %%p1, %1, 0;\"\n" " \"vote.ballot.b32 %0, %%p1;\"\n" " : \"=r\"(result)\n" " : \"r\"(x)\n" " );\n" " return result;\n" "}\n" ); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP functional/geometry.hpp 0000644 00000002647 15125510617 0011266 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP #define BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP #include <boost/compute/type_traits.hpp> #include <boost/compute/functional/detail/macros.hpp> namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cross, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(dot, typename scalar_type<T>::type (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(distance, typename scalar_type<T>::type (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_distance, typename scalar_type<T>::type (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(length, typename scalar_type<T>::type (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_length, typename scalar_type<T>::type (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(normalize, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_normalize, T (T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP functional/integer.hpp 0000644 00000002230 15125510617 0011054 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP #define BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP #include <boost/compute/functional/detail/macros.hpp> namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs_diff, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(add_sat, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hadd, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rhadd, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(max, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(min, T (T, T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP functional/hash.hpp 0000644 00000005015 15125510617 0010346 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_HASH_HPP #define BOOST_COMPUTE_FUNCTIONAL_HASH_HPP #include <boost/compute/function.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { namespace detail { template<class Key> std::string make_hash_function_name() { return std::string("boost_hash_") + type_name<Key>(); } template<class Key> inline std::string make_hash_function_source() { std::stringstream source; source << "inline ulong " << make_hash_function_name<Key>() << "(const " << type_name<Key>() << " x)\n" << "{\n" // note we reinterpret the argument as a 32-bit uint and // then promote it to a 64-bit ulong for the result type << " ulong a = as_uint(x);\n" << " a = (a ^ 61) ^ (a >> 16);\n" << " a = a + (a << 3);\n" << " a = a ^ (a >> 4);\n" << " a = a * 0x27d4eb2d;\n" << " a = a ^ (a >> 15);\n" << " return a;\n" << "}\n"; return source.str(); } template<class Key> struct hash_impl { typedef Key argument_type; typedef ulong_ result_type; hash_impl() : m_function("") { m_function = make_function_from_source<result_type(argument_type)>( make_hash_function_name<argument_type>(), make_hash_function_source<argument_type>() ); } template<class Arg> invoked_function<result_type, boost::tuple<Arg> > operator()(const Arg &arg) const { return m_function(arg); } function<result_type(argument_type)> m_function; }; } // end detail namespace /// The hash function returns a hash value for the input value. /// /// The return type is \c ulong_ (the OpenCL unsigned long type). 
template<class Key> struct hash;

/// \internal_
template<> struct hash<int_> : detail::hash_impl<int_> { };

/// \internal_
template<> struct hash<uint_> : detail::hash_impl<uint_> { };

/// \internal_
template<> struct hash<float_> : detail::hash_impl<float_> { };

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_FUNCTIONAL_HASH_HPP
// functional/atomic.hpp 0000644 00000005605 15125510617 0010704 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP
#define BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP

#include <boost/compute/cl.hpp>
#include <boost/compute/function.hpp>

// Prefix of the wrapped function names: "atomic_" when
// BOOST_COMPUTE_CL_VERSION_1_1 is defined, "atom_" otherwise. Not defined
// when Doxygen is running.
#ifndef BOOST_COMPUTE_DOXYGEN_INVOKED
#ifdef BOOST_COMPUTE_CL_VERSION_1_1
    #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atomic_"
#else
    #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atom_"
#endif
#endif // BOOST_COMPUTE_DOXYGEN_INVOKED

namespace boost {
namespace compute {

// Every class below derives from function<Signature> and passes the prefix
// followed by the operation suffix as the function name.

/// Function object named <prefix>"add" with signature T (T*, T).
template<class T>
class atomic_add : public function<T (T*, T)>
{
public:
    atomic_add()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "add")
    {
    }
};

/// Function object named <prefix>"sub" with signature T (T*, T).
template<class T>
class atomic_sub : public function<T (T*, T)>
{
public:
    atomic_sub()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "sub")
    {
    }
};

/// Function object named <prefix>"xchg" with signature T (T*, T).
template<class T>
class atomic_xchg : public function<T (T*, T)>
{
public:
    atomic_xchg()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xchg")
    {
    }
};

/// Function object named <prefix>"inc" with signature T (T*).
template<class T>
class atomic_inc : public function<T (T*)>
{
public:
    atomic_inc()
        : function<T (T*)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "inc")
    {
    }
};

/// Function object named <prefix>"dec" with signature T (T*).
template<class T>
class atomic_dec : public function<T (T*)>
{
public:
    atomic_dec()
        : function<T (T*)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "dec")
    {
    }
};

/// Function object named <prefix>"cmpxchg" with signature T (T*, T, T).
template<class T>
class atomic_cmpxchg : public function<T (T*, T, T)>
{
public:
    atomic_cmpxchg()
        : function<T (T*, T, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "cmpxchg")
    {
    }
};

/// Function object named <prefix>"max" with signature T (T*, T).
template<class T>
class atomic_max : public function<T (T*, T)>
{
public:
    atomic_max()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "max")
    {
    }
};

/// Function object named <prefix>"min" with signature T (T*, T).
template<class T>
class atomic_min : public function<T (T*, T)>
{
public:
    atomic_min()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "min")
    {
    }
};

/// Function object named <prefix>"and" with signature T (T*, T).
template<class T>
class atomic_and : public function<T (T*, T)>
{
public:
    atomic_and()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "and")
    {
    }
};

/// Function object named <prefix>"or" with signature T (T*, T).
template<class T>
class atomic_or : public function<T (T*, T)>
{
public:
    atomic_or()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "or")
    {
    }
};

/// Function object named <prefix>"xor" with signature T (T*, T).
template<class T>
class atomic_xor : public function<T (T*, T)>
{
public:
    atomic_xor()
        : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xor")
    {
    }
};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP
// functional/field.hpp 0000644 00000004061 15125510617 0010506 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP #define BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP #include <string> namespace boost { namespace compute { namespace detail { template<class T, class Arg> struct invoked_field { typedef T result_type; invoked_field(const Arg &arg, const std::string &field) : m_arg(arg), m_field(field) { } Arg m_arg; std::string m_field; }; } // end detail namespace /// Returns the named field from a value. /// /// The template-type \c T specifies the field's value type. Note /// that the value type must match the actual type of the field /// otherwise runtime compilation or logic errors may occur. /// /// For example, to access the \c second field in a /// \c std::pair<int, float> object: /// \code /// field<float>("second"); /// \endcode /// /// This can also be used with vector types to access individual /// components as well as perform swizzle operations. /// /// For example, to access the first and third components of an /// \c int vector type (e.g. \c int4): /// \code /// field<int2_>("xz"); /// \endcode /// /// \see \ref get "get<N>" template<class T> class field { public: /// Result type. typedef T result_type; /// Creates a new field functor with \p field. 
field(const std::string &field) : m_field(field) { } /// \internal_ template<class Arg> detail::invoked_field<T, Arg> operator()(const Arg &arg) const { return detail::invoked_field<T, Arg>(arg, m_field); } private: std::string m_field; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP functional/common.hpp 0000644 00000002135 15125510617 0010713 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP #define BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP #include <boost/compute/functional/detail/macros.hpp> namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(clamp, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(degrees, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(radians, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sign, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(smoothstep, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(step, T (T, T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP functional/popcount.hpp 0000644 00000003412 15125510617 0011271 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP #define BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP #include <boost/compute/function.hpp> #include <boost/compute/type_traits/type_name.hpp> namespace boost { namespace compute { /// Returns the number of non-zero bits in \p x. /// /// \see_opencl_ref{popcount} template<class T> class popcount : public function<T(T)> { public: popcount() : function<T(T)>("boost_popcount") { std::stringstream s; s << "inline " << type_name<T>() << " boost_popcount" << "(const " << type_name<T>() << " x)\n" << "{\n" // use built-in popcount if opencl 1.2 is supported << "#if __OPENCL_VERSION__ >= 120\n" << " return popcount(x);\n" // fallback to generic popcount() implementation << "#else\n" << " " << type_name<T>() << " count = 0;\n" << " for(" << type_name<T>() << " i = 0; i < sizeof(i) * CHAR_BIT; i++){\n" << " if(x & (" << type_name<T>() << ") 1 << i){\n" << " count++;\n" << " }\n" << " }\n" << " return count;\n" << "#endif\n" << "}\n"; this->set_source(s.str()); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP functional/operator.hpp 0000644 00000006001 15125510617 0011252 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP #define BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP #include <string> namespace boost { namespace compute { namespace detail { template<class Expr1, class Expr2, class Result> struct invoked_binary_operator { typedef Result result_type; invoked_binary_operator(const std::string &op, const Expr1 &arg1, const Expr2 &arg2) : m_op(op), m_expr1(arg1), m_expr2(arg2) { } std::string op() const { return m_op; } Expr1 arg1() const { return m_expr1; } Expr2 arg2() const { return m_expr2; } std::string m_op; Expr1 m_expr1; Expr2 m_expr2; }; } // end detail namespace /// \internal_ #define BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(name, op, return_type, arg_type) \ template<class arg_type> \ class name : public function<return_type (arg_type, arg_type)> \ { \ public: \ name() : function<return_type (arg_type, arg_type)>(BOOST_PP_STRINGIZE(name)) { } \ \ template<class Arg1, class Arg2> \ detail::invoked_binary_operator<Arg1, Arg2, T> \ operator()(const Arg1 &x, const Arg2 &y) const \ { \ return detail::invoked_binary_operator<Arg1, Arg2, T>(op, x, y); \ } \ }; // arithmetic operations BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(plus, "+", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(minus, "-", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(multiplies, "*", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(divides, "/", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(modulus, "%", T, T) // comparisons BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(equal_to, "==", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(not_equal_to, "!=", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater, ">", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less, "<", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater_equal, ">=", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less_equal, "<=", T, T) // logical operators BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_and, "&&", T, T) 
BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_or, "||", T, T) // bitwise operations BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_and, "&", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_or, "|", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_xor, "^", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_left, "<<", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_right, ">>", T, T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP utility.hpp 0000644 00000001467 15125510617 0006773 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_HPP #define BOOST_COMPUTE_UTILITY_HPP #include <boost/compute/utility/dim.hpp> #include <boost/compute/utility/extents.hpp> #include <boost/compute/utility/invoke.hpp> #include <boost/compute/utility/program_cache.hpp> #include <boost/compute/utility/source.hpp> #include <boost/compute/utility/wait_list.hpp> #endif // BOOST_COMPUTE_UTILITY_HPP async/future.hpp 0000644 00000007616 15125510617 0007721 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_FUTURE_HPP #define BOOST_COMPUTE_ASYNC_FUTURE_HPP #include <boost/compute/event.hpp> namespace boost { namespace compute { /// \class future /// \brief Holds the result of an asynchronous computation. /// /// \see event, wait_list template<class T> class future { public: future() : m_event(0) { } future(const T &result, const event &event) : m_result(result), m_event(event) { } future(const future<T> &other) : m_result(other.m_result), m_event(other.m_event) { } future& operator=(const future<T> &other) { if(this != &other){ m_result = other.m_result; m_event = other.m_event; } return *this; } ~future() { } /// Returns the result of the computation. This will block until /// the result is ready. T get() { wait(); return m_result; } /// Returns \c true if the future is valid. bool valid() const { return m_event != 0; } /// Blocks until the computation is complete. void wait() const { m_event.wait(); } /// Returns the underlying event object. event get_event() const { return m_event; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Invokes a generic callback function once the future is ready. /// /// The function specified by callback must be invokable with zero arguments. 
/// /// \see_opencl_ref{clSetEventCallback} /// \opencl_version_warning{1,1} template<class Function> future& then(Function callback) { m_event.set_callback(callback); return *this; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 private: T m_result; event m_event; }; /// \internal_ template<> class future<void> { public: future() : m_event(0) { } template<class T> future(const future<T> &other) : m_event(other.get_event()) { } explicit future(const event &event) : m_event(event) { } template<class T> future<void> &operator=(const future<T> &other) { m_event = other.get_event(); return *this; } future<void> &operator=(const future<void> &other) { if(this != &other){ m_event = other.m_event; } return *this; } ~future() { } void get() { wait(); } bool valid() const { return m_event != 0; } void wait() const { m_event.wait(); } event get_event() const { return m_event; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Invokes a generic callback function once the future is ready. /// /// The function specified by callback must be invokable with zero arguments. 
/// /// \see_opencl_ref{clSetEventCallback} /// \opencl_version_warning{1,1} template<class Function> future<void> &then(Function callback) { m_event.set_callback(callback); return *this; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 private: event m_event; }; /// \internal_ template<class Result> inline future<Result> make_future(const Result &result, const event &event) { return future<Result>(result, event); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ASYNC_FUTURE_HPP async/wait_guard.hpp 0000644 00000003506 15125510617 0010527 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP #define BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP #include <boost/noncopyable.hpp> namespace boost { namespace compute { /// \class wait_guard /// \brief A guard object for synchronizing an operation on the device /// /// The wait_guard class stores a waitable object representing an operation /// on a compute device (e.g. \ref event, \ref future "future<T>") and calls /// its \c wait() method when the guard object goes out of scope. /// /// This is useful for ensuring that an OpenCL operation completes before /// leaving the current scope and cleaning up any resources. 
/// /// For example: /// \code /// // enqueue a compute kernel for execution /// event e = queue.enqueue_nd_range_kernel(...); /// /// // call e.wait() upon exiting the current scope /// wait_guard<event> guard(e); /// \endcode /// /// \ref wait_list, wait_for_all() template<class Waitable> class wait_guard : boost::noncopyable { public: /// Creates a new wait_guard object for \p waitable. wait_guard(const Waitable &waitable) : m_waitable(waitable) { } /// Destroys the wait_guard object. The default implementation will call /// \c wait() on the stored waitable object. ~wait_guard() { m_waitable.wait(); } private: Waitable m_waitable; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP async/wait.hpp 0000644 00000003164 15125510617 0007345 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_WAIT_HPP #define BOOST_COMPUTE_ASYNC_WAIT_HPP #include <boost/compute/config.hpp> #include <boost/compute/utility/wait_list.hpp> namespace boost { namespace compute { namespace detail { #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES template<class Event> inline void insert_events_variadic(wait_list &l, Event&& event) { l.insert(std::forward<Event>(event)); } template<class Event, class... Rest> inline void insert_events_variadic(wait_list &l, Event&& event, Rest&&... 
rest) { l.insert(std::forward<Event>(event)); insert_events_variadic(l, std::forward<Rest>(rest)...); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES } // end detail namespace #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// Blocks until all events have completed. Events can either be \ref event /// objects or \ref future "future<T>" objects. /// /// \see event, wait_list template<class... Events> inline void wait_for_all(Events&&... events) { wait_list l; detail::insert_events_variadic(l, std::forward<Events>(events)...); l.wait(); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ASYNC_WAIT_HPP lambda.hpp 0000644 00000001536 15125510617 0006505 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_HPP #define BOOST_COMPUTE_LAMBDA_HPP #include <boost/compute/lambda/context.hpp> #include <boost/compute/lambda/functional.hpp> #include <boost/compute/lambda/get.hpp> #include <boost/compute/lambda/make_pair.hpp> #include <boost/compute/lambda/make_tuple.hpp> #include <boost/compute/lambda/placeholders.hpp> #include <boost/compute/lambda/result_of.hpp> #endif // BOOST_COMPUTE_LAMBDA_HPP pipe.hpp 0000644 00000007650 15125510617 0006225 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_PIPE_HPP #define BOOST_COMPUTE_PIPE_HPP #include <boost/compute/cl.hpp> #include <boost/compute/context.hpp> #include <boost/compute/memory_object.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/detail/get_object_info.hpp> // pipe objects require opencl 2.0 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace boost { namespace compute { /// \class pipe /// \brief A FIFO data pipe /// /// \opencl_version_warning{2,0} /// /// \see memory_object class pipe : public memory_object { public: /// Creates a null pipe object. pipe() : memory_object() { } /// Creates a pipe object for \p mem. If \p retain is \c true, the /// reference count for \p mem will be incremented. explicit pipe(cl_mem mem, bool retain = true) : memory_object(mem, retain) { } /// Creates a new pipe in \p context. 
pipe(const context &context, uint_ pipe_packet_size, uint_ pipe_max_packets, cl_mem_flags flags = read_write, const cl_pipe_properties *properties = 0) { cl_int error = 0; m_mem = clCreatePipe(context, flags, pipe_packet_size, pipe_max_packets, properties, &error); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new pipe object as a copy of \p other. pipe(const pipe &other) : memory_object(other) { } /// Copies the pipe object from \p other to \c *this. pipe& operator=(const pipe &other) { if(this != &other){ memory_object::operator=(other); } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new pipe object from \p other. pipe(pipe&& other) BOOST_NOEXCEPT : memory_object(std::move(other)) { } /// Move-assigns the pipe from \p other to \c *this. pipe& operator=(pipe&& other) BOOST_NOEXCEPT { memory_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the pipe object. ~pipe() { } /// Returns the packet size. uint_ packet_size() const { return get_info<uint_>(CL_PIPE_PACKET_SIZE); } /// Returns the max number of packets. uint_ max_packets() const { return get_info<uint_>(CL_PIPE_MAX_PACKETS); } /// Returns information about the pipe. 
/// /// \see_opencl2_ref{clGetPipeInfo} template<class T> T get_info(cl_pipe_info info) const { return detail::get_object_info<T>(clGetPipeInfo, m_mem, info); } /// \overload template<int Enum> typename detail::get_object_info_type<pipe, Enum>::type get_info() const; }; /// \internal_ define get_info() specializations for pipe BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(pipe, ((cl_uint, CL_PIPE_PACKET_SIZE)) ((cl_uint, CL_PIPE_MAX_PACKETS)) ) namespace detail { // set_kernel_arg specialization for pipe template<> struct set_kernel_arg<pipe> { void operator()(kernel &kernel_, size_t index, const pipe &pipe_) { kernel_.set_arg(index, pipe_.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CL_VERSION_2_0 #endif // BOOST_COMPUTE_PIPE_HPP event.hpp 0000644 00000024050 15125510617 0006402 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EVENT_HPP #define BOOST_COMPUTE_EVENT_HPP #include <boost/function.hpp> #include <boost/compute/config.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/detail/duration.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/detail/assert_cl_success.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { /// \class event /// \brief An event corresponding to an operation on a compute device /// /// Event objects are used to track operations running on the device (such as /// kernel executions and memory transfers). 
Event objects are returned by the /// various \c enqueue_* methods of the command_queue class. /// /// Events can be used to synchronize operations between the host and the /// device. The \c wait() method will block execution on the host until the /// operation corresponding to the event on the device has completed. The /// status of the operation can also be polled with the \c status() method. /// /// Event objects can also be used for performance profiling. In order to use /// events for profiling, the command queue must be constructed with the /// \c CL_QUEUE_PROFILING_ENABLE flag. Then the \c duration() method can be /// used to retrieve the total duration of the operation on the device: /// \code /// std::cout << "time = " << e.duration<std::chrono::milliseconds>().count() << "ms\n"; /// \endcode /// /// \see \ref future "future<T>", wait_list class event { public: /// \internal_ enum execution_status { complete = CL_COMPLETE, running = CL_RUNNING, submitted = CL_SUBMITTED, queued = CL_QUEUED }; /// \internal_ enum command_type { ndrange_kernel = CL_COMMAND_NDRANGE_KERNEL, task = CL_COMMAND_TASK, native_kernel = CL_COMMAND_NATIVE_KERNEL, read_buffer = CL_COMMAND_READ_BUFFER, write_buffer = CL_COMMAND_WRITE_BUFFER, copy_buffer = CL_COMMAND_COPY_BUFFER, read_image = CL_COMMAND_READ_IMAGE, write_image = CL_COMMAND_WRITE_IMAGE, copy_image = CL_COMMAND_COPY_IMAGE, copy_image_to_buffer = CL_COMMAND_COPY_IMAGE_TO_BUFFER, copy_buffer_to_image = CL_COMMAND_COPY_BUFFER_TO_IMAGE, map_buffer = CL_COMMAND_MAP_BUFFER, map_image = CL_COMMAND_MAP_IMAGE, unmap_mem_object = CL_COMMAND_UNMAP_MEM_OBJECT, marker = CL_COMMAND_MARKER, aquire_gl_objects = CL_COMMAND_ACQUIRE_GL_OBJECTS, release_gl_object = CL_COMMAND_RELEASE_GL_OBJECTS #if defined(BOOST_COMPUTE_CL_VERSION_1_1) , read_buffer_rect = CL_COMMAND_READ_BUFFER_RECT, write_buffer_rect = CL_COMMAND_WRITE_BUFFER_RECT, copy_buffer_rect = CL_COMMAND_COPY_BUFFER_RECT #endif }; /// \internal_ enum profiling_info { 
profiling_command_queued = CL_PROFILING_COMMAND_QUEUED, profiling_command_submit = CL_PROFILING_COMMAND_SUBMIT, profiling_command_start = CL_PROFILING_COMMAND_START, profiling_command_end = CL_PROFILING_COMMAND_END }; /// Creates a null event object. event() : m_event(0) { } explicit event(cl_event event, bool retain = true) : m_event(event) { if(m_event && retain){ clRetainEvent(event); } } /// Makes a new event as a copy of \p other. event(const event &other) : m_event(other.m_event) { if(m_event){ clRetainEvent(m_event); } } /// Copies the event object from \p other to \c *this. event& operator=(const event &other) { if(this != &other){ if(m_event){ clReleaseEvent(m_event); } m_event = other.m_event; if(m_event){ clRetainEvent(m_event); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new event object from \p other. event(event&& other) BOOST_NOEXCEPT : m_event(other.m_event) { other.m_event = 0; } /// Move-assigns the event from \p other to \c *this. event& operator=(event&& other) BOOST_NOEXCEPT { if(m_event){ clReleaseEvent(m_event); } m_event = other.m_event; other.m_event = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the event object. ~event() { if(m_event){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseEvent(m_event) ); } } /// Returns a reference to the underlying OpenCL event object. cl_event& get() const { return const_cast<cl_event &>(m_event); } /// Returns the status of the event. cl_int status() const { return get_info<cl_int>(CL_EVENT_COMMAND_EXECUTION_STATUS); } /// Returns the command type for the event. cl_command_type get_command_type() const { return get_info<cl_command_type>(CL_EVENT_COMMAND_TYPE); } /// Returns information about the event. 
/// /// \see_opencl_ref{clGetEventInfo} template<class T> T get_info(cl_event_info info) const { return detail::get_object_info<T>(clGetEventInfo, m_event, info); } /// \overload template<int Enum> typename detail::get_object_info_type<event, Enum>::type get_info() const; /// Returns profiling information for the event. /// /// \see event::duration() /// /// \see_opencl_ref{clGetEventProfilingInfo} template<class T> T get_profiling_info(cl_profiling_info info) const { return detail::get_object_info<T>(clGetEventProfilingInfo, m_event, info); } /// Blocks until the actions corresponding to the event have /// completed. void wait() const { cl_int ret = clWaitForEvents(1, &m_event); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Registers a function to be called when the event status changes to /// \p status (by default CL_COMPLETE). The callback is passed the OpenCL /// event object, the event status, and a pointer to arbitrary user data. /// /// \see_opencl_ref{clSetEventCallback} /// /// \opencl_version_warning{1,1} void set_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)( cl_event event, cl_int status, void *user_data ), cl_int status = CL_COMPLETE, void *user_data = 0) { cl_int ret = clSetEventCallback(m_event, status, callback, user_data); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Registers a generic function to be called when the event status /// changes to \p status (by default \c CL_COMPLETE). /// /// The function specified by \p callback must be invokable with zero /// arguments (e.g. \c callback()). 
/// /// \opencl_version_warning{1,1} template<class Function> void set_callback(Function callback, cl_int status = CL_COMPLETE) { set_callback( event_callback_invoker, status, new boost::function<void()>(callback) ); } #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Returns the total duration of the event from \p start to \p end. /// /// For example, to print the number of milliseconds the event took to /// execute: /// \code /// std::cout << event.duration<std::chrono::milliseconds>().count() << " ms" << std::endl; /// \endcode /// /// \see event::get_profiling_info() template<class Duration> Duration duration(cl_profiling_info start = CL_PROFILING_COMMAND_START, cl_profiling_info end = CL_PROFILING_COMMAND_END) const { const ulong_ nanoseconds = get_profiling_info<ulong_>(end) - get_profiling_info<ulong_>(start); return detail::make_duration_from_nanoseconds(Duration(), nanoseconds); } /// Returns \c true if the event is the same as \p other. bool operator==(const event &other) const { return m_event == other.m_event; } /// Returns \c true if the event is different from \p other. 
bool operator!=(const event &other) const { return m_event != other.m_event; } /// \internal_ operator cl_event() const { return m_event; } /// \internal_ (deprecated) cl_int get_status() const { return status(); } private: #ifdef BOOST_COMPUTE_CL_VERSION_1_1 /// \internal_ static void BOOST_COMPUTE_CL_CALLBACK event_callback_invoker(cl_event, cl_int, void *user_data) { boost::function<void()> *callback = static_cast<boost::function<void()> *>(user_data); (*callback)(); delete callback; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 protected: cl_event m_event; }; /// \internal_ define get_info() specializations for event BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, ((cl_command_queue, CL_EVENT_COMMAND_QUEUE)) ((cl_command_type, CL_EVENT_COMMAND_TYPE)) ((cl_int, CL_EVENT_COMMAND_EXECUTION_STATUS)) ((cl_uint, CL_EVENT_REFERENCE_COUNT)) ) #ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, ((cl_context, CL_EVENT_CONTEXT)) ) #endif } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EVENT_HPP memory_object.hpp 0000644 00000015522 15125510617 0010123 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_OBJECT_HPP #define BOOST_COMPUTE_MEMORY_OBJECT_HPP #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/detail/assert_cl_success.hpp> namespace boost { namespace compute { /// \class memory_object /// \brief Base-class for memory objects. 
/// /// The memory_object class is the base-class for memory objects on /// compute devices. /// /// \see buffer, vector class memory_object { public: /// Flags for the creation of memory objects. enum mem_flags { read_write = CL_MEM_READ_WRITE, read_only = CL_MEM_READ_ONLY, write_only = CL_MEM_WRITE_ONLY, use_host_ptr = CL_MEM_USE_HOST_PTR, alloc_host_ptr = CL_MEM_ALLOC_HOST_PTR, copy_host_ptr = CL_MEM_COPY_HOST_PTR #ifdef BOOST_COMPUTE_CL_VERSION_1_2 , host_write_only = CL_MEM_HOST_WRITE_ONLY, host_read_only = CL_MEM_HOST_READ_ONLY, host_no_access = CL_MEM_HOST_NO_ACCESS #endif }; /// Symbolic names for the OpenCL address spaces. enum address_space { global_memory, local_memory, private_memory, constant_memory }; /// Returns the underlying OpenCL memory object. cl_mem& get() const { return const_cast<cl_mem &>(m_mem); } /// Returns the size of the memory object in bytes. size_t get_memory_size() const { return get_memory_info<size_t>(CL_MEM_SIZE); } /// Returns the type for the memory object. cl_mem_object_type get_memory_type() const { return get_memory_info<cl_mem_object_type>(CL_MEM_TYPE); } /// Returns the flags for the memory object. cl_mem_flags get_memory_flags() const { return get_memory_info<cl_mem_flags>(CL_MEM_FLAGS); } /// Returns the context for the memory object. context get_context() const { return context(get_memory_info<cl_context>(CL_MEM_CONTEXT)); } /// Returns the host pointer associated with the memory object. void* get_host_ptr() const { return get_memory_info<void *>(CL_MEM_HOST_PTR); } /// Returns the reference count for the memory object. uint_ reference_count() const { return get_memory_info<uint_>(CL_MEM_REFERENCE_COUNT); } /// Returns information about the memory object. 
/// /// \see_opencl_ref{clGetMemObjectInfo} template<class T> T get_memory_info(cl_mem_info info) const { return detail::get_object_info<T>(clGetMemObjectInfo, m_mem, info); } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Registers a function to be called when the memory object is deleted /// and its resources freed. /// /// \see_opencl_ref{clSetMemObjectDestructorCallback} /// /// \opencl_version_warning{1,1} void set_destructor_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)( cl_mem memobj, void *user_data ), void *user_data = 0) { cl_int ret = clSetMemObjectDestructorCallback(m_mem, callback, user_data); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Registers a function to be called when the memory object is deleted /// and its resources freed. /// /// The function specified by \p callback must be invokable with zero /// arguments (e.g. \c callback()). /// /// \opencl_version_warning{1,1} template<class Function> void set_destructor_callback(Function callback) { set_destructor_callback( destructor_callback_invoker, new boost::function<void()>(callback) ); } #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Returns \c true if the memory object is the same as \p other. bool operator==(const memory_object &other) const { return m_mem == other.m_mem; } /// Returns \c true if the memory object is different from \p other. 
bool operator!=(const memory_object &other) const { return m_mem != other.m_mem; } private: #ifdef BOOST_COMPUTE_CL_VERSION_1_1 /// \internal_ static void BOOST_COMPUTE_CL_CALLBACK destructor_callback_invoker(cl_mem, void *user_data) { boost::function<void()> *callback = static_cast<boost::function<void()> *>(user_data); (*callback)(); delete callback; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 protected: /// \internal_ memory_object() : m_mem(0) { } /// \internal_ explicit memory_object(cl_mem mem, bool retain = true) : m_mem(mem) { if(m_mem && retain){ clRetainMemObject(m_mem); } } /// \internal_ memory_object(const memory_object &other) : m_mem(other.m_mem) { if(m_mem){ clRetainMemObject(m_mem); } } /// \internal_ memory_object& operator=(const memory_object &other) { if(this != &other){ if(m_mem){ clReleaseMemObject(m_mem); } m_mem = other.m_mem; if(m_mem){ clRetainMemObject(m_mem); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// \internal_ memory_object(memory_object&& other) BOOST_NOEXCEPT : m_mem(other.m_mem) { other.m_mem = 0; } /// \internal_ memory_object& operator=(memory_object&& other) BOOST_NOEXCEPT { if(m_mem){ clReleaseMemObject(m_mem); } m_mem = other.m_mem; other.m_mem = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// \internal_ ~memory_object() { if(m_mem){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseMemObject(m_mem) ); } } protected: cl_mem m_mem; }; namespace detail { // set_kernel_arg specialization for memory_object template<> struct set_kernel_arg<memory_object> { void operator()(kernel &kernel_, size_t index, const memory_object &mem) { kernel_.set_arg(index, mem.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_MEMORY_OBJECT_HPP wait_list.hpp 0000644 00000001055 15125510617 0007260 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // 
Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use <boost/compute/utility/wait_list.hpp> instead #include <boost/compute/utility/wait_list.hpp> image.hpp 0000644 00000001576 15125510617 0006353 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_HPP #define BOOST_COMPUTE_IMAGE_HPP /// \file /// /// Meta-header to include all Boost.Compute image headers. #include <boost/compute/image/image1d.hpp> #include <boost/compute/image/image2d.hpp> #include <boost/compute/image/image3d.hpp> #include <boost/compute/image/image_format.hpp> #include <boost/compute/image/image_object.hpp> #include <boost/compute/image/image_sampler.hpp> #endif // BOOST_COMPUTE_IMAGE_HPP container.hpp 0000644 00000001775 15125510617 0007254 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_HPP #define BOOST_COMPUTE_CONTAINER_HPP /// \file /// /// Meta-header to include all Boost.Compute container headers. #include <boost/compute/container/array.hpp> #include <boost/compute/container/basic_string.hpp> #include <boost/compute/container/dynamic_bitset.hpp> #include <boost/compute/container/flat_map.hpp> #include <boost/compute/container/flat_set.hpp> #include <boost/compute/container/mapped_view.hpp> #include <boost/compute/container/string.hpp> #include <boost/compute/container/vector.hpp> #endif // BOOST_COMPUTE_CONTAINER_HPP types.hpp 0000644 00000001546 15125510617 0006432 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_HPP #define BOOST_COMPUTE_TYPES_HPP /// \file /// /// Meta-header to include all Boost.Compute types headers. 
#include <boost/compute/types/complex.hpp> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/types/pair.hpp> #include <boost/compute/types/struct.hpp> #include <boost/compute/types/tuple.hpp> #include <boost/compute/types/size_t.hpp> #endif // BOOST_COMPUTE_TYPES_HPP context.hpp 0000644 00000014702 15125510617 0006750 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTEXT_HPP #define BOOST_COMPUTE_CONTEXT_HPP #include <vector> #include <boost/throw_exception.hpp> #include <boost/compute/config.hpp> #include <boost/compute/device.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/detail/assert_cl_success.hpp> namespace boost { namespace compute { /// \class context /// \brief A compute context. /// /// The context class represents a compute context. /// /// A context object manages a set of OpenCL resources including memory /// buffers and program objects. Before allocating memory on the device or /// executing kernels you must set up a context object. /// /// To create a context for the default device on the system: /// \code /// // get the default compute device /// boost::compute::device gpu = boost::compute::system::default_device(); /// /// // create a context for the device /// boost::compute::context context(gpu); /// \endcode /// /// Once a context is created, memory can be allocated using the buffer class /// and kernels can be executed using the command_queue class. /// /// \see device, command_queue class context { public: /// Create a null context object. 
context() : m_context(0) { } /// Creates a new context for \p device with \p properties. /// /// \see_opencl_ref{clCreateContext} explicit context(const device &device, const cl_context_properties *properties = 0) { BOOST_ASSERT(device.id() != 0); cl_device_id device_id = device.id(); cl_int error = 0; m_context = clCreateContext(properties, 1, &device_id, 0, 0, &error); if(!m_context){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new context for \p devices with \p properties. /// /// \see_opencl_ref{clCreateContext} explicit context(const std::vector<device> &devices, const cl_context_properties *properties = 0) { BOOST_ASSERT(!devices.empty()); cl_int error = 0; m_context = clCreateContext( properties, static_cast<cl_uint>(devices.size()), reinterpret_cast<const cl_device_id *>(&devices[0]), 0, 0, &error ); if(!m_context){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new context object for \p context. If \p retain is /// \c true, the reference count for \p context will be incremented. explicit context(cl_context context, bool retain = true) : m_context(context) { if(m_context && retain){ clRetainContext(m_context); } } /// Creates a new context object as a copy of \p other. context(const context &other) : m_context(other.m_context) { if(m_context){ clRetainContext(m_context); } } /// Copies the context object from \p other to \c *this. context& operator=(const context &other) { if(this != &other){ if(m_context){ clReleaseContext(m_context); } m_context = other.m_context; if(m_context){ clRetainContext(m_context); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new context object from \p other. context(context&& other) BOOST_NOEXCEPT : m_context(other.m_context) { other.m_context = 0; } /// Move-assigns the context from \p other to \c *this. 
context& operator=(context&& other) BOOST_NOEXCEPT { if(m_context){ clReleaseContext(m_context); } m_context = other.m_context; other.m_context = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the context object. ~context() { if(m_context){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseContext(m_context) ); } } /// Returns the underlying OpenCL context. cl_context& get() const { return const_cast<cl_context &>(m_context); } /// Returns the device for the context. If the context contains multiple /// devices, the first is returned. device get_device() const { std::vector<device> devices = get_devices(); if(devices.empty()) { return device(); } return devices.front(); } /// Returns a vector of devices for the context. std::vector<device> get_devices() const { return get_info<std::vector<device> >(CL_CONTEXT_DEVICES); } /// Returns information about the context. /// /// \see_opencl_ref{clGetContextInfo} template<class T> T get_info(cl_context_info info) const { return detail::get_object_info<T>(clGetContextInfo, m_context, info); } /// \overload template<int Enum> typename detail::get_object_info_type<context, Enum>::type get_info() const; /// Returns \c true if the context is the same as \p other. bool operator==(const context &other) const { return m_context == other.m_context; } /// Returns \c true if the context is different from \p other. 
bool operator!=(const context &other) const { return m_context != other.m_context; } /// \internal_ operator cl_context() const { return m_context; } private: cl_context m_context; }; /// \internal_ define get_info() specializations for context BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, ((cl_uint, CL_CONTEXT_REFERENCE_COUNT)) ((std::vector<cl_device_id>, CL_CONTEXT_DEVICES)) ((std::vector<cl_context_properties>, CL_CONTEXT_PROPERTIES)) ) #ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, ((cl_uint, CL_CONTEXT_NUM_DEVICES)) ) #endif // BOOST_COMPUTE_CL_VERSION_1_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTEXT_HPP image_format.hpp 0000644 00000001057 15125510617 0007715 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use <boost/compute/image/image_format.hpp> instead #include <boost/compute/image/image_format.hpp> version.hpp 0000644 00000001231 15125510617 0006742 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_VERSION_HPP #define BOOST_COMPUTE_VERSION_HPP #define BOOST_COMPUTE_VERSION_MAJOR 0 #define BOOST_COMPUTE_VERSION_MINOR 5 #define BOOST_COMPUTE_VERSION_PATCH 0 #endif // BOOST_COMPUTE_VERSION_HPP exception.hpp 0000644 00000001532 15125510617 0007257 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_HPP #define BOOST_COMPUTE_EXCEPTION_HPP /// \file /// /// Meta-header to include all Boost.Compute exception headers. #include <boost/compute/exception/context_error.hpp> #include <boost/compute/exception/no_device_found.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/exception/unsupported_extension_error.hpp> #endif // BOOST_COMPUTE_EXCEPTION_HPP program.hpp 0000644 00000061376 15125510617 0006744 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_PROGRAM_HPP #define BOOST_COMPUTE_PROGRAM_HPP #include <string> #include <vector> #include <fstream> #include <streambuf> #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION #include <iostream> #endif #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/exception/program_build_failure.hpp> #include <boost/compute/detail/assert_cl_success.hpp> #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE #include <sstream> #include <boost/optional.hpp> #include <boost/compute/platform.hpp> #include <boost/compute/detail/getenv.hpp> #include <boost/compute/detail/path.hpp> #include <boost/compute/detail/sha1.hpp> #endif namespace boost { namespace compute { class kernel; /// \class program /// \brief A compute program. /// /// The program class represents an OpenCL program. /// /// Program objects are created with one of the static \c create_with_* /// functions. For example, to create a program from a source string: /// /// \snippet test/test_program.cpp create_with_source /// /// And to create a program from a source file: /// \code /// boost::compute::program bar_program = /// boost::compute::program::create_with_source_file("/path/to/bar.cl", context); /// \endcode /// /// Once a program object has been successfully created, it can be compiled /// using the \c build() method: /// \code /// // build the program /// foo_program.build(); /// \endcode /// /// Once the program is built, \ref kernel objects can be created using the /// \c create_kernel() method by passing their name: /// \code /// // create a kernel from the compiled program /// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo"); /// \endcode /// /// \see kernel class program { public: /// Creates a null program object. program() : m_program(0) { } /// Creates a program object for \p program. 
If \p retain is \c true, /// the reference count for \p program will be incremented. explicit program(cl_program program, bool retain = true) : m_program(program) { if(m_program && retain){ clRetainProgram(m_program); } } /// Creates a new program object as a copy of \p other. program(const program &other) : m_program(other.m_program) { if(m_program){ clRetainProgram(m_program); } } /// Copies the program object from \p other to \c *this. program& operator=(const program &other) { if(this != &other){ if(m_program){ clReleaseProgram(m_program); } m_program = other.m_program; if(m_program){ clRetainProgram(m_program); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new program object from \p other. program(program&& other) BOOST_NOEXCEPT : m_program(other.m_program) { other.m_program = 0; } /// Move-assigns the program from \p other to \c *this. program& operator=(program&& other) BOOST_NOEXCEPT { if(m_program){ clReleaseProgram(m_program); } m_program = other.m_program; other.m_program = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the program object. ~program() { if(m_program){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseProgram(m_program) ); } } /// Returns the underlying OpenCL program. cl_program& get() const { return const_cast<cl_program &>(m_program); } /// Returns the source code for the program. std::string source() const { return get_info<std::string>(CL_PROGRAM_SOURCE); } /// Returns the binary for the program. 
std::vector<unsigned char> binary() const { size_t binary_size = get_info<size_t>(CL_PROGRAM_BINARY_SIZES); std::vector<unsigned char> binary(binary_size); unsigned char *binary_ptr = &binary[0]; cl_int error = clGetProgramInfo(m_program, CL_PROGRAM_BINARIES, sizeof(unsigned char **), &binary_ptr, 0); if(error != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return binary; } #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns the SPIR-V binary for the program. std::vector<unsigned char> il_binary() const { return get_info<std::vector<unsigned char> >(CL_PROGRAM_IL); } #endif // BOOST_COMPUTE_CL_VERSION_2_1 std::vector<device> get_devices() const { std::vector<cl_device_id> device_ids = get_info<std::vector<cl_device_id> >(CL_PROGRAM_DEVICES); std::vector<device> devices; for(size_t i = 0; i < device_ids.size(); i++){ devices.push_back(device(device_ids[i])); } return devices; } /// Returns the context for the program. context get_context() const { return context(get_info<cl_context>(CL_PROGRAM_CONTEXT)); } /// Returns information about the program. /// /// \see_opencl_ref{clGetProgramInfo} template<class T> T get_info(cl_program_info info) const { return detail::get_object_info<T>(clGetProgramInfo, m_program, info); } /// \overload template<int Enum> typename detail::get_object_info_type<program, Enum>::type get_info() const; /// Returns build information about the program. /// /// For example, this function can be used to retreive the options used /// to build the program: /// \code /// std::string build_options = /// program.get_build_info<std::string>(CL_PROGRAM_BUILD_OPTIONS); /// \endcode /// /// \see_opencl_ref{clGetProgramInfo} template<class T> T get_build_info(cl_program_build_info info, const device &device) const { return detail::get_object_info<T>(clGetProgramBuildInfo, m_program, info, device.id()); } /// Builds the program with \p options. 
/// /// If the program fails to compile, this function will throw an /// opencl_error exception. /// \code /// try { /// // attempt to compile to program /// program.build(); /// } /// catch(boost::compute::opencl_error &e){ /// // program failed to compile, print out the build log /// std::cout << program.build_log() << std::endl; /// } /// \endcode /// /// \see_opencl_ref{clBuildProgram} void build(const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret = clBuildProgram(m_program, 0, 0, options_string, 0, 0); #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION if(ret != CL_SUCCESS){ // print the error, source code and build log std::cerr << "Boost.Compute: " << "kernel compilation failed (" << ret << ")\n" << "--- source ---\n" << source() << "\n--- build log ---\n" << build_log() << std::endl; } #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(program_build_failure(ret, build_log())); } } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Compiles the program with \p options. 
/// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clCompileProgram} void compile(const std::string &options = std::string(), const std::vector<std::pair<std::string, program> > &headers = std::vector<std::pair<std::string, program> >()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret; if (headers.empty()) { ret = clCompileProgram( m_program, 0, 0, options_string, 0, 0, 0, 0, 0 ); } else { std::vector<const char*> header_names(headers.size()); std::vector<cl_program> header_programs(headers.size()); for (size_t i = 0; i < headers.size(); ++i) { header_names[i] = headers[i].first.c_str(); header_programs[i] = headers[i].second.m_program; } ret = clCompileProgram( m_program, 0, 0, options_string, static_cast<cl_uint>(headers.size()), header_programs.data(), header_names.data(), 0, 0 ); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Links the programs in \p programs with \p options in \p context. /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clLinkProgram} static program link(const std::vector<program> &programs, const context &context, const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret; cl_program program_ = clLinkProgram( context.get(), 0, 0, options_string, static_cast<uint_>(programs.size()), reinterpret_cast<const cl_program*>(&programs[0]), 0, 0, &ret ); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return program(program_, false); } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Returns the build log. std::string build_log() const { return get_build_info<std::string>(CL_PROGRAM_BUILD_LOG, get_devices().front()); } /// Creates and returns a new kernel object for \p name. 
/// /// For example, to create the \c "foo" kernel (after the program has been /// created and built): /// \code /// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo"); /// \endcode kernel create_kernel(const std::string &name) const; /// Returns \c true if the program is the same at \p other. bool operator==(const program &other) const { return m_program == other.m_program; } /// Returns \c true if the program is different from \p other. bool operator!=(const program &other) const { return m_program != other.m_program; } /// \internal_ operator cl_program() const { return m_program; } /// Creates a new program with \p source in \p context. /// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source(const std::string &source, const context &context) { const char *source_string = source.c_str(); cl_int error = 0; cl_program program_ = clCreateProgramWithSource(context, uint_(1), &source_string, 0, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } /// Creates a new program with \p sources in \p context. /// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source(const std::vector<std::string> &sources, const context &context) { std::vector<const char*> source_strings(sources.size()); for(size_t i = 0; i < sources.size(); i++){ source_strings[i] = sources[i].c_str(); } cl_int error = 0; cl_program program_ = clCreateProgramWithSource(context, uint_(sources.size()), &source_strings[0], 0, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } /// Creates a new program with \p file in \p context. /// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source_file(const std::string &file, const context &context) { // create program return create_with_source(read_source_file(file), context); } /// Creates a new program with \p files in \p context. 
/// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source_file(const std::vector<std::string> &files, const context &context) { std::vector<std::string> sources(files.size()); for(size_t i = 0; i < files.size(); ++i) { // open file stream std::ifstream stream(files[i].c_str()); if(stream.fail()){ BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream.")); } // read source sources[i] = std::string( (std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>() ); } // create program return create_with_source(sources, context); } /// Creates a new program with \p binary of \p binary_size in /// \p context. /// /// \see_opencl_ref{clCreateProgramWithBinary} static program create_with_binary(const unsigned char *binary, size_t binary_size, const context &context) { const cl_device_id device = context.get_device().id(); cl_int error = 0; cl_int binary_status = 0; cl_program program_ = clCreateProgramWithBinary(context, uint_(1), &device, &binary_size, &binary, &binary_status, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } if(binary_status != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(binary_status)); } return program(program_, false); } /// Creates a new program with \p binary in \p context. /// /// \see_opencl_ref{clCreateProgramWithBinary} static program create_with_binary(const std::vector<unsigned char> &binary, const context &context) { return create_with_binary(&binary[0], binary.size(), context); } /// Creates a new program with \p file in \p context. 
/// /// \see_opencl_ref{clCreateProgramWithBinary} static program create_with_binary_file(const std::string &file, const context &context) { // open file stream std::ifstream stream(file.c_str(), std::ios::in | std::ios::binary); // read binary std::vector<unsigned char> binary( (std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>() ); // create program return create_with_binary(&binary[0], binary.size(), context); } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new program with the built-in kernels listed in /// \p kernel_names for \p devices in \p context. /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clCreateProgramWithBuiltInKernels} static program create_with_builtin_kernels(const context &context, const std::vector<device> &devices, const std::string &kernel_names) { cl_int error = 0; cl_program program_ = clCreateProgramWithBuiltInKernels( context.get(), static_cast<uint_>(devices.size()), reinterpret_cast<const cl_device_id *>(&devices[0]), kernel_names.c_str(), &error ); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } #endif // BOOST_COMPUTE_CL_VERSION_1_2 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new program with \p il_binary (SPIR-V binary) /// of \p il_size size in \p context. /// /// \opencl_version_warning{2,1} /// /// \see_opencl21_ref{clCreateProgramWithIL} static program create_with_il(const void * il_binary, const size_t il_size, const context &context) { cl_int error = 0; cl_program program_ = clCreateProgramWithIL( context.get(), il_binary, il_size, &error ); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } /// Creates a new program with \p il_binary (SPIR-V binary) /// in \p context. 
/// /// \opencl_version_warning{2,1} /// /// \see_opencl_ref{clCreateProgramWithIL} static program create_with_il(const std::vector<unsigned char> &il_binary, const context &context) { return create_with_il(&il_binary[0], il_binary.size(), context); } /// Creates a new program in \p context using SPIR-V /// binary \p file. /// /// \opencl_version_warning{2,1} /// /// \see_opencl_ref{clCreateProgramWithIL} static program create_with_il_file(const std::string &file, const context &context) { // open file stream std::ifstream stream(file.c_str(), std::ios::in | std::ios::binary); // read binary std::vector<unsigned char> il( (std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>() ); // create program return create_with_il(&il[0], il.size(), context); } #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Create a new program with \p source in \p context and builds it with \p options. /** * In case BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, * the compiled binary is stored for reuse in the offline cache located in * $HOME/.boost_compute on UNIX-like systems and in %APPDATA%/boost_compute * on Windows. */ static program build_with_source( const std::string &source, const context &context, const std::string &options = std::string() ) { #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Get hash string for the kernel. device d = context.get_device(); platform p = d.platform(); detail::sha1 hash; hash.process( p.name() ) .process( p.version() ) .process( d.name() ) .process( options ) .process( source ) ; std::string hash_string = hash; // Try to get cached program binaries: try { boost::optional<program> prog = load_program_binary(hash_string, context); if (prog) { prog->build(options); return *prog; } } catch (...) { // Something bad happened. Fallback to normal compilation. } // Cache is apparently not available. Just compile the sources. 
#endif const char *source_string = source.c_str(); cl_int error = 0; cl_program program_ = clCreateProgramWithSource(context, uint_(1), &source_string, 0, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } program prog(program_, false); prog.build(options); #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Save program binaries for future reuse. save_program_binary(hash_string, prog); #endif return prog; } /// Create a new program with \p file in \p context and builds it with \p options. /** * In case BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, * the compiled binary is stored for reuse in the offline cache located in * $HOME/.boost_compute on UNIX-like systems and in %APPDATA%/boost_compute * on Windows. */ static program build_with_source_file( const std::string &file, const context &context, const std::string &options = std::string() ) { return build_with_source(read_source_file(file), context, options); } private: #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Saves program binaries for future reuse. static void save_program_binary(const std::string &hash, const program &prog) { std::string fname = detail::program_binary_path(hash, true) + "kernel"; std::ofstream bfile(fname.c_str(), std::ios::binary); if (!bfile) return; std::vector<unsigned char> binary = prog.binary(); size_t binary_size = binary.size(); bfile.write((char*)&binary_size, sizeof(size_t)); bfile.write((char*)binary.data(), binary_size); } // Tries to read program binaries from file cache. 
static boost::optional<program> load_program_binary( const std::string &hash, const context &ctx ) { std::string fname = detail::program_binary_path(hash) + "kernel"; std::ifstream bfile(fname.c_str(), std::ios::binary); if (!bfile) return boost::optional<program>(); size_t binary_size; std::vector<unsigned char> binary; bfile.read((char*)&binary_size, sizeof(size_t)); binary.resize(binary_size); bfile.read((char*)binary.data(), binary_size); return boost::optional<program>( program::create_with_binary( binary.data(), binary_size, ctx ) ); } #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE static std::string read_source_file(const std::string &file) { // open file stream std::ifstream stream(file.c_str()); if(stream.fail()){ BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream.")); } // read source return std::string( (std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>() ); } private: cl_program m_program; }; /// \internal_ define get_info() specializations for program BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((cl_uint, CL_PROGRAM_REFERENCE_COUNT)) ((cl_context, CL_PROGRAM_CONTEXT)) ((cl_uint, CL_PROGRAM_NUM_DEVICES)) ((std::vector<cl_device_id>, CL_PROGRAM_DEVICES)) ((std::string, CL_PROGRAM_SOURCE)) ((std::vector<size_t>, CL_PROGRAM_BINARY_SIZES)) ((std::vector<unsigned char *>, CL_PROGRAM_BINARIES)) ) #ifdef BOOST_COMPUTE_CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((size_t, CL_PROGRAM_NUM_KERNELS)) ((std::string, CL_PROGRAM_KERNEL_NAMES)) ) #endif // BOOST_COMPUTE_CL_VERSION_1_2 #ifdef BOOST_COMPUTE_CL_VERSION_2_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((std::vector<unsigned char>, CL_PROGRAM_IL)) ) #endif // BOOST_COMPUTE_CL_VERSION_2_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_PROGRAM_HPP source.hpp 0000644 00000001047 15125510617 0006562 0 ustar 00 
//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use <boost/compute/utility/source.hpp> instead #include <boost/compute/utility/source.hpp> algorithm.hpp 0000644 00000010457 15125510617 0007255 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_HPP #define BOOST_COMPUTE_ALGORITHM_HPP /// \file /// /// Meta-header to include all Boost.Compute algorithm headers. 
#include <boost/compute/algorithm/accumulate.hpp> #include <boost/compute/algorithm/adjacent_difference.hpp> #include <boost/compute/algorithm/adjacent_find.hpp> #include <boost/compute/algorithm/all_of.hpp> #include <boost/compute/algorithm/any_of.hpp> #include <boost/compute/algorithm/binary_search.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/copy_if.hpp> #include <boost/compute/algorithm/copy_n.hpp> #include <boost/compute/algorithm/count.hpp> #include <boost/compute/algorithm/count_if.hpp> #include <boost/compute/algorithm/equal.hpp> #include <boost/compute/algorithm/equal_range.hpp> #include <boost/compute/algorithm/exclusive_scan.hpp> #include <boost/compute/algorithm/fill.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/algorithm/find_end.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/algorithm/find_if_not.hpp> #include <boost/compute/algorithm/for_each.hpp> #include <boost/compute/algorithm/for_each_n.hpp> #include <boost/compute/algorithm/gather.hpp> #include <boost/compute/algorithm/generate.hpp> #include <boost/compute/algorithm/generate_n.hpp> #include <boost/compute/algorithm/inclusive_scan.hpp> #include <boost/compute/algorithm/includes.hpp> #include <boost/compute/algorithm/inner_product.hpp> #include <boost/compute/algorithm/iota.hpp> #include <boost/compute/algorithm/is_partitioned.hpp> #include <boost/compute/algorithm/is_permutation.hpp> #include <boost/compute/algorithm/is_sorted.hpp> #include <boost/compute/algorithm/lower_bound.hpp> #include <boost/compute/algorithm/lexicographical_compare.hpp> #include <boost/compute/algorithm/max_element.hpp> #include <boost/compute/algorithm/merge.hpp> #include <boost/compute/algorithm/min_element.hpp> #include <boost/compute/algorithm/minmax_element.hpp> #include <boost/compute/algorithm/mismatch.hpp> #include <boost/compute/algorithm/next_permutation.hpp> #include 
<boost/compute/algorithm/none_of.hpp> #include <boost/compute/algorithm/partial_sum.hpp> #include <boost/compute/algorithm/partition.hpp> #include <boost/compute/algorithm/partition_copy.hpp> #include <boost/compute/algorithm/partition_point.hpp> #include <boost/compute/algorithm/prev_permutation.hpp> #include <boost/compute/algorithm/random_shuffle.hpp> #include <boost/compute/algorithm/reduce.hpp> #include <boost/compute/algorithm/reduce_by_key.hpp> #include <boost/compute/algorithm/remove.hpp> #include <boost/compute/algorithm/remove_if.hpp> #include <boost/compute/algorithm/replace.hpp> #include <boost/compute/algorithm/replace_copy.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/algorithm/reverse_copy.hpp> #include <boost/compute/algorithm/rotate.hpp> #include <boost/compute/algorithm/rotate_copy.hpp> #include <boost/compute/algorithm/scatter.hpp> #include <boost/compute/algorithm/search.hpp> #include <boost/compute/algorithm/search_n.hpp> #include <boost/compute/algorithm/set_difference.hpp> #include <boost/compute/algorithm/set_intersection.hpp> #include <boost/compute/algorithm/set_symmetric_difference.hpp> #include <boost/compute/algorithm/set_union.hpp> #include <boost/compute/algorithm/sort.hpp> #include <boost/compute/algorithm/sort_by_key.hpp> #include <boost/compute/algorithm/stable_partition.hpp> #include <boost/compute/algorithm/stable_sort.hpp> #include <boost/compute/algorithm/stable_sort_by_key.hpp> #include <boost/compute/algorithm/swap_ranges.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/algorithm/transform_reduce.hpp> #include <boost/compute/algorithm/unique.hpp> #include <boost/compute/algorithm/unique_copy.hpp> #include <boost/compute/algorithm/upper_bound.hpp> #endif // BOOST_COMPUTE_ALGORITHM_HPP command_queue.hpp 0000644 00000172044 15125510617 0010112 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz 
<kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_COMMAND_QUEUE_HPP #define BOOST_COMPUTE_COMMAND_QUEUE_HPP #include <cstddef> #include <algorithm> #include <boost/assert.hpp> #include <boost/compute/config.hpp> #include <boost/compute/event.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/device.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/context.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/image/image1d.hpp> #include <boost/compute/image/image2d.hpp> #include <boost/compute/image/image3d.hpp> #include <boost/compute/image/image_object.hpp> #include <boost/compute/utility/wait_list.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/detail/assert_cl_success.hpp> #include <boost/compute/detail/diagnostic.hpp> #include <boost/compute/utility/extents.hpp> namespace boost { namespace compute { namespace detail { inline void BOOST_COMPUTE_CL_CALLBACK nullary_native_kernel_trampoline(void *user_func_ptr) { void (*user_func)(); std::memcpy(&user_func, user_func_ptr, sizeof(user_func)); user_func(); } } // end detail namespace /// \class command_queue /// \brief A command queue. /// /// Command queues provide the interface for interacting with compute /// devices. The command_queue class provides methods to copy data to /// and from a compute device as well as execute compute kernels. /// /// Command queues are created for a compute device within a compute /// context. 
/// /// For example, to create a context and command queue for the default device /// on the system (this is the normal set up code used by almost all OpenCL /// programs): /// \code /// #include <boost/compute/core.hpp> /// /// // get the default compute device /// boost::compute::device device = boost::compute::system::default_device(); /// /// // set up a compute context and command queue /// boost::compute::context context(device); /// boost::compute::command_queue queue(context, device); /// \endcode /// /// The default command queue for the system can be obtained with the /// system::default_queue() method. /// /// \see buffer, context, kernel class command_queue { public: enum properties { enable_profiling = CL_QUEUE_PROFILING_ENABLE, enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE #ifdef BOOST_COMPUTE_CL_VERSION_2_0 , on_device = CL_QUEUE_ON_DEVICE, on_device_default = CL_QUEUE_ON_DEVICE_DEFAULT #endif }; enum map_flags { map_read = CL_MAP_READ, map_write = CL_MAP_WRITE #ifdef BOOST_COMPUTE_CL_VERSION_1_2 , map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION #endif }; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 enum mem_migration_flags { migrate_to_host = CL_MIGRATE_MEM_OBJECT_HOST, migrate_content_undefined = CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED }; #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Creates a null command queue. command_queue() : m_queue(0) { } explicit command_queue(cl_command_queue queue, bool retain = true) : m_queue(queue) { if(m_queue && retain){ clRetainCommandQueue(m_queue); } } /// Creates a command queue in \p context for \p device with /// \p properties. 
/// /// \see_opencl_ref{clCreateCommandQueue} command_queue(const context &context, const device &device, cl_command_queue_properties properties = 0) { BOOST_ASSERT(device.id() != 0); cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 if (device.check_version(2, 0)){ std::vector<cl_queue_properties> queue_properties; if(properties){ queue_properties.push_back(CL_QUEUE_PROPERTIES); queue_properties.push_back(cl_queue_properties(properties)); queue_properties.push_back(cl_queue_properties(0)); } const cl_queue_properties *queue_properties_ptr = queue_properties.empty() ? 0 : &queue_properties[0]; m_queue = clCreateCommandQueueWithProperties( context, device.id(), queue_properties_ptr, &error ); } else #endif { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); m_queue = clCreateCommandQueue( context, device.id(), properties, &error ); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); } if(!m_queue){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new command queue object as a copy of \p other. command_queue(const command_queue &other) : m_queue(other.m_queue) { if(m_queue){ clRetainCommandQueue(m_queue); } } /// Copies the command queue object from \p other to \c *this. command_queue& operator=(const command_queue &other) { if(this != &other){ if(m_queue){ clReleaseCommandQueue(m_queue); } m_queue = other.m_queue; if(m_queue){ clRetainCommandQueue(m_queue); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new command queue object from \p other. command_queue(command_queue&& other) BOOST_NOEXCEPT : m_queue(other.m_queue) { other.m_queue = 0; } /// Move-assigns the command queue from \p other to \c *this. command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT { if(m_queue){ clReleaseCommandQueue(m_queue); } m_queue = other.m_queue; other.m_queue = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the command queue. 
/// /// \see_opencl_ref{clReleaseCommandQueue} ~command_queue() { if(m_queue){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseCommandQueue(m_queue) ); } } /// Returns the underlying OpenCL command queue. cl_command_queue& get() const { return const_cast<cl_command_queue &>(m_queue); } /// Returns the device that the command queue issues commands to. device get_device() const { return device(get_info<cl_device_id>(CL_QUEUE_DEVICE)); } /// Returns the context for the command queue. context get_context() const { return context(get_info<cl_context>(CL_QUEUE_CONTEXT)); } /// Returns information about the command queue. /// /// \see_opencl_ref{clGetCommandQueueInfo} template<class T> T get_info(cl_command_queue_info info) const { return detail::get_object_info<T>(clGetCommandQueueInfo, m_queue, info); } /// \overload template<int Enum> typename detail::get_object_info_type<command_queue, Enum>::type get_info() const; /// Returns the properties for the command queue. cl_command_queue_properties get_properties() const { return get_info<cl_command_queue_properties>(CL_QUEUE_PROPERTIES); } #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns the current default device command queue for the underlying device. /// /// \opencl_version_warning{2,1} command_queue get_default_device_queue() const { return command_queue(get_info<cl_command_queue>(CL_QUEUE_DEVICE_DEFAULT)); } /// Replaces the default device command queue for the underlying device /// with this command queue. Command queue must have been created /// with CL_QUEUE_ON_DEVICE flag. 
/// /// \see_opencl21_ref{clSetDefaultDeviceCommandQueue} /// /// \opencl_version_warning{2,1} void set_as_default_device_queue() const { cl_int ret = clSetDefaultDeviceCommandQueue( this->get_context().get(), this->get_device().get(), m_queue ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Enqueues a command to read data from \p buffer to host memory. /// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy() event enqueue_read_buffer(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read data from \p buffer to host memory. The /// copy will be performed asynchronously. /// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy_async() event enqueue_read_buffer_async(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. 
/// /// \see_opencl_ref{clEnqueueReadBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_read_buffer_rect(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), CL_TRUE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. The copy will be performed asynchronously. /// /// \see_opencl_ref{clEnqueueReadBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_read_buffer_rect_async(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Enqueues a command to write data from host memory to \p buffer. 
/// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy() event enqueue_write_buffer(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to write data from host memory to \p buffer. /// The copy is performed asynchronously. /// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy_async() event enqueue_write_buffer_async(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. 
/// /// \see_opencl_ref{clEnqueueWriteBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_write_buffer_rect(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), CL_TRUE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. The copy is performed asynchronously. /// /// \see_opencl_ref{clEnqueueWriteBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_write_buffer_rect_async(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Enqueues a command to copy data from \p src_buffer to /// \p dst_buffer. 
/// /// \see_opencl_ref{clEnqueueCopyBuffer} /// /// \see copy() event enqueue_copy_buffer(const buffer &src_buffer, const buffer &dst_buffer, size_t src_offset, size_t dst_offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); BOOST_ASSERT(dst_offset + size <= dst_buffer.size()); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBuffer( m_queue, src_buffer.get(), dst_buffer.get(), src_offset, dst_offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy a rectangular region from /// \p src_buffer to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_copy_buffer_rect(const buffer &src_buffer, const buffer &dst_buffer, const size_t src_origin[3], const size_t dst_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBufferRect( m_queue, src_buffer.get(), dst_buffer.get(), src_origin, dst_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_1 #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p buffer with \p 
pattern. /// /// \see_opencl_ref{clEnqueueFillBuffer} /// /// \opencl_version_warning{1,2} /// /// \see fill() event enqueue_fill_buffer(const buffer &buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueFillBuffer( m_queue, buffer.get(), pattern, pattern_size, offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a command to map \p buffer into the host address space. /// Event associated with map operation is returned through /// \p map_buffer_event parameter. /// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, event &map_buffer_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), &map_buffer_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, const wait_list &events = wait_list()) { event event_; return enqueue_map_buffer(buffer, flags, offset, size, event_, events); } /// Enqueues a command to map \p buffer into the host address space. /// Map operation is performed asynchronously. The pointer to the mapped /// region cannot be used until the map operation has completed. /// /// Event associated with map operation is returned through /// \p map_buffer_event parameter. 
/// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer_async(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, event &map_buffer_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_FALSE, flags, offset, size, events.size(), events.get_event_ptr(), &map_buffer_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// Enqueues a command to unmap \p buffer from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_buffer(const buffer &buffer, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == this->get_context()); return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events); } /// Enqueues a command to unmap \p mem from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_mem_object(cl_mem mem, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueUnmapMemObject( m_queue, mem, mapped_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read data from \p image to host memory. 
/// /// \see_opencl_ref{clEnqueueReadImage} event enqueue_read_image(const image_object& image, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueReadImage( m_queue, image.get(), CL_TRUE, origin, region, row_pitch, slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template<size_t N> event enqueue_read_image(const image_object& image, const extents<N> origin, const extents<N> region, void *host_ptr, size_t row_pitch = 0, size_t slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_read_image( image, origin3, region3, row_pitch, slice_pitch, host_ptr, events ); } /// Enqueues a command to write data from host memory to \p image. 
/// /// \see_opencl_ref{clEnqueueWriteImage} event enqueue_write_image(image_object& image, const size_t *origin, const size_t *region, const void *host_ptr, size_t input_row_pitch = 0, size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueWriteImage( m_queue, image.get(), CL_TRUE, origin, region, input_row_pitch, input_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template<size_t N> event enqueue_write_image(image_object& image, const extents<N> origin, const extents<N> region, const void *host_ptr, const size_t input_row_pitch = 0, const size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_write_image( image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events ); } /// Enqueues a command to map \p image into the host address space. /// /// Event associated with map operation is returned through /// \p map_image_event parameter. 
/// /// \see_opencl_ref{clEnqueueMapImage} void* enqueue_map_image(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapImage( m_queue, image.get(), CL_TRUE, flags, origin, region, &output_row_pitch, &output_slice_pitch, events.size(), events.get_event_ptr(), &map_image_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload void* enqueue_map_image(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, const wait_list &events = wait_list()) { event event_; return enqueue_map_image( image, flags, origin, region, output_row_pitch, output_slice_pitch, event_, events ); } /// \overload template<size_t N> void* enqueue_map_image(image_object& image, cl_map_flags flags, const extents<N> origin, const extents<N> region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_map_image( image, flags, origin3, region3, output_row_pitch, output_slice_pitch, map_image_event, events ); } /// \overload template<size_t N> void* enqueue_map_image(image_object& image, cl_map_flags flags, const extents<N> origin, const extents<N> region, size_t &output_row_pitch, size_t &output_slice_pitch, const wait_list &events = wait_list()) { event event_; return enqueue_map_image( image, flags, origin, region, output_row_pitch, output_slice_pitch, event_, 
events ); } /// Enqueues a command to map \p image into the host address space. /// Map operation is performed asynchronously. The pointer to the mapped /// region cannot be used until the map operation has completed. /// /// Event associated with map operation is returned through /// \p map_image_event parameter. /// /// \see_opencl_ref{clEnqueueMapImage} void* enqueue_map_image_async(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapImage( m_queue, image.get(), CL_FALSE, flags, origin, region, &output_row_pitch, &output_slice_pitch, events.size(), events.get_event_ptr(), &map_image_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload template<size_t N> void* enqueue_map_image_async(image_object& image, cl_map_flags flags, const extents<N> origin, const extents<N> region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_map_image_async( image, flags, origin3, region3, output_row_pitch, output_slice_pitch, map_image_event, events ); } /// Enqueues a command to unmap \p image from the host memory space. 
/// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_image(const image_object &image, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); return enqueue_unmap_mem_object(image.get(), mapped_ptr, events); } /// Enqueues a command to copy data from \p src_image to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyImage} event enqueue_copy_image(const image_object& src_image, image_object& dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyImage( m_queue, src_image.get(), dst_image.get(), src_origin, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template<size_t N> event enqueue_copy_image(const image_object& src_image, image_object& dst_image, const extents<N> src_origin, const extents<N> dst_origin, const extents<N> region, const wait_list &events = wait_list()) { BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); BOOST_ASSERT_MSG(src_image.format() == dst_image.format(), "Source and destination image formats must match."); size_t src_origin3[3] = { 0, 0, 0 }; size_t dst_origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(src_origin.data(), src_origin.data() + N, src_origin3); std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_copy_image( src_image, dst_image, src_origin3, dst_origin3, region3, events ); } /// Enqueues a command to copy data from \p src_image to \p dst_buffer. 
/// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} event enqueue_copy_image_to_buffer(const image_object& src_image, memory_object& dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), dst_buffer.get(), src_origin, region, dst_offset, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to copy data from \p src_buffer to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyBufferToImage} event enqueue_copy_buffer_to_image(const memory_object& src_buffer, image_object& dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyBufferToImage( m_queue, src_buffer.get(), dst_image.get(), src_offset, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p image with \p fill_color. 
/// /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} event enqueue_fill_image(image_object& image, const void *fill_color, const size_t *origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueFillImage( m_queue, image.get(), fill_color, origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template<size_t N> event enqueue_fill_image(image_object& image, const void *fill_color, const extents<N> origin, const extents<N> region, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_fill_image( image, fill_color, origin3, region3, events ); } /// Enqueues a command to migrate \p mem_objects. /// /// \see_opencl_ref{clEnqueueMigrateMemObjects} /// /// \opencl_version_warning{1,2} event enqueue_migrate_memory_objects(uint_ num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueMigrateMemObjects( m_queue, num_mem_objects, mem_objects, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a kernel for execution. 
/// /// \see_opencl_ref{clEnqueueNDRangeKernel} event enqueue_nd_range_kernel(const kernel &kernel, size_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, static_cast<cl_uint>(work_dim), global_work_offset, global_work_size, local_work_size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template<size_t N> event enqueue_nd_range_kernel(const kernel &kernel, const extents<N> &global_work_offset, const extents<N> &global_work_size, const extents<N> &local_work_size, const wait_list &events = wait_list()) { return enqueue_nd_range_kernel( kernel, N, global_work_offset.data(), global_work_size.data(), local_work_size.data(), events ); } /// Convenience method which calls enqueue_nd_range_kernel() with a /// one-dimensional range. event enqueue_1d_range_kernel(const kernel &kernel, size_t global_work_offset, size_t global_work_size, size_t local_work_size, const wait_list &events = wait_list()) { return enqueue_nd_range_kernel( kernel, 1, &global_work_offset, &global_work_size, local_work_size ? &local_work_size : 0, events ); } /// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. 
#ifdef BOOST_COMPUTE_CL_VERSION_2_0 size_t one = 1; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, events.size(), events.get_event_ptr(), &event_.get() ); #else cl_int ret = clEnqueueTask( m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() ); #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a function to execute on the host. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, uint_ num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Convenience overload for enqueue_native_kernel() which enqueues a /// native kernel on the host with a nullary function. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), const wait_list &events = wait_list()) { return enqueue_native_kernel( detail::nullary_native_kernel_trampoline, reinterpret_cast<void *>(&user_func), sizeof(user_func), 0, 0, 0, events ); } /// Flushes the command queue. /// /// \see_opencl_ref{clFlush} void flush() { BOOST_ASSERT(m_queue != 0); cl_int ret = clFlush(m_queue); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Blocks until all outstanding commands in the queue have finished. /// /// \see_opencl_ref{clFinish} void finish() { BOOST_ASSERT(m_queue != 0); cl_int ret = clFinish(m_queue); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Enqueues a barrier in the queue. 
void enqueue_barrier() { BOOST_ASSERT(m_queue != 0); cl_int ret = CL_SUCCESS; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); } else #endif // BOOST_COMPUTE_CL_VERSION_1_2 { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); ret = clEnqueueBarrier(m_queue); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a barrier in the queue after \p events. /// /// \opencl_version_warning{1,2} event enqueue_barrier(const wait_list &events) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = CL_SUCCESS; ret = clEnqueueBarrierWithWaitList( m_queue, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. event enqueue_marker() { event event_; cl_int ret = CL_SUCCESS; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); } else #endif { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); ret = clEnqueueMarker(m_queue, &event_.get()); BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a marker after \p events in the queue and returns an /// event that can be used to track its progress. 
/// /// \opencl_version_warning{1,2} event enqueue_marker(const wait_list &events) { event event_; cl_int ret = clEnqueueMarkerWithWaitList( m_queue, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_1_2 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemcpy} event enqueue_svm_memcpy(void *dst_ptr, const void *src_ptr, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemcpy( m_queue, CL_TRUE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. The operation is performed asynchronously. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemcpy} event enqueue_svm_memcpy_async(void *dst_ptr, const void *src_ptr, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemcpy( m_queue, CL_FALSE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to fill \p size bytes of data at \p svm_ptr with /// \p pattern. 
/// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemFill} event enqueue_svm_fill(void *svm_ptr, const void *pattern, size_t pattern_size, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemFill( m_queue, svm_ptr, pattern, pattern_size, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to free \p svm_ptr. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMFree} /// /// \see svm_free() event enqueue_svm_free(void *svm_ptr, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMFree( m_queue, 1, &svm_ptr, 0, 0, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to map \p svm_ptr to the host memory space. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMap} event enqueue_svm_map(void *svm_ptr, size_t size, cl_map_flags flags, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMap( m_queue, CL_TRUE, flags, svm_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to unmap \p svm_ptr from the host memory space. 
/// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMUnmap} event enqueue_svm_unmap(void *svm_ptr, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMUnmap( m_queue, svm_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_2_0 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to indicate which device a set of ranges of SVM allocations /// should be associated with. The pair \p svm_ptrs[i] and \p sizes[i] together define /// the starting address and number of bytes in a range to be migrated. /// /// If \p sizes is empty, then that means every allocation containing any \p svm_ptrs[i] /// is to be migrated. Also, if \p sizes[i] is zero, then the entire allocation containing /// \p svm_ptrs[i] is migrated. /// /// \opencl_version_warning{2,1} /// /// \see_opencl21_ref{clEnqueueSVMMigrateMem} event enqueue_svm_migrate_memory(const std::vector<const void*> &svm_ptrs, const std::vector<size_t> &sizes, const cl_mem_migration_flags flags = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(svm_ptrs.size() == sizes.size() || sizes.size() == 0); event event_; cl_int ret = clEnqueueSVMMigrateMem( m_queue, static_cast<cl_uint>(svm_ptrs.size()), const_cast<void const **>(&svm_ptrs[0]), sizes.size() > 0 ? &sizes[0] : NULL, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to indicate which device a range of SVM allocation /// should be associated with. The pair \p svm_ptr and \p size together define /// the starting address and number of bytes in a range to be migrated. /// /// If \p size is 0, then the entire allocation containing \p svm_ptr is /// migrated. The default value for \p size is 0. 
/// /// \opencl_version_warning{2,1} /// /// \see_opencl21_ref{clEnqueueSVMMigrateMem} event enqueue_svm_migrate_memory(const void* svm_ptr, const size_t size = 0, const cl_mem_migration_flags flags = 0, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMigrateMem( m_queue, cl_uint(1), &svm_ptr, &size, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // BOOST_COMPUTE_CL_VERSION_2_1 /// Returns \c true if the command queue is the same at \p other. bool operator==(const command_queue &other) const { return m_queue == other.m_queue; } /// Returns \c true if the command queue is different from \p other. bool operator!=(const command_queue &other) const { return m_queue != other.m_queue; } /// \internal_ operator cl_command_queue() const { return m_queue; } /// \internal_ bool check_device_version(int major, int minor) const { return get_device().check_version(major, minor); } private: cl_command_queue m_queue; }; inline buffer buffer::clone(command_queue &queue) const { buffer copy(get_context(), size(), get_memory_flags()); queue.enqueue_copy_buffer(*this, copy, 0, 0, size()); return copy; } inline image1d image1d::clone(command_queue &queue) const { image1d copy( get_context(), width(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } inline image2d image2d::clone(command_queue &queue) const { image2d copy( get_context(), width(), height(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } inline image3d image3d::clone(command_queue &queue) const { image3d copy( get_context(), width(), height(), depth(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } /// \internal_ define get_info() specializations for command_queue 
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue, ((cl_context, CL_QUEUE_CONTEXT)) ((cl_device_id, CL_QUEUE_DEVICE)) ((uint_, CL_QUEUE_REFERENCE_COUNT)) ((cl_command_queue_properties, CL_QUEUE_PROPERTIES)) ) #ifdef BOOST_COMPUTE_CL_VERSION_2_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue, ((cl_command_queue, CL_QUEUE_DEVICE_DEFAULT)) ) #endif // BOOST_COMPUTE_CL_VERSION_2_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_COMMAND_QUEUE_HPP image/image_sampler.hpp 0000644 00000013741 15125510617 0011155 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP #define BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP #include <boost/throw_exception.hpp> #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/detail/assert_cl_success.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/type_traits/type_name.hpp> namespace boost { namespace compute { /// \class image_sampler /// \brief An OpenCL image sampler object /// /// \see image2d, image_format class image_sampler { public: enum addressing_mode { none = CL_ADDRESS_NONE, clamp_to_edge = CL_ADDRESS_CLAMP_TO_EDGE, clamp = CL_ADDRESS_CLAMP, repeat = CL_ADDRESS_REPEAT }; enum filter_mode { nearest = CL_FILTER_NEAREST, linear = CL_FILTER_LINEAR }; image_sampler() : m_sampler(0) { } image_sampler(const context &context, bool normalized_coords, cl_addressing_mode 
addressing_mode, cl_filter_mode filter_mode) { cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 std::vector<cl_sampler_properties> sampler_properties; sampler_properties.push_back(CL_SAMPLER_NORMALIZED_COORDS); sampler_properties.push_back(cl_sampler_properties(normalized_coords)); sampler_properties.push_back(CL_SAMPLER_ADDRESSING_MODE); sampler_properties.push_back(cl_sampler_properties(addressing_mode)); sampler_properties.push_back(CL_SAMPLER_FILTER_MODE); sampler_properties.push_back(cl_sampler_properties(filter_mode)); sampler_properties.push_back(cl_sampler_properties(0)); m_sampler = clCreateSamplerWithProperties( context, &sampler_properties[0], &error ); #else m_sampler = clCreateSampler( context, normalized_coords, addressing_mode, filter_mode, &error ); #endif if(!m_sampler){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } explicit image_sampler(cl_sampler sampler, bool retain = true) : m_sampler(sampler) { if(m_sampler && retain){ clRetainSampler(m_sampler); } } /// Creates a new image sampler object as a copy of \p other. image_sampler(const image_sampler &other) : m_sampler(other.m_sampler) { if(m_sampler){ clRetainSampler(m_sampler); } } /// Copies the image sampler object from \p other to \c *this. image_sampler& operator=(const image_sampler &other) { if(this != &other){ if(m_sampler){ clReleaseSampler(m_sampler); } m_sampler = other.m_sampler; if(m_sampler){ clRetainSampler(m_sampler); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES image_sampler(image_sampler&& other) BOOST_NOEXCEPT : m_sampler(other.m_sampler) { other.m_sampler = 0; } image_sampler& operator=(image_sampler&& other) BOOST_NOEXCEPT { if(m_sampler){ clReleaseSampler(m_sampler); } m_sampler = other.m_sampler; other.m_sampler = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image sampler object. 
~image_sampler() { if(m_sampler){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseSampler(m_sampler) ); } } /// Returns the underlying \c cl_sampler object. cl_sampler& get() const { return const_cast<cl_sampler &>(m_sampler); } /// Returns the context for the image sampler object. context get_context() const { return context(get_info<cl_context>(CL_SAMPLER_CONTEXT)); } /// Returns information about the sampler. /// /// \see_opencl_ref{clGetSamplerInfo} template<class T> T get_info(cl_sampler_info info) const { return detail::get_object_info<T>(clGetSamplerInfo, m_sampler, info); } /// \overload template<int Enum> typename detail::get_object_info_type<image_sampler, Enum>::type get_info() const; /// Returns \c true if the sampler is the same at \p other. bool operator==(const image_sampler &other) const { return m_sampler == other.m_sampler; } /// Returns \c true if the sampler is different from \p other. bool operator!=(const image_sampler &other) const { return m_sampler != other.m_sampler; } operator cl_sampler() const { return m_sampler; } private: cl_sampler m_sampler; }; /// \internal_ define get_info() specializations for image_sampler BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image_sampler, ((cl_uint, CL_SAMPLER_REFERENCE_COUNT)) ((cl_context, CL_SAMPLER_CONTEXT)) ((cl_addressing_mode, CL_SAMPLER_ADDRESSING_MODE)) ((cl_filter_mode, CL_SAMPLER_FILTER_MODE)) ((bool, CL_SAMPLER_NORMALIZED_COORDS)) ) namespace detail { // set_kernel_arg specialization for image samplers template<> struct set_kernel_arg<image_sampler> { void operator()(kernel &kernel_, size_t index, const image_sampler &sampler) { kernel_.set_arg(index, sampler.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image_sampler, sampler_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP image/image_object.hpp 0000644 00000011040 15125510617 0010746 0 ustar 00 
//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP #define BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP #include <algorithm> #include <vector> #include <boost/compute/config.hpp> #include <boost/compute/memory_object.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/image/image_format.hpp> namespace boost { namespace compute { /// \class image_object /// \brief Base-class for image objects. /// /// The image_object class is the base-class for image objects on compute /// devices. /// /// \see image1d, image2d, image3d class image_object : public memory_object { public: image_object() : memory_object() { } explicit image_object(cl_mem mem, bool retain = true) : memory_object(mem, retain) { } image_object(const image_object &other) : memory_object(other) { } image_object& operator=(const image_object &other) { if(this != &other){ memory_object::operator=(other); } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES image_object(image_object&& other) BOOST_NOEXCEPT : memory_object(std::move(other)) { } /// \internal_ image_object& operator=(image_object&& other) BOOST_NOEXCEPT { memory_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image object. ~image_object() { } /// Returns information about the image object. /// /// \see_opencl_ref{clGetImageInfo} template<class T> T get_image_info(cl_mem_info info) const { return detail::get_object_info<T>(clGetImageInfo, m_mem, info); } /// Returns the format for the image. 
image_format format() const { return image_format(get_image_info<cl_image_format>(CL_IMAGE_FORMAT)); } /// \internal_ (deprecated) image_format get_format() const { return format(); } /// Returns the width of the image. size_t width() const { return get_image_info<size_t>(CL_IMAGE_WIDTH); } /// Returns the height of the image. /// /// For 1D images, this function will return \c 1. size_t height() const { return get_image_info<size_t>(CL_IMAGE_HEIGHT); } /// Returns the depth of the image. /// /// For 1D and 2D images, this function will return \c 1. size_t depth() const { return get_image_info<size_t>(CL_IMAGE_DEPTH); } /// Returns the supported image formats for the \p type in \p context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector<image_format> get_supported_formats(const context &context, cl_mem_object_type type, cl_mem_flags flags = read_write) { cl_uint count = 0; clGetSupportedImageFormats(context, flags, type, 0, 0, &count); std::vector<cl_image_format> cl_formats(count); clGetSupportedImageFormats(context, flags, type, count, &cl_formats[0], 0); std::vector<image_format> formats; formats.reserve(count); for(cl_uint i = 0; i < count; i++){ formats.push_back(image_format(cl_formats[i])); } return formats; } /// Returns \c true if \p format is a supported image format for /// \p type in \p context with \p flags. 
static bool is_supported_format(const image_format &format, const context &context, cl_mem_object_type type, cl_mem_flags flags = read_write) { const std::vector<image_format> formats = get_supported_formats(context, type, flags); return std::find(formats.begin(), formats.end(), format) != formats.end(); } }; namespace detail { // set_kernel_arg() specialization for image_object template<> struct set_kernel_arg<image_object> : public set_kernel_arg<memory_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP image/image_format.hpp 0000644 00000007626 15125510617 0011007 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP #define BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP #include <boost/compute/cl.hpp> namespace boost { namespace compute { /// \class image_format /// \brief A OpenCL image format /// /// For example, to create a format for a 8-bit RGBA image: /// \code /// boost::compute::image_format rgba8(CL_RGBA, CL_UNSIGNED_INT8); /// \endcode /// /// After being constructed, image_format objects are usually passed to the /// constructor of the various image classes (e.g. \ref image2d, \ref image3d) /// to create an image object on a compute device. /// /// Image formats supported by a context can be queried with the static /// get_supported_formats() in each image class. 
For example: /// \code /// std::vector<image_format> formats = image2d::get_supported_formats(ctx); /// \endcode /// /// \see image2d class image_format { public: enum channel_order { r = CL_R, a = CL_A, intensity = CL_INTENSITY, luminance = CL_LUMINANCE, rg = CL_RG, ra = CL_RA, rgb = CL_RGB, rgba = CL_RGBA, argb = CL_ARGB, bgra = CL_BGRA }; enum channel_data_type { snorm_int8 = CL_SNORM_INT8, snorm_int16 = CL_SNORM_INT16, unorm_int8 = CL_UNORM_INT8, unorm_int16 = CL_UNORM_INT16, unorm_short_565 = CL_UNORM_SHORT_565, unorm_short_555 = CL_UNORM_SHORT_555, unorm_int_101010 = CL_UNORM_INT_101010, signed_int8 = CL_SIGNED_INT8, signed_int16 = CL_SIGNED_INT16, signed_int32 = CL_SIGNED_INT32, unsigned_int8 = CL_UNSIGNED_INT8, unsigned_int16 = CL_UNSIGNED_INT16, unsigned_int32 = CL_UNSIGNED_INT32, float16 = CL_HALF_FLOAT, float32 = CL_FLOAT }; /// Creates a new image format object with \p order and \p type. explicit image_format(cl_channel_order order, cl_channel_type type) { m_format.image_channel_order = order; m_format.image_channel_data_type = type; } /// Creates a new image format object from \p format. explicit image_format(const cl_image_format &format) { m_format.image_channel_order = format.image_channel_order; m_format.image_channel_data_type = format.image_channel_data_type; } /// Creates a new image format object as a copy of \p other. image_format(const image_format &other) : m_format(other.m_format) { } /// Copies the format from \p other to \c *this. image_format& operator=(const image_format &other) { if(this != &other){ m_format = other.m_format; } return *this; } /// Destroys the image format object. ~image_format() { } /// Returns a pointer to the \c cl_image_format object. const cl_image_format* get_format_ptr() const { return &m_format; } /// Returns \c true if \c *this is the same as \p other. 
bool operator==(const image_format &other) const { return m_format.image_channel_order == other.m_format.image_channel_order && m_format.image_channel_data_type == other.m_format.image_channel_data_type; } /// Returns \c true if \c *this is not the same as \p other. bool operator!=(const image_format &other) const { return !(*this == other); } private: cl_image_format m_format; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP image/image1d.hpp 0000644 00000013027 15125510617 0007654 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE1D_HPP #define BOOST_COMPUTE_IMAGE_IMAGE1D_HPP #include <boost/throw_exception.hpp> #include <boost/compute/config.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/image/image_format.hpp> #include <boost/compute/image/image_object.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/utility/extents.hpp> namespace boost { namespace compute { // forward declarations class command_queue; /// \class image1d /// \brief An OpenCL 1D image object /// /// \opencl_version_warning{1,2} /// /// \see image_format, image2d class image1d : public image_object { public: /// Creates a null image1d object. image1d() : image_object() { } /// Creates a new image1d object. 
/// /// \see_opencl_ref{clCreateImage} image1d(const context &context, size_t image_width, const image_format &format, cl_mem_flags flags = read_write, void *host_ptr = 0) { #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = image_width; desc.image_height = 1; desc.image_depth = 1; desc.image_array_size = 0; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif cl_int error = 0; m_mem = clCreateImage( context, flags, format.get_format_ptr(), &desc, host_ptr, &error ); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } #else // image1d objects are only supported in OpenCL 1.2 and later BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); #endif } /// Creates a new image1d as a copy of \p other. image1d(const image1d &other) : image_object(other) { } /// Copies the image1d from \p other. image1d& operator=(const image1d &other) { image_object::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new image object from \p other. image1d(image1d&& other) BOOST_NOEXCEPT : image_object(std::move(other)) { } /// Move-assigns the image from \p other to \c *this. image1d& operator=(image1d&& other) BOOST_NOEXCEPT { image_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image1d object. ~image1d() { } /// Returns the size (width) of the image. extents<1> size() const { extents<1> size; size[0] = get_info<size_t>(CL_IMAGE_WIDTH); return size; } /// Returns the origin of the image (\c 0). extents<1> origin() const { return extents<1>(); } /// Returns information about the image. 
/// /// \see_opencl_ref{clGetImageInfo} template<class T> T get_info(cl_image_info info) const { return get_image_info<T>(info); } /// \overload template<int Enum> typename detail::get_object_info_type<image1d, Enum>::type get_info() const; /// Returns the supported image formats for the context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector<image_format> get_supported_formats(const context &context, cl_mem_flags flags = read_write) { #ifdef BOOST_COMPUTE_CL_VERSION_1_2 return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags); #else return std::vector<image_format>(); #endif } /// Returns \c true if \p format is a supported 1D image format for /// \p context. static bool is_supported_format(const image_format &format, const context &context, cl_mem_flags flags = read_write) { #ifdef BOOST_COMPUTE_CL_VERSION_1_2 return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE1D, flags ); #else return false; #endif } /// Creates a new image with a copy of the data in \c *this. Uses \p queue /// to perform the copy operation. 
image1d clone(command_queue &queue) const; }; /// \internal_ define get_info() specializations for image1d BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image1d, ((cl_image_format, CL_IMAGE_FORMAT)) ((size_t, CL_IMAGE_ELEMENT_SIZE)) ((size_t, CL_IMAGE_ROW_PITCH)) ((size_t, CL_IMAGE_SLICE_PITCH)) ((size_t, CL_IMAGE_WIDTH)) ((size_t, CL_IMAGE_HEIGHT)) ((size_t, CL_IMAGE_DEPTH)) ) namespace detail { // set_kernel_arg() specialization for image1d template<> struct set_kernel_arg<image1d> : public set_kernel_arg<image_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image1d, image1d_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE1D_HPP image/image2d.hpp 0000644 00000017140 15125510617 0007655 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE2D_HPP #define BOOST_COMPUTE_IMAGE_IMAGE2D_HPP #include <boost/throw_exception.hpp> #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/image/image_format.hpp> #include <boost/compute/image/image_object.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/utility/extents.hpp> namespace boost { namespace compute { // forward declarations class command_queue; /// \class image2d /// \brief An OpenCL 2D image object /// /// For example, to create a 640x480 8-bit RGBA image: /// /// \snippet test/test_image2d.cpp create_image /// /// \see image_format, image3d class image2d : public image_object { public: /// Creates a null image2d object. image2d() : image_object() { } /// Creates a new image2d object. /// /// \see_opencl_ref{clCreateImage} image2d(const context &context, size_t image_width, size_t image_height, const image_format &format, cl_mem_flags flags = read_write, void *host_ptr = 0, size_t image_row_pitch = 0) { cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = 1; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage2D(context, flags, format.get_format_ptr(), image_width, image_height, image_row_pitch, host_ptr, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// \internal_ (deprecated) image2d(const context 
&context, cl_mem_flags flags, const image_format &format, size_t image_width, size_t image_height, size_t image_row_pitch = 0, void *host_ptr = 0) { cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = 1; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage2D(context, flags, format.get_format_ptr(), image_width, image_height, image_row_pitch, host_ptr, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new image2d as a copy of \p other. image2d(const image2d &other) : image_object(other) { } /// Copies the image2d from \p other. image2d& operator=(const image2d &other) { image_object::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new image object from \p other. image2d(image2d&& other) BOOST_NOEXCEPT : image_object(std::move(other)) { } /// Move-assigns the image from \p other to \c *this. image2d& operator=(image2d&& other) BOOST_NOEXCEPT { image_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image2d object. ~image2d() { } /// Returns the size (width, height) of the image. extents<2> size() const { extents<2> size; size[0] = get_info<size_t>(CL_IMAGE_WIDTH); size[1] = get_info<size_t>(CL_IMAGE_HEIGHT); return size; } /// Returns the origin of the image (\c 0, \c 0). extents<2> origin() const { return extents<2>(); } /// Returns information about the image. 
/// /// \see_opencl_ref{clGetImageInfo} template<class T> T get_info(cl_image_info info) const { return detail::get_object_info<T>(clGetImageInfo, m_mem, info); } /// \overload template<int Enum> typename detail::get_object_info_type<image2d, Enum>::type get_info() const; /// Returns the supported image formats for the context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector<image_format> get_supported_formats(const context &context, cl_mem_flags flags = read_write) { return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE2D, flags); } /// Returns \c true if \p format is a supported 2D image format for /// \p context. static bool is_supported_format(const image_format &format, const context &context, cl_mem_flags flags = read_write) { return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE2D, flags ); } /// Creates a new image with a copy of the data in \c *this. Uses \p queue /// to perform the copy operation. image2d clone(command_queue &queue) const; }; /// \internal_ define get_info() specializations for image2d BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image2d, ((cl_image_format, CL_IMAGE_FORMAT)) ((size_t, CL_IMAGE_ELEMENT_SIZE)) ((size_t, CL_IMAGE_ROW_PITCH)) ((size_t, CL_IMAGE_SLICE_PITCH)) ((size_t, CL_IMAGE_WIDTH)) ((size_t, CL_IMAGE_HEIGHT)) ((size_t, CL_IMAGE_DEPTH)) ) namespace detail { // set_kernel_arg() specialization for image2d template<> struct set_kernel_arg<image2d> : public set_kernel_arg<image_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image2d, image2d_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE2D_HPP image/image3d.hpp 0000644 00000017562 15125510617 0007666 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying 
file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE3D_HPP #define BOOST_COMPUTE_IMAGE_IMAGE3D_HPP #include <boost/throw_exception.hpp> #include <boost/compute/detail/get_object_info.hpp> #include <boost/compute/exception/opencl_error.hpp> #include <boost/compute/image/image_format.hpp> #include <boost/compute/image/image_object.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/utility/extents.hpp> namespace boost { namespace compute { // forward declarations class command_queue; /// \class image3d /// \brief An OpenCL 3D image object /// /// \see image_format, image2d class image3d : public image_object { public: /// Creates a null image3d object. image3d() : image_object() { } /// Creates a new image3d object. /// /// \see_opencl_ref{clCreateImage} image3d(const context &context, size_t image_width, size_t image_height, size_t image_depth, const image_format &format, cl_mem_flags flags = read_write, void *host_ptr = 0, size_t image_row_pitch = 0, size_t image_slice_pitch = 0) { cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = image_depth; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = image_slice_pitch; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), image_width, image_height, image_depth, image_row_pitch, image_slice_pitch, host_ptr, &error); #endif if(!m_mem){ 
BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// \internal_ (deprecated) image3d(const context &context, cl_mem_flags flags, const image_format &format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch = 0, void *host_ptr = 0) { cl_int error = 0; #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = image_depth; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = image_slice_pitch; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), image_width, image_height, image_depth, image_row_pitch, image_slice_pitch, host_ptr, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new image3d as a copy of \p other. image3d(const image3d &other) : image_object(other) { } /// Copies the image3d from \p other. image3d& operator=(const image3d &other) { image_object::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new image object from \p other. image3d(image3d&& other) BOOST_NOEXCEPT : image_object(std::move(other)) { } /// Move-assigns the image from \p other to \c *this. image3d& operator=(image3d&& other) BOOST_NOEXCEPT { image_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image3d object. ~image3d() { } /// Returns the size (width, height, depth) of the image. 
extents<3> size() const { extents<3> size; size[0] = get_info<size_t>(CL_IMAGE_WIDTH); size[1] = get_info<size_t>(CL_IMAGE_HEIGHT); size[2] = get_info<size_t>(CL_IMAGE_DEPTH); return size; } /// Returns the origin of the image (\c 0, \c 0, \c 0). extents<3> origin() const { return extents<3>(); } /// Returns information about the image. /// /// \see_opencl_ref{clGetImageInfo} template<class T> T get_info(cl_image_info info) const { return detail::get_object_info<T>(clGetImageInfo, m_mem, info); } /// \overload template<int Enum> typename detail::get_object_info_type<image3d, Enum>::type get_info() const; /// Returns the supported 3D image formats for the context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector<image_format> get_supported_formats(const context &context, cl_mem_flags flags = read_write) { return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE3D, flags); } /// Returns \c true if \p format is a supported 3D image format for /// \p context. static bool is_supported_format(const image_format &format, const context &context, cl_mem_flags flags = read_write) { return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE3D, flags ); } /// Creates a new image with a copy of the data in \c *this. Uses \p queue /// to perform the copy operation. 
image3d clone(command_queue &queue) const; }; /// \internal_ define get_info() specializations for image3d BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image3d, ((cl_image_format, CL_IMAGE_FORMAT)) ((size_t, CL_IMAGE_ELEMENT_SIZE)) ((size_t, CL_IMAGE_ROW_PITCH)) ((size_t, CL_IMAGE_SLICE_PITCH)) ((size_t, CL_IMAGE_WIDTH)) ((size_t, CL_IMAGE_HEIGHT)) ((size_t, CL_IMAGE_DEPTH)) ) namespace detail { // set_kernel_arg() specialization for image3d template<> struct set_kernel_arg<image3d> : public set_kernel_arg<image_object> { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image3d, image3d_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE3D_HPP iterator.hpp 0000644 00000002164 15125510617 0007114 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_HPP /// \file /// /// Meta-header to include all Boost.Compute iterator headers. 
#include <boost/compute/iterator/buffer_iterator.hpp>
#include <boost/compute/iterator/constant_iterator.hpp>
#include <boost/compute/iterator/constant_buffer_iterator.hpp>
#include <boost/compute/iterator/counting_iterator.hpp>
#include <boost/compute/iterator/discard_iterator.hpp>
#include <boost/compute/iterator/function_input_iterator.hpp>
#include <boost/compute/iterator/permutation_iterator.hpp>
#include <boost/compute/iterator/transform_iterator.hpp>
#include <boost/compute/iterator/zip_iterator.hpp>

#endif // BOOST_COMPUTE_ITERATOR_HPP
// detail/variadic_macros.hpp 0000644 00000003454 15125510617 0011656 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

// NOTE(review): the guard below spells "VARIDAIC"; it is used consistently
// within this header, so it still works as an include guard.
#ifndef BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
#define BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP

#include <boost/preprocessor/cat.hpp>
#include <boost/preprocessor/config/config.hpp>
#include <boost/preprocessor/tuple/to_seq.hpp>

#if BOOST_PP_VARIADICS == 1
#  include <boost/preprocessor/variadic/size.hpp>
#endif

// BOOST_COMPUTE_PP_VARIADIC_SIZE(...) expands to the number of arguments it
// is given. Boost.Preprocessor's implementation is used when available.
//
// The fallback appends the literals 64..1 to the argument list and selects
// the 65th argument overall: every user argument shifts the countdown right
// by one, so the selected literal equals the argument count (up to 64).
#ifdef BOOST_PP_VARIADIC_SIZE
#  define BOOST_COMPUTE_PP_VARIADIC_SIZE BOOST_PP_VARIADIC_SIZE
#else
#  define BOOST_COMPUTE_PP_VARIADIC_SIZE(...) BOOST_COMPUTE_PP_VARIADIC_SIZE_I(__VA_ARGS__, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,)
#  define BOOST_COMPUTE_PP_VARIADIC_SIZE_I(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63, size, ...) size
#endif

// number of elements in a parenthesized tuple, e.g. (a, b, c) -> 3
#define BOOST_COMPUTE_PP_TUPLE_SIZE(tuple) \
    BOOST_COMPUTE_PP_VARIADIC_SIZE tuple

// converts a tuple to a Boost.Preprocessor sequence without the caller
// having to pass the tuple size explicitly
#define BOOST_COMPUTE_PP_TUPLE_TO_SEQ(tuple) \
    BOOST_PP_TUPLE_TO_SEQ(BOOST_COMPUTE_PP_TUPLE_SIZE(tuple), tuple)

#endif // BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
// detail/iterator_plus_distance.hpp 0000644 00000003176 15125510617 0013277 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP
#define BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP

#include <iterator>

namespace boost {
namespace compute {
namespace detail {

// Fallback for iterators that are not random-access: steps forward one
// element at a time.
//
// The loop runs only while n is positive, so a zero or negative distance
// returns the iterator unchanged instead of counting down through the
// negative range (which walked past the end of the sequence).
template<class Iterator, class Distance, class Tag>
inline Iterator iterator_plus_distance(Iterator i, Distance n, Tag)
{
    for(; n > 0; --n){
        ++i; // pre-increment: no temporary iterator copy per step
    }

    return i;
}

// Random-access iterators jump directly; negative distances move backwards.
template<class Iterator, class Distance>
inline Iterator iterator_plus_distance(Iterator i,
                                       Distance n,
                                       std::random_access_iterator_tag)
{
    typedef typename
        std::iterator_traits<Iterator>::difference_type difference_type;

    return i + static_cast<difference_type>(n);
}

// similar to std::advance() except returns the advanced iterator and
// also works with iterators that don't define difference_type
//
// Dispatches on the iterator category of Iterator to one of the overloads
// above.
template<class Iterator, class Distance>
inline Iterator iterator_plus_distance(Iterator i, Distance n)
{
    typedef typename std::iterator_traits<Iterator>::iterator_category tag;

    return iterator_plus_distance(i, n, tag());
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP
// detail/iterator_range_size.hpp 0000644 00000002606 15125510617 0012565 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H
#define BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H

#include <cstddef>
#include <algorithm>
#include <iterator>

namespace boost {
namespace compute {
namespace detail {

// This is a convenience function which returns the size of a range
// bounded by two iterators. This function has two differences from
// the std::distance() function: 1) the return type (size_t) is
// unsigned, and 2) the return value is never negative (a reversed
// range yields 0).
template<class Iterator>
inline size_t iterator_range_size(Iterator first, Iterator last)
{
    typedef typename
        std::iterator_traits<Iterator>::difference_type
        difference_type;

    difference_type difference = std::distance(first, last);

    // (std::max) is parenthesized so a function-like max() macro cannot
    // expand here
    return static_cast<size_t>(
        (std::max)(difference, static_cast<difference_type>(0))
    );
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H
// detail/diagnostic.hpp 0000644 00000012114 15125510617 0010645 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP
#define BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP

// Macros for suppressing warnings for GCC. With GCC 4.6 or later the
// diagnostic state is pushed/popped; for 4.2 - 4.5 the warning is switched
// to "ignored" and back to "warning". Usage:
//
//   BOOST_COMPUTE_GCC_DIAG_OFF(sign-compare);
//   if(a < b){
//   BOOST_COMPUTE_GCC_DIAG_ON(sign-compare);
//
// Source: https://svn.boost.org/trac/boost/wiki/Guidelines/WarningsGuidelines
#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
// stringize s, e.g. -Wsign-compare -> "-Wsign-compare"
#  define BOOST_COMPUTE_GCC_DIAG_STR(s) #s
// join -W with the warning name, then stringize the result
#  define BOOST_COMPUTE_GCC_DIAG_JOINSTR(x,y) BOOST_COMPUTE_GCC_DIAG_STR(x ## y)
#  define BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
#  define BOOST_COMPUTE_GCC_DIAG_PRAGMA(x) BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
#  if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
#    define BOOST_COMPUTE_GCC_DIAG_OFF(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(push) \
         BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x))
#    define BOOST_COMPUTE_GCC_DIAG_ON(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(pop)
#  else
#    define BOOST_COMPUTE_GCC_DIAG_OFF(x) \
         BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x))
#    define BOOST_COMPUTE_GCC_DIAG_ON(x) \
         BOOST_COMPUTE_GCC_DIAG_PRAGMA(warning BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x))
#  endif
#else
// Ensure these macros do nothing for other compilers.
#  define BOOST_COMPUTE_GCC_DIAG_OFF(x)
#  define BOOST_COMPUTE_GCC_DIAG_ON(x)
#endif

// Macros for suppressing warnings for Clang. Usage:
//
//   BOOST_COMPUTE_CLANG_DIAG_OFF(sign-compare);
//   if(a < b){
//   BOOST_COMPUTE_CLANG_DIAG_ON(sign-compare);
//
// Source: https://svn.boost.org/trac/boost/wiki/Guidelines/WarningsGuidelines
#ifdef __clang__
// stringize s, e.g. -Wsign-compare -> "-Wsign-compare"
#  define BOOST_COMPUTE_CLANG_DIAG_STR(s) # s
// join -W with sign-compare to "-Wsign-compare"
#  define BOOST_COMPUTE_CLANG_DIAG_JOINSTR(x,y) BOOST_COMPUTE_CLANG_DIAG_STR(x ## y)
// _Pragma is the operator form of #pragma and takes a string literal
#  define BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(x) _Pragma (#x)
#  define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x) \
       BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(clang diagnostic x)
// For example: #pragma clang diagnostic push
//              #pragma clang diagnostic ignored "-Wsign-compare"
#  define BOOST_COMPUTE_CLANG_DIAG_OFF(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(push) \
       BOOST_COMPUTE_CLANG_DIAG_PRAGMA(ignored BOOST_COMPUTE_CLANG_DIAG_JOINSTR(-W,x))
// For example: #pragma clang diagnostic pop
#  define BOOST_COMPUTE_CLANG_DIAG_ON(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(pop)
#else
// Ensure these macros do nothing for other compilers.
#  define BOOST_COMPUTE_CLANG_DIAG_OFF(x)
#  define BOOST_COMPUTE_CLANG_DIAG_ON(x)
#  define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x)
#endif

// Macros for suppressing warnings for MSVC. Usage:
//
//   BOOST_COMPUTE_MSVC_DIAG_OFF(4018); //sign-compare
//   if(a < b){
//   BOOST_COMPUTE_MSVC_DIAG_ON(4018);
//
#if defined(_MSC_VER)
#  define BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(x) __pragma(x)
#  define BOOST_COMPUTE_MSVC_DIAG_PRAGMA(x) \
       BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(warning(x))
#  define BOOST_COMPUTE_MSVC_DIAG_OFF(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(push) \
       BOOST_COMPUTE_MSVC_DIAG_PRAGMA(disable: x)
#  define BOOST_COMPUTE_MSVC_DIAG_ON(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(pop)
#else
// Ensure these macros do nothing for other compilers.
#  define BOOST_COMPUTE_MSVC_DIAG_OFF(x)
#  define BOOST_COMPUTE_MSVC_DIAG_ON(x)
#endif

// Macros for suppressing warnings for GCC, Clang and MSVC. Each macro takes
// the GCC warning name, the Clang warning name and the MSVC warning number,
// and forwards the one matching the current compiler. Usage:
//
//   BOOST_COMPUTE_DIAG_OFF(sign-compare, sign-compare, 4018);
//   if(a < b){
//   BOOST_COMPUTE_DIAG_ON(sign-compare, sign-compare, 4018);
//
#if defined(_MSC_VER) // MSVC
#  define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_MSVC_DIAG_OFF(msvc)
#  define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_MSVC_DIAG_ON(msvc)
#elif defined(__clang__) // Clang
#  define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_CLANG_DIAG_OFF(clang)
#  define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_CLANG_DIAG_ON(clang)
#elif defined(__GNUC__) // GCC/G++
#  define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_GCC_DIAG_OFF(gcc)
#  define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_GCC_DIAG_ON(gcc)
#else
// Ensure these macros do nothing for other compilers.
#  define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc)
#  define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc)
#endif

// Neither macro carries its own terminating semicolon: the call site supplies
// it, so both expand the same way wherever they are used.
#define BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS() \
    BOOST_COMPUTE_DIAG_OFF(deprecated-declarations, deprecated-declarations, 4996)
#define BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS() \
    BOOST_COMPUTE_DIAG_ON(deprecated-declarations, deprecated-declarations, 4996)

#endif /* BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP */
// detail/iterator_traits.hpp 0000644 00000002206 15125510617 0011741 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP #define BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP #include <iterator> #include <boost/compute/detail/is_contiguous_iterator.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator> struct iterator_traits : public std::iterator_traits<Iterator> { static const bool is_contiguous = is_contiguous_iterator<Iterator>::value; static const bool is_on_device = is_device_iterator<Iterator>::value; static const bool is_on_host = !is_on_device; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_TRAITS_HPP detail/path.hpp 0000644 00000004064 15125510617 0007462 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_PATH_HPP #define BOOST_COMPUTE_DETAIL_PATH_HPP #include <boost/filesystem/path.hpp> #include <boost/filesystem/operations.hpp> #include <boost/compute/detail/getenv.hpp> namespace boost { namespace compute { namespace detail { // Path delimiter symbol for the current OS. static const std::string& path_delim() { static const std::string delim = boost::filesystem::path("/").make_preferred().string(); return delim; } // Path to appdata folder. 
inline const std::string& appdata_path() { #ifdef _WIN32 static const std::string appdata = detail::getenv("APPDATA") + path_delim() + "boost_compute"; #else static const std::string appdata = detail::getenv("HOME") + path_delim() + ".boost_compute"; #endif return appdata; } // Path to cached binaries. inline std::string program_binary_path(const std::string &hash, bool create = false) { std::string dir = detail::appdata_path() + path_delim() + hash.substr(0, 2) + path_delim() + hash.substr(2); if(create && !boost::filesystem::exists(dir)){ boost::filesystem::create_directories(dir); } return dir + path_delim(); } // Path to parameter caches. inline std::string parameter_cache_path(bool create = false) { const static std::string dir = appdata_path() + path_delim() + "tune"; if(create && !boost::filesystem::exists(dir)){ boost::filesystem::create_directories(dir); } return dir + path_delim(); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_PATH_HPP detail/vendor.hpp 0000644 00000002723 15125510617 0010023 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_VENDOR_HPP #define BOOST_COMPUTE_DETAIL_VENDOR_HPP #include <boost/compute/device.hpp> #include <boost/compute/platform.hpp> namespace boost { namespace compute { namespace detail { // returns true if the device is an nvidia gpu inline bool is_nvidia_device(const device &device) { std::string nvidia("NVIDIA"); return device.vendor().compare(0, nvidia.size(), nvidia) == 0; } // returns true if the device is an amd cpu or gpu inline bool is_amd_device(const device &device) { return device.platform().vendor() == "Advanced Micro Devices, Inc."; } // returns true if the platform is Apple OpenCL platform inline bool is_apple_platform(const platform &platform) { return platform.name() == "Apple"; } // returns true if the device is from Apple OpenCL Platform inline bool is_apple_platform_device(const device &device) { return is_apple_platform(device.platform()); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_VENDOR_HPP detail/lru_cache.hpp 0000644 00000006613 15125510617 0010455 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP #define BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP #include <map> #include <list> #include <utility> #include <boost/optional.hpp> namespace boost { namespace compute { namespace detail { // a cache which evicts the least recently used item when it is full template<class Key, class Value> class lru_cache { public: typedef Key key_type; typedef Value value_type; typedef std::list<key_type> list_type; typedef std::map< key_type, std::pair<value_type, typename list_type::iterator> > map_type; lru_cache(size_t capacity) : m_capacity(capacity) { } ~lru_cache() { } size_t size() const { return m_map.size(); } size_t capacity() const { return m_capacity; } bool empty() const { return m_map.empty(); } bool contains(const key_type &key) { return m_map.find(key) != m_map.end(); } void insert(const key_type &key, const value_type &value) { typename map_type::iterator i = m_map.find(key); if(i == m_map.end()){ // insert item into the cache, but first check if it is full if(size() >= m_capacity){ // cache is full, evict the least recently used item evict(); } // insert the new item m_list.push_front(key); m_map[key] = std::make_pair(value, m_list.begin()); } } boost::optional<value_type> get(const key_type &key) { // lookup value in the cache typename map_type::iterator i = m_map.find(key); if(i == m_map.end()){ // value not in cache return boost::none; } // return the value, but first update its place in the most // recently used list typename list_type::iterator j = i->second.second; if(j != m_list.begin()){ // move item to the front of the most recently used list m_list.erase(j); m_list.push_front(key); // update iterator in map j = m_list.begin(); const value_type &value = i->second.first; m_map[key] = std::make_pair(value, j); // return the value return value; } else { // the item is already at the front of the most recently // used list so just return it 
return i->second.first; } } void clear() { m_map.clear(); m_list.clear(); } private: void evict() { // evict item from the end of most recently used list typename list_type::iterator i = --m_list.end(); m_map.erase(*i); m_list.erase(i); } private: map_type m_map; list_type m_list; size_t m_capacity; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP detail/duration.hpp 0000644 00000003151 15125510617 0010347 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_DURATION_HPP #define BOOST_COMPUTE_DETAIL_DURATION_HPP #include <boost/config.hpp> #ifndef BOOST_COMPUTE_NO_HDR_CHRONO #include <chrono> #endif #ifndef BOOST_COMPUTE_NO_BOOST_CHRONO #include <boost/chrono/duration.hpp> #endif namespace boost { namespace compute { namespace detail { #ifndef BOOST_COMPUTE_NO_HDR_CHRONO template<class Rep, class Period> inline std::chrono::duration<Rep, Period> make_duration_from_nanoseconds(std::chrono::duration<Rep, Period>, size_t nanoseconds) { return std::chrono::duration_cast<std::chrono::duration<Rep, Period> >( std::chrono::nanoseconds(nanoseconds) ); } #endif // BOOST_COMPUTE_NO_HDR_CHRONO #ifndef BOOST_COMPUTE_NO_BOOST_CHRONO template<class Rep, class Period> inline boost::chrono::duration<Rep, Period> make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nanoseconds) { return boost::chrono::duration_cast<boost::chrono::duration<Rep, Period> >( boost::chrono::nanoseconds(nanoseconds) ); } #endif // BOOST_COMPUTE_NO_BOOST_CHRONO } // end detail 
namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_DURATION_HPP detail/get_object_info.hpp 0000644 00000020174 15125510617 0011646 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP #define BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP #include <string> #include <vector> #include <boost/preprocessor/seq/for_each.hpp> #include <boost/preprocessor/tuple/elem.hpp> #include <boost/throw_exception.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/exception/opencl_error.hpp> namespace boost { namespace compute { namespace detail { template<class Function, class Object, class AuxInfo> struct bound_info_function { bound_info_function(Function function, Object object, AuxInfo aux_info) : m_function(function), m_object(object), m_aux_info(aux_info) { } template<class Info> cl_int operator()(Info info, size_t input_size, const void *input, size_t size, void *value, size_t *size_ret) const { return m_function( m_object, m_aux_info, info, input_size, input, size, value, size_ret ); } template<class Info> cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const { return m_function(m_object, m_aux_info, info, size, value, size_ret); } Function m_function; Object m_object; AuxInfo m_aux_info; }; template<class Function, class Object> struct bound_info_function<Function, Object, void> { bound_info_function(Function function, Object object) : m_function(function), m_object(object) { } template<class Info> cl_int operator()(Info info, size_t size, void *value, size_t 
*size_ret) const { return m_function(m_object, info, size, value, size_ret); } Function m_function; Object m_object; }; template<class Function, class Object> inline bound_info_function<Function, Object, void> bind_info_function(Function f, Object o) { return bound_info_function<Function, Object, void>(f, o); } template<class Function, class Object, class AuxInfo> inline bound_info_function<Function, Object, AuxInfo> bind_info_function(Function f, Object o, AuxInfo j) { return bound_info_function<Function, Object, AuxInfo>(f, o, j); } // default implementation template<class T> struct get_object_info_impl { template<class Function, class Info> T operator()(Function function, Info info) const { T value; cl_int ret = function(info, sizeof(T), &value, 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value; } template<class Function, class Info> T operator()(Function function, Info info, const size_t input_size, const void* input) const { T value; cl_int ret = function(info, input_size, input, sizeof(T), &value, 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value; } }; // specialization for bool template<> struct get_object_info_impl<bool> { template<class Function, class Info> bool operator()(Function function, Info info) const { cl_bool value; cl_int ret = function(info, sizeof(cl_bool), &value, 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value == CL_TRUE; } }; // specialization for std::string template<> struct get_object_info_impl<std::string> { template<class Function, class Info> std::string operator()(Function function, Info info) const { size_t size = 0; cl_int ret = function(info, 0, 0, &size); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } if(size == 0){ return std::string(); } std::string value(size - 1, 0); ret = function(info, size, &value[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value; } }; // 
specialization for std::vector<T> template<class T> struct get_object_info_impl<std::vector<T> > { template<class Function, class Info> std::vector<T> operator()(Function function, Info info) const { size_t size = 0; cl_int ret = function(info, 0, 0, &size); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } if(size == 0) return std::vector<T>(); std::vector<T> vector(size / sizeof(T)); ret = function(info, size, &vector[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return vector; } template<class Function, class Info> std::vector<T> operator()(Function function, Info info, const size_t input_size, const void* input) const { #ifdef BOOST_COMPUTE_CL_VERSION_2_1 // For CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT in clGetKernelSubGroupInfo // we can't get param_value_size using param_value_size_ret if(info == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) { std::vector<T> vector(3); cl_int ret = function( info, input_size, input, sizeof(T) * vector.size(), &vector[0], 0 ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return vector; } #endif size_t size = 0; cl_int ret = function(info, input_size, input, 0, 0, &size); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } std::vector<T> vector(size / sizeof(T)); ret = function(info, input_size, input, size, &vector[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return vector; } }; // returns the value (of type T) from the given clGet*Info() function call. 
// Overload for info functions called as f(o, i, ...): binds f to the object
// o and queries the parameter i.
template<class T, class Function, class Object, class Info>
inline T get_object_info(Function f, Object o, Info i)
{
    return get_object_info_impl<T>()(bind_info_function(f, o), i);
}

// Overload for info functions that take an auxiliary argument j between the
// object and the parameter, i.e. f(o, j, i, ...).
template<class T, class Function, class Object, class Info, class AuxInfo>
inline T get_object_info(Function f, Object o, Info i, AuxInfo j)
{
    return get_object_info_impl<T>()(bind_info_function(f, o, j), i);
}

// Overload for info functions that additionally take an input value:
// k is the size of the input in bytes and l points to it.
template<class T, class Function, class Object, class Info, class AuxInfo>
inline T get_object_info(Function f, Object o, Info i, AuxInfo j, const size_t k, const void * l)
{
    return get_object_info_impl<T>()(bind_info_function(f, o, j), i, k, l);
}

// returns the value type for the clGet*Info() call on Object with Enum.
// (only declared here; the macros below provide the specializations)
template<class Object, int Enum>
struct get_object_info_type;

// defines the object::get_info<Enum>() specialization
//
// Expands to a detail::get_object_info_type<object_type, value>
// specialization whose nested type is result_type, followed by the
// definition of object_type::get_info<value>(), which forwards to
// get_info<result_type>(value). The expansion opens "namespace detail"
// itself, so it is meant to be used from the enclosing namespace.
#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION(object_type, result_type, value) \
    namespace detail { \
        template<> struct get_object_info_type<object_type, value> { typedef result_type type; }; \
    } \
    template<> inline result_type object_type::get_info<value>() const \
    { \
        return get_info<result_type>(value); \
    }

// used by BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS()
//
// Callback with the (r, data, elem) signature expected by
// BOOST_PP_SEQ_FOR_EACH: data is the object type and elem is a
// (result_type, value) tuple.
#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL(r, data, elem) \
    BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION( \
        data, BOOST_PP_TUPLE_ELEM(2, 0, elem), BOOST_PP_TUPLE_ELEM(2, 1, elem) \
    )

// defines the object::get_info<Enum>() specialization for each
// (result_type, value) tuple in seq for object_type.
#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(object_type, seq) \ BOOST_PP_SEQ_FOR_EACH( \ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL, object_type, seq \ ) } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP detail/device_ptr.hpp 0000644 00000012422 15125510617 0010647 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DEVICE_PTR_HPP #define BOOST_COMPUTE_DEVICE_PTR_HPP #include <boost/type_traits.hpp> #include <boost/static_assert.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/config.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class T, class IndexExpr> struct device_ptr_index_expr { typedef T result_type; device_ptr_index_expr(const buffer &buffer, uint_ index, const IndexExpr &expr) : m_buffer(buffer), m_index(index), m_expr(expr) { } operator T() const { BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value, "Index expression must be integral"); BOOST_ASSERT(m_buffer.get()); const context &context = m_buffer.get_context(); const device &device = context.get_device(); command_queue queue(context, device); return detail::read_single_value<T>(m_buffer, m_expr, queue); } const buffer &m_buffer; uint_ m_index; IndexExpr m_expr; }; template<class T> class device_ptr { public: typedef T value_type; typedef std::size_t size_type; typedef 
std::ptrdiff_t difference_type; typedef std::random_access_iterator_tag iterator_category; typedef T* pointer; typedef T& reference; device_ptr() : m_index(0) { } device_ptr(const buffer &buffer, size_t index = 0) : m_buffer(buffer.get(), false), m_index(index) { } device_ptr(const device_ptr<T> &other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { } device_ptr<T>& operator=(const device_ptr<T> &other) { if(this != &other){ m_buffer.get() = other.m_buffer.get(); m_index = other.m_index; } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES device_ptr(device_ptr<T>&& other) BOOST_NOEXCEPT : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { other.m_buffer.get() = 0; } device_ptr<T>& operator=(device_ptr<T>&& other) BOOST_NOEXCEPT { m_buffer.get() = other.m_buffer.get(); m_index = other.m_index; other.m_buffer.get() = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES ~device_ptr() { // set buffer to null so that its reference count will // not be decremented when its destructor is called m_buffer.get() = 0; } size_type get_index() const { return m_index; } const buffer& get_buffer() const { return m_buffer; } template<class OT> device_ptr<OT> cast() const { return device_ptr<OT>(m_buffer, m_index); } device_ptr<T> operator+(difference_type n) const { return device_ptr<T>(m_buffer, m_index + n); } device_ptr<T> operator+(const device_ptr<T> &other) const { return device_ptr<T>(m_buffer, m_index + other.m_index); } device_ptr<T>& operator+=(difference_type n) { m_index += static_cast<size_t>(n); return *this; } difference_type operator-(const device_ptr<T> &other) const { return static_cast<difference_type>(m_index - other.m_index); } device_ptr<T>& operator-=(difference_type n) { m_index -= n; return *this; } bool operator==(const device_ptr<T> &other) const { return m_buffer.get() == other.m_buffer.get() && m_index == other.m_index; } bool operator!=(const device_ptr<T> &other) const { return !(*this == other); } 
template<class Expr> detail::device_ptr_index_expr<T, Expr> operator[](const Expr &expr) const { BOOST_ASSERT(m_buffer.get()); return detail::device_ptr_index_expr<T, Expr>(m_buffer, uint_(m_index), expr); } private: const buffer m_buffer; size_t m_index; }; // is_buffer_iterator specialization for device_ptr template<class Iterator> struct is_buffer_iterator< Iterator, typename boost::enable_if< boost::is_same< device_ptr<typename Iterator::value_type>, typename boost::remove_const<Iterator>::type > >::type > : public boost::true_type {}; } // end detail namespace // is_device_iterator specialization for device_ptr template<class T> struct is_device_iterator<detail::device_ptr<T> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DEVICE_PTR_HPP detail/sha1.hpp 0000644 00000003011 15125510617 0007351 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_SHA1_HPP #define BOOST_COMPUTE_DETAIL_SHA1_HPP #include <sstream> #include <iomanip> #include <boost/version.hpp> #if BOOST_VERSION >= 106600 # include <boost/uuid/detail/sha1.hpp> #else # include <boost/uuid/sha1.hpp> #endif namespace boost { namespace compute { namespace detail { // Accumulates SHA1 hash of the passed strings. 
// Incrementally hashes strings with SHA1 and renders the digest as a
// lowercase hexadecimal string (five words, eight hex digits each).
class sha1
{
public:
    // Starts a new hash. A non-empty s is fed into the hash immediately.
    sha1(const std::string &s = "")
    {
        if(s.empty()){
            return;
        }
        process(s);
    }

    // Appends the bytes of s to the data being hashed.
    // Returns *this so calls can be chained.
    sha1& process(const std::string &s)
    {
        h.process_bytes(s.c_str(), s.size());
        return *this;
    }

    // Retrieves the digest and formats each of its five words as
    // zero-padded, eight-digit hexadecimal.
    operator std::string()
    {
        const int word_count = 5;
        unsigned int words[word_count];
        h.get_digest(words);

        std::ostringstream hex;
        // std::hex and the fill character are sticky; only the width has
        // to be re-applied for every word
        hex << std::hex << std::setfill('0');
        for(int w = 0; w != word_count; ++w){
            hex << std::setw(8) << words[w];
        }

        return hex.str();
    }

private:
    boost::uuids::detail::sha1 h;
};
// the is_contiguous_iterator meta-function returns true if Iterator points
// to a range of contiguous values. examples of contiguous iterators are
// std::vector<>::iterator and float*. examples of non-contiguous iterators
// are std::set<>::iterator and std::insert_iterator<>.
//
// the implementation consists of two phases. the first checks that value_type
// for the iterator is not void. this must be done as for many containers void
// is not a valid value_type (ex. std::vector<void>::iterator is not valid).
// after ensuring a non-void value_type, the _is_contiguous_iterator function
// is invoked. it has specializations returning true for all (known) contiguous
// iterator types and a default value of false.
// Given a total number of values (count), a number of values to
// process per thread (vpt), and a number of threads to execute per
// block (tpb), this function returns the global work size to be
// passed to clEnqueueNDRangeKernel() for a 1D algorithm.
// Returns the global work size to pass to clEnqueueNDRangeKernel() for a
// 1D algorithm that processes count values, where each thread handles vpt
// values and each block runs tpb threads.
//
// The result is ceil(count / vpt) rounded up to the next multiple of tpb.
// Everything is computed in integer arithmetic: dividing through float
// loses precision once count exceeds 2^24 and could yield a work size that
// is too small to cover every value.
//
// Both vpt and tpb must be non-zero.
inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb)
{
    // ceiling division, written so that (count + vpt - 1) cannot overflow
    size_t work_size = count / vpt;
    if(count % vpt != 0){
        ++work_size;
    }

    // round up to a whole number of blocks
    const size_t remainder = work_size % tpb;
    if(remainder != 0){
        work_size += tpb - remainder;
    }

    return work_size;
}
buffer_value(const buffer &buffer, size_t index) : m_buffer(buffer.get(), false), m_index(index) { } buffer_value(const buffer_value<T> &other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { } ~buffer_value() { // set buffer to null so that its reference count will // not be decremented when its destructor is called m_buffer.get() = 0; } operator value_type() const { if(m_buffer.get()){ const context &context = m_buffer.get_context(); const device &device = context.get_device(); command_queue queue(context, device); return detail::read_single_value<T>(m_buffer, m_index / sizeof(T), queue); } else { return m_value; } } buffer_value<T> operator-() const { return -T(*this); } bool operator<(const T &value) const { return T(*this) < value; } bool operator>(const T &value) const { return T(*this) > value; } bool operator<=(const T &value) const { return T(*this) <= value; } bool operator>=(const T &value) const { return T(*this) <= value; } bool operator==(const T &value) const { return T(*this) == value; } bool operator==(const buffer_value<T> &other) const { if(m_buffer.get() != other.m_buffer.get()){ return false; } if(m_buffer.get()){ return m_index == other.m_index; } else { return m_value == other.m_value; } } bool operator!=(const T &value) const { return T(*this) != value; } buffer_value<T>& operator=(const T &value) { if(m_buffer.get()){ const context &context = m_buffer.get_context(); command_queue queue(context, context.get_device()); detail::write_single_value<T>( value, m_buffer, m_index / sizeof(T), queue ).wait(); return *this; } else { m_value = value; return *this; } } buffer_value<T>& operator=(const buffer_value<T> &value) { return operator=(T(value)); } detail::device_ptr<T> operator&() const { return detail::device_ptr<T>(m_buffer, m_index); } buffer_value<T>& operator++() { if(m_buffer.get()){ T value = T(*this); value++; *this = value; } else { m_value++; } return *this; } buffer_value<T> operator++(int) { buffer_value<T> 
result(*this); ++(*this); return result; } private: const buffer m_buffer; size_t m_index; value_type m_value; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP detail/global_static.hpp 0000644 00000002635 15125510617 0011337 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP #define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP #include <boost/compute/config.hpp> #ifdef BOOST_COMPUTE_THREAD_SAFE # ifdef BOOST_COMPUTE_HAVE_THREAD_LOCAL // use c++11 thread local storage # define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ thread_local type name ctor; # else // use thread_specific_ptr from boost.thread # include <boost/thread/tss.hpp> # define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ static ::boost::thread_specific_ptr< type > BOOST_PP_CAT(name, _tls_ptr_); \ if(!BOOST_PP_CAT(name, _tls_ptr_).get()){ \ BOOST_PP_CAT(name, _tls_ptr_).reset(new type ctor); \ } \ static type &name = *BOOST_PP_CAT(name, _tls_ptr_); # endif #else // no thread-safety, just use static # define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ static type name ctor; #endif #endif // BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP detail/meta_kernel.hpp 0000644 00000076453 15125510617 0011027 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
// A named expression of type T inside a meta_kernel's source. Only the
// textual name is stored; T is exposed as result_type.
template<class T>
class meta_kernel_variable
{
public:
    typedef T result_type;

    // Creates a variable referring to name.
    meta_kernel_variable(const std::string &name)
        : m_name(name)
    {
    }

    // Creates a variable with the same name as other.
    meta_kernel_variable(const meta_kernel_variable &other)
        : m_name(other.m_name)
    {
    }

    // Takes over the name of other; self-assignment is a no-op.
    meta_kernel_variable& operator=(const meta_kernel_variable &other)
    {
        if(this == &other){
            return *this;
        }

        m_name = other.m_name;
        return *this;
    }

    ~meta_kernel_variable()
    {
    }

    // Returns a copy of the variable's name.
    std::string name() const
    {
        return m_name;
    }

private:
    std::string m_name;
};
other.m_value; } return *this; } ~meta_kernel_literal() { } const T& value() const { return m_value; } private: T m_value; }; struct meta_kernel_stored_arg { meta_kernel_stored_arg() : m_size(0), m_value(0) { } meta_kernel_stored_arg(const meta_kernel_stored_arg &other) : m_size(0), m_value(0) { set_value(other.m_size, other.m_value); } meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other) { if(this != &other){ set_value(other.m_size, other.m_value); } return *this; } template<class T> meta_kernel_stored_arg(const T &value) : m_size(0), m_value(0) { set_value(value); } ~meta_kernel_stored_arg() { if(m_value){ std::free(m_value); } } void set_value(size_t size, const void *value) { if(m_value){ std::free(m_value); } m_size = size; if(value){ m_value = std::malloc(size); std::memcpy(m_value, value, size); } else { m_value = 0; } } template<class T> void set_value(const T &value) { set_value(sizeof(T), boost::addressof(value)); } size_t m_size; void *m_value; }; struct meta_kernel_buffer_info { meta_kernel_buffer_info(const buffer &buffer, const std::string &id, memory_object::address_space addr_space, size_t i) : m_mem(buffer.get()), identifier(id), address_space(addr_space), index(i) { } cl_mem m_mem; std::string identifier; memory_object::address_space address_space; size_t index; }; struct meta_kernel_svm_info { template <class T> meta_kernel_svm_info(const svm_ptr<T> ptr, const std::string &id, memory_object::address_space addr_space, size_t i) : ptr(ptr.get()), identifier(id), address_space(addr_space), index(i) { } void* ptr; std::string identifier; memory_object::address_space address_space; size_t index; }; class meta_kernel; template<class Type> struct inject_type_impl { void operator()(meta_kernel &) { // default implementation does nothing } }; #define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \ meta_kernel& operator<<(const type &x) \ { \ m_source << x; \ return *this; \ } #define 
BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \ meta_kernel& operator<<(const type &x) \ { \ m_source << "(" << type_name<type>() << ")"; \ m_source << "("; \ for(size_t i = 0; i < vector_size<type>::value; i++){ \ *this << lit(x[i]); \ \ if(i != vector_size<type>::value - 1){ \ m_source << ","; \ } \ } \ m_source << ")"; \ return *this; \ } #define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \ BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)) class meta_kernel { public: template<class T> class argument { public: argument(const std::string &name, size_t index) : m_name(name), m_index(index) { } const std::string &name() const { return m_name; } size_t index() const { return m_index; } private: std::string m_name; size_t m_index; }; explicit meta_kernel(const std::string &name) : m_name(name) { } meta_kernel(const meta_kernel &other) { m_source.str(other.m_source.str()); m_options = other.m_options; } meta_kernel& operator=(const meta_kernel &other) { if(this != &other){ m_source.str(other.m_source.str()); m_options = other.m_options; } return *this; } ~meta_kernel() { } std::string name() const { return m_name; } std::string source() const { std::stringstream stream; // add pragmas if(!m_pragmas.empty()){ stream << m_pragmas << "\n"; } // add macros stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n"; stream << "#define boost_pair_get(x, n) (n == 0 ? 
x.first ## x.second)\n"; stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n"; stream << "#define boost_tuple_get(x, n) (x.v ## n)\n"; // add type declaration source stream << m_type_declaration_source.str() << "\n"; // add external function source stream << m_external_function_source.str() << "\n"; // add kernel source stream << "__kernel void " << m_name << "(" << boost::join(m_args, ", ") << ")\n" << "{\n" << m_source.str() << "\n}\n"; return stream.str(); } kernel compile(const context &context, const std::string &options = std::string()) { // generate the program source std::string source = this->source(); // generate cache key std::string cache_key = "__boost_meta_kernel_" + static_cast<std::string>(detail::sha1(source)); // load program cache boost::shared_ptr<program_cache> cache = program_cache::get_global_cache(context); std::string compile_options = m_options + options; // load (or build) program from cache ::boost::compute::program program = cache->get_or_build(cache_key, compile_options, source, context); // create kernel ::boost::compute::kernel kernel = program.create_kernel(name()); // bind stored args for(size_t i = 0; i < m_stored_args.size(); i++){ const detail::meta_kernel_stored_arg &arg = m_stored_args[i]; if(arg.m_size != 0){ kernel.set_arg(i, arg.m_size, arg.m_value); } } // bind buffer args for(size_t i = 0; i < m_stored_buffers.size(); i++){ const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; kernel.set_arg(bi.index, bi.m_mem); } // bind svm args for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){ const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i]; kernel.set_arg_svm_ptr(spi.index, spi.ptr); } return kernel; } template<class T> size_t add_arg(const std::string &name) { std::stringstream stream; stream << type<T>() << " " << name; // add argument to list m_args.push_back(stream.str()); // return index return m_args.size() - 1; } template<class T> size_t 
add_arg(memory_object::address_space address_space, const std::string &name) { return add_arg_with_qualifiers<T>(address_space_prefix(address_space), name); } template<class T> void set_arg(size_t index, const T &value) { if(index >= m_stored_args.size()){ m_stored_args.resize(index + 1); } m_stored_args[index] = detail::meta_kernel_stored_arg(value); } void set_arg(size_t index, const memory_object &mem) { set_arg<cl_mem>(index, mem.get()); } void set_arg(size_t index, const image_sampler &sampler) { set_arg<cl_sampler>(index, cl_sampler(sampler)); } template<class T> size_t add_set_arg(const std::string &name, const T &value) { size_t index = add_arg<T>(name); set_arg<T>(index, value); return index; } void add_extension_pragma(const std::string &extension, const std::string &value = "enable") { m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n"; } void add_extension_pragma(const std::string &extension, const std::string &value) const { return const_cast<meta_kernel *>(this)->add_extension_pragma(extension, value); } template<class T> std::string type() const { std::stringstream stream; // const qualifier if(boost::is_const<T>::value){ stream << "const "; } // volatile qualifier if(boost::is_volatile<T>::value){ stream << "volatile "; } // type typedef typename boost::remove_cv< typename boost::remove_pointer<T>::type >::type Type; stream << type_name<Type>(); // pointer if(boost::is_pointer<T>::value){ stream << "*"; } // inject type pragmas and/or definitions inject_type<Type>(); return stream.str(); } template<class T> std::string decl(const std::string &name) const { return type<T>() + " " + name; } template<class T, class Expr> std::string decl(const std::string &name, const Expr &init) const { meta_kernel tmp((std::string())); tmp << tmp.decl<T>(name) << " = " << init; return tmp.m_source.str(); } template<class T> detail::meta_kernel_variable<T> var(const std::string &name) const { type<T>(); return make_var<T>(name); } 
template<class T> detail::meta_kernel_literal<T> lit(const T &value) const { type<T>(); return detail::meta_kernel_literal<T>(value); } template<class T> detail::meta_kernel_variable<T> expr(const std::string &expr) const { type<T>(); return detail::meta_kernel_variable<T>(expr); } // define stream operators for scalar and vector types BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double) // define stream operators for float scalar and vector types meta_kernel& operator<<(const float &x) { m_source << std::showpoint << x << 'f'; return *this; } BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_) BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_) BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_) BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_) // define stream operators for variable types template<class T> meta_kernel& operator<<(const meta_kernel_variable<T> &variable) { return *this << variable.name(); } // define stream operators for literal types template<class T> meta_kernel& operator<<(const meta_kernel_literal<T> &literal) { return *this << literal.value(); } meta_kernel& operator<<(const meta_kernel_literal<bool> &literal) { return *this << (literal.value() ? 
"true" : "false"); } meta_kernel& operator<<(const meta_kernel_literal<char> &literal) { const char c = literal.value(); switch(c){ // control characters case '\0': return *this << "'\\0'"; case '\a': return *this << "'\\a'"; case '\b': return *this << "'\\b'"; case '\t': return *this << "'\\t'"; case '\n': return *this << "'\\n'"; case '\v': return *this << "'\\v'"; case '\f': return *this << "'\\f'"; case '\r': return *this << "'\\r'"; // characters which need escaping case '\"': case '\'': case '\?': case '\\': return *this << "'\\" << c << "'"; // all other characters default: return *this << "'" << c << "'"; } } meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal) { return *this << lit<char>(literal.value()); } meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal) { return *this << uint_(literal.value()); } // define stream operators for strings meta_kernel& operator<<(char ch) { m_source << ch; return *this; } meta_kernel& operator<<(const char *string) { m_source << string; return *this; } meta_kernel& operator<<(const std::string &string) { m_source << string; return *this; } template<class T> static detail::meta_kernel_variable<T> make_var(const std::string &name) { return detail::meta_kernel_variable<T>(name); } template<class T> static detail::meta_kernel_literal<T> make_lit(const T &value) { return detail::meta_kernel_literal<T>(value); } template<class T> static detail::meta_kernel_variable<T> make_expr(const std::string &expr) { return detail::meta_kernel_variable<T>(expr); } event exec(command_queue &queue) { return exec_1d(queue, 0, 1); } event exec_1d(command_queue &queue, size_t global_work_offset, size_t global_work_size, const wait_list &events = wait_list()) { const context &context = queue.get_context(); ::boost::compute::kernel kernel = compile(context); return queue.enqueue_1d_range_kernel( kernel, global_work_offset, global_work_size, 0, events ); } event exec_1d(command_queue &queue, size_t 
global_work_offset, size_t global_work_size, size_t local_work_size, const wait_list &events = wait_list()) { const context &context = queue.get_context(); ::boost::compute::kernel kernel = compile(context); return queue.enqueue_1d_range_kernel( kernel, global_work_offset, global_work_size, local_work_size, events ); } template<class T> std::string get_buffer_identifier(const buffer &buffer, const memory_object::address_space address_space = memory_object::global_memory) { // check if we've already seen buffer for(size_t i = 0; i < m_stored_buffers.size(); i++){ const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; if(bi.m_mem == buffer.get() && bi.address_space == address_space){ return bi.identifier; } } // create a new binding std::string identifier = "_buf" + lexical_cast<std::string>(m_stored_buffers.size()); size_t index = add_arg<T *>(address_space, identifier); // store new buffer info m_stored_buffers.push_back( detail::meta_kernel_buffer_info(buffer, identifier, address_space, index)); return identifier; } template<class T> std::string get_svm_identifier(const svm_ptr<T> &svm_ptr, const memory_object::address_space address_space = memory_object::global_memory) { BOOST_ASSERT( (address_space == memory_object::global_memory) || (address_space == memory_object::constant_memory) ); // check if we've already seen this pointer for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){ const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i]; if(spi.ptr == svm_ptr.get() && spi.address_space == address_space){ return spi.identifier; } } // create a new binding std::string identifier = "_svm_ptr" + lexical_cast<std::string>(m_stored_svm_ptrs.size()); size_t index = add_arg<T *>(address_space, identifier); if(m_stored_svm_ptrs.empty()) { m_options += std::string(" -cl-std=CL2.0"); } // store new svm pointer info m_stored_svm_ptrs.push_back( detail::meta_kernel_svm_info( svm_ptr, identifier, address_space, index ) ); return identifier; } std::string 
get_image_identifier(const char *qualifiers, const image2d &image) { size_t index = add_arg_with_qualifiers<image2d>(qualifiers, "image"); set_arg(index, image); return "image"; } std::string get_sampler_identifier(bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode) { (void) normalized_coords; (void) addressing_mode; (void) filter_mode; m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n" " CLK_ADDRESS_NONE |\n" " CLK_FILTER_NEAREST;\n"; return "sampler"; } template<class Expr> static std::string expr_to_string(const Expr &expr) { meta_kernel tmp((std::string())); tmp << expr; return tmp.m_source.str(); } template<class Predicate> detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const { return detail::invoked_function<bool, boost::tuple<Predicate> >( "if", std::string(), boost::make_tuple(pred) ); } template<class Predicate> detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const { return detail::invoked_function<bool, boost::tuple<Predicate> >( "else if", std::string(), boost::make_tuple(pred) ); } detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const { return expr<cl_uint>("get_global_id(" + lexical_cast<std::string>(dim) + ")"); } void add_function(const std::string &name, const std::string &source) { if(m_external_function_names.count(name)){ return; } m_external_function_names.insert(name); m_external_function_source << source << "\n"; } void add_function(const std::string &name, const std::string &source, const std::map<std::string, std::string> &definitions) { typedef std::map<std::string, std::string>::const_iterator iter; std::stringstream s; // add #define's for(iter i = definitions.begin(); i != definitions.end(); i++){ s << "#define " << i->first; if(!i->second.empty()){ s << " " << i->second; } s << "\n"; } s << source << "\n"; // add #undef's for(iter i = definitions.begin(); i != definitions.end(); i++){ s << "#undef " 
<< i->first << "\n"; } add_function(name, s.str()); } template<class Type> void add_type_declaration(const std::string &declaration) { const char *name = type_name<Type>(); // check if the type has already been declared std::string source = m_type_declaration_source.str(); if(source.find(name) != std::string::npos){ return; } m_type_declaration_source << declaration; } template<class Type> void inject_type() const { inject_type_impl<Type>()(const_cast<meta_kernel &>(*this)); } // the insert_function_call() method inserts a call to a function with // the given name tuple of argument values. template<class ArgTuple> void insert_function_call(const std::string &name, const ArgTuple &args) { *this << name << '('; insert_function_call_args(args); *this << ')'; } // the insert_function_call_args() method takes a tuple of argument values // and inserts them into the source string with a comma in-between each. // this is useful for creating function calls given a tuple of values. void insert_function_call_args(const boost::tuple<>&) { } #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \ inject_type<BOOST_PP_CAT(T, n)>(); #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \ << boost::get<BOOST_PP_DEC(n)>(args) << ", " #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class T)> \ void insert_function_call_args( \ const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \ ) \ { \ BOOST_PP_REPEAT_FROM_TO( \ 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \ ) \ *this \ BOOST_PP_REPEAT_FROM_TO( \ 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \ ) \ << boost::get<BOOST_PP_DEC(n)>(args); \ } BOOST_PP_REPEAT_FROM_TO( 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~ ) #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS static const 
char* address_space_prefix(const memory_object::address_space value) { switch(value){ case memory_object::global_memory: return "__global"; case memory_object::local_memory: return "__local"; case memory_object::private_memory: return "__private"; case memory_object::constant_memory: return "__constant"; }; return 0; // unreachable } private: template<class T> size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name) { size_t index = add_arg<T>(name); // update argument type declaration with qualifiers std::stringstream s; s << qualifiers << " " << m_args[index]; m_args[index] = s.str(); return index; } private: std::string m_name; std::stringstream m_source; std::stringstream m_external_function_source; std::stringstream m_type_declaration_source; std::set<std::string> m_external_function_names; std::vector<std::string> m_args; std::string m_pragmas; std::string m_options; std::vector<detail::meta_kernel_stored_arg> m_stored_args; std::vector<detail::meta_kernel_buffer_info> m_stored_buffers; std::vector<detail::meta_kernel_svm_info> m_stored_svm_ptrs; }; template<class ResultType, class ArgTuple> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr) { if(!expr.source().empty()){ kernel.add_function(expr.name(), expr.source(), expr.definitions()); } kernel.insert_function_call(expr.name(), expr.args()); return kernel; } template<class ResultType, class ArgTuple, class CaptureTuple> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr) { if(!expr.source().empty()){ kernel.add_function(expr.name(), expr.source(), expr.definitions()); } kernel << expr.name() << '('; kernel.insert_function_call_args(expr.args()); kernel << ", "; kernel.insert_function_call_args(expr.capture()); kernel << ')'; return kernel; } template<class Arg1, class Arg2, class Result> inline meta_kernel& operator<<(meta_kernel &kernel, const 
invoked_binary_operator<Arg1, Arg2, Result> &expr) { return kernel << "((" << expr.arg1() << ")" << expr.op() << "(" << expr.arg2() << "))"; } template<class T, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const detail::device_ptr_index_expr<T, IndexExpr> &expr) { if(expr.m_index == 0){ return kernel << kernel.get_buffer_identifier<T>(expr.m_buffer) << '[' << expr.m_expr << ']'; } else { return kernel << kernel.get_buffer_identifier<T>(expr.m_buffer) << '[' << expr.m_index << "+(" << expr.m_expr << ")]"; } } template<class T1, class T2, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const detail::device_ptr_index_expr<std::pair<T1, T2>, IndexExpr> &expr) { typedef std::pair<T1, T2> T; if(expr.m_index == 0){ return kernel << kernel.get_buffer_identifier<T>(expr.m_buffer) << '[' << expr.m_expr << ']'; } else { return kernel << kernel.get_buffer_identifier<T>(expr.m_buffer) << '[' << expr.m_index << "+(" << expr.m_expr << ")]"; } } // SVM requires OpenCL 2.0 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) template<class T, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const svm_ptr_index_expr<T, IndexExpr> &expr) { return kernel << kernel.get_svm_identifier<T>(expr.m_svm_ptr) << '[' << expr.m_expr << ']'; } #endif template<class Predicate, class Arg> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_unary_negate_function<Predicate, Arg> &expr) { return kernel << "!(" << expr.pred()(expr.expr()) << ')'; } template<class Predicate, class Arg1, class Arg2> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_binary_negate_function<Predicate, Arg1, Arg2> &expr) { return kernel << "!(" << expr.pred()(expr.expr1(), expr.expr2()) << ')'; } // get<N>() for vector types template<size_t N, class Arg, class T> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_get<N, Arg, T> &expr) { BOOST_STATIC_ASSERT(N < 16); if(N < 10){ return 
kernel << expr.m_arg << ".s" << int_(N); } else if(N < 16){ #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable: 4307) #endif return kernel << expr.m_arg << ".s" << char('a' + (N - 10)); #ifdef _MSC_VER # pragma warning(pop) #endif } return kernel; } template<class T, class Arg> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_field<T, Arg> &expr) { return kernel << expr.m_arg << "." << expr.m_field; } template<class T, class Arg> inline meta_kernel& operator<<(meta_kernel &k, const invoked_as<T, Arg> &expr) { return k << "as_" << type_name<T>() << "(" << expr.m_arg << ")"; } template<class T, class Arg> inline meta_kernel& operator<<(meta_kernel &k, const invoked_convert<T, Arg> &expr) { return k << "convert_" << type_name<T>() << "(" << expr.m_arg << ")"; } template<class T, class Arg> inline meta_kernel& operator<<(meta_kernel &k, const invoked_identity<T, Arg> &expr) { return k << expr.m_arg; } template<> struct inject_type_impl<double_> { void operator()(meta_kernel &kernel) { kernel.add_extension_pragma("cl_khr_fp64", "enable"); } }; template<class Scalar, size_t N> struct inject_type_impl<vector_type<Scalar, N> > { void operator()(meta_kernel &kernel) { kernel.inject_type<Scalar>(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP detail/mpl_vector_to_tuple.hpp 0000644 00000004336 15125510617 0012615 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP #define BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP #include <boost/mpl/copy.hpp> #include <boost/mpl/vector.hpp> #include <boost/tuple/tuple.hpp> #include <boost/fusion/include/mpl.hpp> #include <boost/fusion/adapted/boost_tuple.hpp> #include <boost/preprocessor/repetition.hpp> #include <boost/compute/config.hpp> namespace boost { namespace compute { namespace detail { namespace mpl = boost::mpl; template<class Vector, size_t N> struct mpl_vector_to_tuple_impl; #define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ typename mpl::at_c<Vector, n>::type #define BOOST_COMPUTE_VEC2TUP(z, n, unused) \ template<class Vector> \ struct mpl_vector_to_tuple_impl<Vector, n> \ { \ typedef typename \ boost::tuple< \ BOOST_PP_ENUM(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ > type; \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_VEC2TUP, ~) #undef BOOST_COMPUTE_VEC2TUP #undef BOOST_COMPUTE_PRINT_ELEM // meta-function which converts a mpl::vector to a boost::tuple template<class Vector> struct mpl_vector_to_tuple { typedef typename mpl_vector_to_tuple_impl< Vector, mpl::size<Vector>::value >::type type; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP detail/nvidia_compute_capability.hpp 0000644 00000004031 15125510617 0013727 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP #define BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP #include <boost/compute/device.hpp> #ifdef BOOST_COMPUTE_HAVE_HDR_CL_EXT #include <boost/compute/detail/cl_versions.hpp> #include <CL/cl_ext.h> #endif namespace boost { namespace compute { namespace detail { #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV #else #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV #else #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #endif inline void get_nvidia_compute_capability(const device &device, int &major, int &minor) { if(!device.supports_extension("cl_nv_device_attribute_query")){ major = minor = 0; return; } major = device.get_info<uint_>(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV); minor = device.get_info<uint_>(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV); } inline bool check_nvidia_compute_capability(const device &device, int major, int minor) { int actual_major, actual_minor; get_nvidia_compute_capability(device, actual_major, actual_minor); return actual_major > major || (actual_major == major && actual_minor >= minor); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP detail/is_buffer_iterator.hpp 0000644 00000001700 15125510617 0012375 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // 
See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP #define BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP #include <boost/config.hpp> #include <boost/type_traits.hpp> #include <boost/utility/enable_if.hpp> namespace boost { namespace compute { namespace detail { // default = false template<class Iterator, class Enable = void> struct is_buffer_iterator : public boost::false_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP detail/print_range.hpp 0000644 00000005034 15125510617 0011034 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP #define BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP #include <vector> #include <iostream> #include <iterator> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator> inline void print_range(InputIterator first, InputIterator last, command_queue &queue, typename boost::enable_if< is_buffer_iterator<InputIterator> >::type* = 0) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; const size_t size = iterator_range_size(first, last); // copy values to temporary vector on the host std::vector<value_type> tmp(size); ::boost::compute::copy(first, last, tmp.begin(), queue); // print values std::cout << "[ "; for(size_t i = 0; i < size; i++){ std::cout << tmp[i]; if(i != size - 1){ std::cout << ", "; } } std::cout << " ]" << std::endl; } template<class InputIterator> inline void print_range(InputIterator first, InputIterator last, command_queue &queue, typename boost::enable_if_c< !is_buffer_iterator<InputIterator>::value >::type* = 0) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; const context &context = queue.get_context(); const size_t size = iterator_range_size(first, last); // copy values to temporary vector on the device ::boost::compute::vector<value_type> tmp(size, context); ::boost::compute::copy(first, last, tmp.begin(), queue); print_range(tmp.begin(), tmp.end(), queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP detail/assert_cl_success.hpp 0000644 00000001523 15125510617 0012232 0 ustar 00 //---------------------------------------------------------------------------// // 
Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP #define BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP #include <boost/assert.hpp> #if defined(BOOST_DISABLE_ASSERTS) || defined(NDEBUG) #define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \ function #else #define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \ BOOST_ASSERT(function == CL_SUCCESS) #endif #endif // BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP detail/cl_versions.hpp 0000644 00000006072 15125510617 0011055 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2018 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_CL_VERSION_HPP
#define BOOST_COMPUTE_DETAIL_CL_VERSION_HPP

// Step 1: when the user caps the OpenCL version through
// BOOST_COMPUTE_MAX_CL_VERSION, define the CL_USE_DEPRECATED_OPENCL_x_y_APIS
// macro for every threshold the cap is below (unless the user already
// defined it).
#if defined(BOOST_COMPUTE_MAX_CL_VERSION)
#   if !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 202
#       define CL_USE_DEPRECATED_OPENCL_2_1_APIS
#   endif
#   if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 201
#       define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#   endif
#   if !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 200
#       define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#   endif
#   if !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 102
#       define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#   endif
#   if !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 101
#       define CL_USE_DEPRECATED_OPENCL_1_0_APIS
#   endif
#endif

// Step 2: derive CL_TARGET_OPENCL_VERSION from the cap when the user did not
// set it. The two macros use different encodings (201 <-> 210, 102 <-> 120,
// ...); any cap value not listed explicitly maps to 220.
#if defined(BOOST_COMPUTE_MAX_CL_VERSION) && !defined(CL_TARGET_OPENCL_VERSION)
#   if BOOST_COMPUTE_MAX_CL_VERSION == 201
#       define CL_TARGET_OPENCL_VERSION 210
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 200
#       define CL_TARGET_OPENCL_VERSION 200
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 102
#       define CL_TARGET_OPENCL_VERSION 120
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 101
#       define CL_TARGET_OPENCL_VERSION 110
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 100
#       define CL_TARGET_OPENCL_VERSION 100
#   else
#       define CL_TARGET_OPENCL_VERSION 220
#   endif
#endif

// Step 3: consistency check. Reached with both macros defined, i.e. also
// when the user supplied CL_TARGET_OPENCL_VERSION; a pair that does not
// match the table of step 2 is a hard error.
#if defined(BOOST_COMPUTE_MAX_CL_VERSION) && defined(CL_TARGET_OPENCL_VERSION)
#   if BOOST_COMPUTE_MAX_CL_VERSION == 202 && CL_TARGET_OPENCL_VERSION != 220
#       error "Boost.Compute: CL_TARGET_OPENCL_VERSION definition does not match BOOST_COMPUTE_MAX_CL_VERSION"
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 201 && CL_TARGET_OPENCL_VERSION != 210
#       error "Boost.Compute: CL_TARGET_OPENCL_VERSION definition does not match BOOST_COMPUTE_MAX_CL_VERSION"
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 200 && CL_TARGET_OPENCL_VERSION != 200
#       error "Boost.Compute: CL_TARGET_OPENCL_VERSION definition does not match BOOST_COMPUTE_MAX_CL_VERSION"
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 102 && CL_TARGET_OPENCL_VERSION != 120
#       error "Boost.Compute: CL_TARGET_OPENCL_VERSION definition does not match BOOST_COMPUTE_MAX_CL_VERSION"
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 101 && CL_TARGET_OPENCL_VERSION != 110
#       error "Boost.Compute: CL_TARGET_OPENCL_VERSION definition does not match BOOST_COMPUTE_MAX_CL_VERSION"
#   elif BOOST_COMPUTE_MAX_CL_VERSION == 100 && CL_TARGET_OPENCL_VERSION != 100
#       error "Boost.Compute: CL_TARGET_OPENCL_VERSION definition does not match BOOST_COMPUTE_MAX_CL_VERSION"
#   endif
#endif

#endif // BOOST_COMPUTE_DETAIL_CL_VERSION_HPP
// detail/getenv.hpp 0000644 00000001650 15125510617 0010014 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_GETENV_HPP
#define BOOST_COMPUTE_DETAIL_GETENV_HPP

#include <cstdlib>

namespace boost {
namespace compute {
namespace detail {

// Thin wrapper around std::getenv(): returns whatever std::getenv() returns
// for `env_var`.
//
// Its only purpose is to silence MSVC warning C4996 around the call.
inline const char* getenv(const char *env_var)
{
#ifdef _MSC_VER
#  pragma warning(push)
#  pragma warning(disable: 4996)
#endif
    const char *value = std::getenv(env_var);
#ifdef _MSC_VER
#  pragma warning(pop)
#endif
    return value;
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_DETAIL_GETENV_HPP
// detail/read_write_single_value.hpp 0000644 00000004756 15125510617 0013420 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP #define BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP #include <boost/throw_exception.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/event.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/command_queue.hpp> namespace boost { namespace compute { namespace detail { // reads and returns a single value at index in the buffer template<class T> inline T read_single_value(const buffer &buffer, size_t index, command_queue &queue) { BOOST_ASSERT(index < buffer.size() / sizeof(T)); BOOST_ASSERT(buffer.get_context() == queue.get_context()); T value; queue.enqueue_read_buffer(buffer, sizeof(T) * index, sizeof(T), &value); return value; } // reads and returns a the first value in the buffer template<class T> inline T read_single_value(const buffer &buffer, command_queue &queue) { return read_single_value<T>(buffer, 0, queue); } // writes a single value at index to the buffer template<class T> inline event write_single_value(const T &value, const buffer &buffer, size_t index, command_queue &queue) { BOOST_ASSERT(index < buffer.size() / sizeof(T)); BOOST_ASSERT(buffer.get_context() == queue.get_context()); return queue.enqueue_write_buffer(buffer, index * sizeof(T), sizeof(T), &value); } // writes value to the first location in buffer template<class T> inline void write_single_value(const T &value, const buffer &buffer, command_queue &queue) { write_single_value<T>(value, buffer, 0, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP detail/literal.hpp 0000644 00000002644 15125510617 0010164 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See 
accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_LITERAL_HPP #define BOOST_COMPUTE_DETAIL_LITERAL_HPP #include <iomanip> #include <limits> #include <sstream> #include <boost/type_traits/is_same.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { namespace detail { template<class T> std::string make_literal(T x) { std::stringstream s; s << std::setprecision( #ifndef BOOST_NO_CXX11_NUMERIC_LIMITS std::numeric_limits<T>::max_digits10 #else // We don't have max_digits10, so add 3 other digits (this is what is required for // float, and is one more than required for double). 3 + std::numeric_limits<T>::digits10 #endif ) << std::scientific << x; if(boost::is_same<T, float>::value || boost::is_same<T, float_>::value){ s << "f"; } return s.str(); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_LITERAL_HPP detail/parameter_cache.hpp 0000644 00000016027 15125510617 0011633 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP #define BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP #include <algorithm> #include <string> #include <boost/shared_ptr.hpp> #include <boost/make_shared.hpp> #include <boost/noncopyable.hpp> #include <boost/compute/config.hpp> #include <boost/compute/device.hpp> #include <boost/compute/detail/global_static.hpp> #include <boost/compute/version.hpp> #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE #include <cstdio> #include <boost/algorithm/string/trim.hpp> #include <boost/compute/detail/path.hpp> #include <boost/property_tree/ptree.hpp> #include <boost/property_tree/json_parser.hpp> #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE namespace boost { namespace compute { namespace detail { class parameter_cache : boost::noncopyable { public: parameter_cache(const device &device) : m_dirty(false), m_device_name(device.name()) { #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // get offline cache file name (e.g. /home/user/.boost_compute/tune/device.json) m_file_name = make_file_name(); // load parameters from offline cache file (if it exists) if(boost::filesystem::exists(m_file_name)){ read_from_disk(); } #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE } ~parameter_cache() { #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE write_to_disk(); #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE } void set(const std::string &object, const std::string ¶meter, uint_ value) { m_cache[std::make_pair(object, parameter)] = value; // set the dirty flag to true. this will cause the updated parameters // to be stored to disk. 
m_dirty = true; } uint_ get(const std::string &object, const std::string ¶meter, uint_ default_value) { std::map<std::pair<std::string, std::string>, uint_>::iterator iter = m_cache.find(std::make_pair(object, parameter)); if(iter != m_cache.end()){ return iter->second; } else { return default_value; } } static boost::shared_ptr<parameter_cache> get_global_cache(const device &device) { // device name -> parameter cache typedef std::map<std::string, boost::shared_ptr<parameter_cache> > cache_map; BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, ((std::less<std::string>()))); cache_map::iterator iter = caches.find(device.name()); if(iter == caches.end()){ boost::shared_ptr<parameter_cache> cache = boost::make_shared<parameter_cache>(device); caches.insert(iter, std::make_pair(device.name(), cache)); return cache; } else { return iter->second; } } private: #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // returns a string containing a cannoical device name static std::string cannonical_device_name(std::string name) { boost::algorithm::trim(name); std::replace(name.begin(), name.end(), ' ', '_'); std::replace(name.begin(), name.end(), '(', '_'); std::replace(name.begin(), name.end(), ')', '_'); return name; } // returns the boost.compute version string static std::string version_string() { char buf[32]; // snprintf is in Visual Studio since Visual Studio 2015 (_MSC_VER == 1900) #if defined (_MSC_VER) && _MSC_VER < 1900 #define DETAIL_SNPRINTF sprintf_s #else #define DETAIL_SNPRINTF std::snprintf #endif DETAIL_SNPRINTF(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR, BOOST_COMPUTE_VERSION_MINOR, BOOST_COMPUTE_VERSION_PATCH); #undef DETAIL_SNPRINTF return buf; } // returns the file path for the cached parameters std::string make_file_name() const { return detail::parameter_cache_path(true) + cannonical_device_name(m_device_name) + ".json"; } // store current parameters to disk void write_to_disk() { BOOST_ASSERT(!m_file_name.empty()); if(m_dirty){ // save current 
parameters to disk boost::property_tree::ptree pt; pt.put("header.device", m_device_name); pt.put("header.version", version_string()); typedef std::map<std::pair<std::string, std::string>, uint_> map_type; for(map_type::const_iterator iter = m_cache.begin(); iter != m_cache.end(); ++iter){ const std::pair<std::string, std::string> &key = iter->first; pt.add(key.first + "." + key.second, iter->second); } write_json(m_file_name, pt); m_dirty = false; } } // load stored parameters from disk void read_from_disk() { BOOST_ASSERT(!m_file_name.empty()); m_cache.clear(); boost::property_tree::ptree pt; try { read_json(m_file_name, pt); } catch(boost::property_tree::json_parser::json_parser_error&){ // no saved cache file, ignore return; } std::string stored_device; try { stored_device = pt.get<std::string>("header.device"); } catch(boost::property_tree::ptree_bad_path&){ return; } std::string stored_version; try { stored_version = pt.get<std::string>("header.version"); } catch(boost::property_tree::ptree_bad_path&){ return; } if(stored_device == m_device_name && stored_version == version_string()){ typedef boost::property_tree::ptree::const_iterator pt_iter; for(pt_iter iter = pt.begin(); iter != pt.end(); ++iter){ if(iter->first == "header"){ // skip header continue; } boost::property_tree::ptree child_pt = pt.get_child(iter->first); for(pt_iter child_iter = child_pt.begin(); child_iter != child_pt.end(); ++child_iter){ set(iter->first, child_iter->first, boost::lexical_cast<uint_>(child_iter->second.data())); } } } m_dirty = false; } #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE private: bool m_dirty; std::string m_device_name; std::string m_file_name; std::map<std::pair<std::string, std::string>, uint_> m_cache; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP iterator/function_input_iterator.hpp 0000644 00000011567 15125510617 0014100 0 ustar 00 
//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_facade.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> #include <boost/compute/type_traits/result_of.hpp> namespace boost { namespace compute { // forward declaration for function_input_iterator<Function> template<class Function> class function_input_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for function_input_iterator<Function> template<class Function> class function_input_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::function_input_iterator<Function>, typename ::boost::compute::result_of<Function()>::type, ::std::random_access_iterator_tag, typename ::boost::compute::result_of<Function()>::type > type; }; template<class Function> struct function_input_iterator_expr { typedef typename ::boost::compute::result_of<Function()>::type result_type; function_input_iterator_expr(const Function &function) : m_function(function) { } const Function m_function; }; template<class Function> inline meta_kernel& operator<<(meta_kernel &kernel, const function_input_iterator_expr<Function> &expr) { return kernel << expr.m_function(); } } // end detail namespace /// \class function_input_iterator /// \brief Iterator which returns the result of a function when dereferenced /// /// For 
example: /// /// \snippet test/test_function_input_iterator.cpp generate_42 /// /// \see make_function_input_iterator() template<class Function> class function_input_iterator : public detail::function_input_iterator_base<Function>::type { public: typedef typename detail::function_input_iterator_base<Function>::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; typedef Function function; function_input_iterator(const Function &function, size_t index = 0) : m_function(function), m_index(index) { } function_input_iterator(const function_input_iterator<Function> &other) : m_function(other.m_function), m_index(other.m_index) { } function_input_iterator<Function>& operator=(const function_input_iterator<Function> &other) { if(this != &other){ m_function = other.m_function; m_index = other.m_index; } return *this; } ~function_input_iterator() { } size_t get_index() const { return m_index; } template<class Expr> detail::function_input_iterator_expr<Function> operator[](const Expr &expr) const { (void) expr; return detail::function_input_iterator_expr<Function>(m_function); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } bool equal(const function_input_iterator<Function> &other) const { return m_function == other.m_function && m_index == other.m_index; } void increment() { m_index++; } void decrement() { m_index--; } void advance(difference_type n) { m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); } difference_type distance_to(const function_input_iterator<Function> &other) const { return static_cast<difference_type>(other.m_index - m_index); } private: Function m_function; size_t m_index; }; /// Returns a function_input_iterator with \p function. 
/// /// \param function function to execute when dereferenced /// \param index index of the iterator /// /// \return a \c function_input_iterator with \p function template<class Function> inline function_input_iterator<Function> make_function_input_iterator(const Function &function, size_t index = 0) { return function_input_iterator<Function>(function, index); } /// \internal_ (is_device_iterator specialization for function_input_iterator) template<class Function> struct is_device_iterator<function_input_iterator<Function> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP iterator/transform_iterator.hpp 0000644 00000017110 15125510617 0013035 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_adaptor.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> #include <boost/compute/type_traits/result_of.hpp> namespace boost { namespace compute { // forward declaration for transform_iterator template<class InputIterator, class UnaryFunction> class transform_iterator; namespace detail { // meta-function returning the value_type for a transform_iterator template<class InputIterator, class UnaryFunction> struct make_transform_iterator_value_type { typedef typename std::iterator_traits<InputIterator>::value_type value_type; typedef typename boost::compute::result_of<UnaryFunction(value_type)>::type type; }; // helper class which defines the iterator_adaptor super-class // type for transform_iterator template<class InputIterator, class UnaryFunction> class transform_iterator_base { public: typedef ::boost::iterator_adaptor< ::boost::compute::transform_iterator<InputIterator, UnaryFunction>, InputIterator, typename make_transform_iterator_value_type<InputIterator, UnaryFunction>::type, typename std::iterator_traits<InputIterator>::iterator_category, typename make_transform_iterator_value_type<InputIterator, UnaryFunction>::type > type; }; template<class InputIterator, class UnaryFunction, class IndexExpr> struct transform_iterator_index_expr { typedef typename make_transform_iterator_value_type< InputIterator, UnaryFunction >::type result_type; transform_iterator_index_expr(const InputIterator 
&input_iter, const UnaryFunction &transform_expr, const IndexExpr &index_expr) : m_input_iter(input_iter), m_transform_expr(transform_expr), m_index_expr(index_expr) { } const InputIterator m_input_iter; const UnaryFunction m_transform_expr; const IndexExpr m_index_expr; }; template<class InputIterator, class UnaryFunction, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const transform_iterator_index_expr<InputIterator, UnaryFunction, IndexExpr> &expr) { return kernel << expr.m_transform_expr(expr.m_input_iter[expr.m_index_expr]); } } // end detail namespace /// \class transform_iterator /// \brief A transform iterator adaptor. /// /// The transform_iterator adaptor applies a unary function to each element /// produced from the underlying iterator when dereferenced. /// /// For example, to copy from an input range to an output range while taking /// the absolute value of each element: /// /// \snippet test/test_transform_iterator.cpp copy_abs /// /// \see buffer_iterator, make_transform_iterator() template<class InputIterator, class UnaryFunction> class transform_iterator : public detail::transform_iterator_base<InputIterator, UnaryFunction>::type { public: typedef typename detail::transform_iterator_base<InputIterator, UnaryFunction>::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; typedef UnaryFunction unary_function; transform_iterator(InputIterator iterator, UnaryFunction transform) : super_type(iterator), m_transform(transform) { } transform_iterator(const transform_iterator<InputIterator, UnaryFunction> &other) : super_type(other.base()), m_transform(other.m_transform) { } transform_iterator<InputIterator, UnaryFunction>& operator=(const transform_iterator<InputIterator, UnaryFunction> &other) { if(this != &other){ super_type::operator=(other); m_transform = 
other.m_transform; } return *this; } ~transform_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return detail::get_base_iterator_buffer(*this); } template<class IndexExpression> detail::transform_iterator_index_expr<InputIterator, UnaryFunction, IndexExpression> operator[](const IndexExpression &expr) const { return detail::transform_iterator_index_expr<InputIterator, UnaryFunction, IndexExpression>(super_type::base(), m_transform, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { const context &context = super_type::base().get_buffer().get_context(); command_queue queue(context, context.get_device()); detail::meta_kernel k("read"); size_t output_arg = k.add_arg<value_type *>(memory_object::global_memory, "output"); k << "*output = " << m_transform(super_type::base()[k.lit(0)]) << ";"; kernel kernel = k.compile(context); buffer output_buffer(context, sizeof(value_type)); kernel.set_arg(output_arg, output_buffer); queue.enqueue_task(kernel); return detail::read_single_value<value_type>(output_buffer, queue); } private: UnaryFunction m_transform; }; /// Returns a transform_iterator for \p iterator with \p transform. 
/// /// \param iterator the underlying iterator /// \param transform the unary transform function /// /// \return a \c transform_iterator for \p iterator with \p transform /// /// For example, to create an iterator which returns the square-root of each /// value in a \c vector<int>: /// \code /// auto sqrt_iterator = make_transform_iterator(vec.begin(), sqrt<int>()); /// \endcode template<class InputIterator, class UnaryFunction> inline transform_iterator<InputIterator, UnaryFunction> make_transform_iterator(InputIterator iterator, UnaryFunction transform) { return transform_iterator<InputIterator, UnaryFunction>(iterator, transform); } /// \internal_ (is_device_iterator specialization for transform_iterator) template<class InputIterator, class UnaryFunction> struct is_device_iterator< transform_iterator<InputIterator, UnaryFunction> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP iterator/constant_buffer_iterator.hpp 0000644 00000012710 15125510617 0014205 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP #include <cstddef> #include <iterator> #include <boost/iterator/iterator_facade.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for constant_buffer_iterator<T> template<class T> class constant_buffer_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for constant_buffer_iterator<T> template<class T> class constant_buffer_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::constant_buffer_iterator<T>, T, ::std::random_access_iterator_tag, ::boost::compute::detail::buffer_value<T> > type; }; } // end detail namespace /// \class constant_buffer_iterator /// \brief An iterator for a buffer in the \c constant memory space. /// /// The constant_buffer_iterator class provides an iterator for values in a /// buffer in the \c constant memory space. /// /// For iterating over values in the \c global memory space (the most common /// case), use the buffer_iterator class. 
/// /// \see buffer_iterator template<class T> class constant_buffer_iterator : public detail::constant_buffer_iterator_base<T>::type { public: typedef typename detail::constant_buffer_iterator_base<T>::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; constant_buffer_iterator() : m_buffer(0), m_index(0) { } constant_buffer_iterator(const buffer &buffer, size_t index) : m_buffer(&buffer), m_index(index) { } constant_buffer_iterator(const constant_buffer_iterator<T> &other) : m_buffer(other.m_buffer), m_index(other.m_index) { } constant_buffer_iterator<T>& operator=(const constant_buffer_iterator<T> &other) { if(this != &other){ m_buffer = other.m_buffer; m_index = other.m_index; } return *this; } ~constant_buffer_iterator() { } const buffer& get_buffer() const { return *m_buffer; } size_t get_index() const { return m_index; } T read(command_queue &queue) const { BOOST_ASSERT(m_buffer && m_buffer->get()); BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T)); return detail::read_single_value<T>(m_buffer, m_index, queue); } void write(const T &value, command_queue &queue) { BOOST_ASSERT(m_buffer && m_buffer->get()); BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T)); detail::write_single_value<T>(m_buffer, m_index, queue); } template<class Expr> detail::buffer_iterator_index_expr<T, Expr> operator[](const Expr &expr) const { BOOST_ASSERT(m_buffer); BOOST_ASSERT(m_buffer->get()); return detail::buffer_iterator_index_expr<T, Expr>( *m_buffer, m_index, memory_object::constant_memory, expr ); } private: friend class ::boost::iterator_core_access; reference dereference() const { return detail::buffer_value<T>(*m_buffer, m_index); } bool equal(const constant_buffer_iterator<T> &other) const { return m_buffer == other.m_buffer && m_index == other.m_index; } void increment() { m_index++; } void decrement() { m_index--; } void advance(difference_type n) { m_index = 
static_cast<size_t>(static_cast<difference_type>(m_index) + n); } difference_type distance_to(const constant_buffer_iterator<T> &other) const { return static_cast<difference_type>(other.m_index - m_index); } private: const buffer *m_buffer; size_t m_index; }; /// Creates a new constant_buffer_iterator for \p buffer at \p index. /// /// \param buffer the \ref buffer object /// \param index the index in the buffer /// /// \return a \c constant_buffer_iterator for \p buffer at \p index template<class T> inline constant_buffer_iterator<T> make_constant_buffer_iterator(const buffer &buffer, size_t index = 0) { return constant_buffer_iterator<T>(buffer, index); } /// \internal_ (is_device_iterator specialization for constant_buffer_iterator) template<class T> struct is_device_iterator<constant_buffer_iterator<T> > : boost::true_type {}; namespace detail { // is_buffer_iterator specialization for constant_buffer_iterator template<class Iterator> struct is_buffer_iterator< Iterator, typename boost::enable_if< boost::is_same< constant_buffer_iterator<typename Iterator::value_type>, typename boost::remove_const<Iterator>::type > >::type > : public boost::true_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP iterator/strided_iterator.hpp 0000644 00000022574 15125510617 0012472 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_adaptor.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> #include <boost/compute/type_traits/result_of.hpp> namespace boost { namespace compute { // forward declaration for strided_iterator template<class Iterator> class strided_iterator; namespace detail { // helper class which defines the iterator_adaptor super-class // type for strided_iterator template<class Iterator> class strided_iterator_base { public: typedef ::boost::iterator_adaptor< ::boost::compute::strided_iterator<Iterator>, Iterator, typename std::iterator_traits<Iterator>::value_type, typename std::iterator_traits<Iterator>::iterator_category > type; }; // helper class for including stride value in index expression template<class IndexExpr, class Stride> struct stride_expr { stride_expr(const IndexExpr &expr, const Stride &stride) : m_index_expr(expr), m_stride(stride) { } const IndexExpr m_index_expr; const Stride m_stride; }; template<class IndexExpr, class Stride> inline stride_expr<IndexExpr, Stride> make_stride_expr(const IndexExpr &expr, const Stride &stride) { return stride_expr<IndexExpr, Stride>(expr, stride); } template<class IndexExpr, class Stride> inline meta_kernel& operator<<(meta_kernel &kernel, const stride_expr<IndexExpr, Stride> &expr) { // (expr.m_stride * (expr.m_index_expr)) return kernel << "(" << static_cast<ulong_>(expr.m_stride) << " * (" << expr.m_index_expr << "))"; } template<class Iterator, class Stride, 
class IndexExpr> struct strided_iterator_index_expr { typedef typename std::iterator_traits<Iterator>::value_type result_type; strided_iterator_index_expr(const Iterator &input_iter, const Stride &stride, const IndexExpr &index_expr) : m_input_iter(input_iter), m_stride(stride), m_index_expr(index_expr) { } const Iterator m_input_iter; const Stride m_stride; const IndexExpr m_index_expr; }; template<class Iterator, class Stride, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const strided_iterator_index_expr<Iterator, Stride, IndexExpr> &expr) { return kernel << expr.m_input_iter[make_stride_expr(expr.m_index_expr, expr.m_stride)]; } } // end detail namespace /// \class strided_iterator /// \brief An iterator adaptor with adjustable iteration step. /// /// The strided iterator adaptor skips over multiple elements each time /// it is incremented or decremented. /// /// \see buffer_iterator, make_strided_iterator(), make_strided_iterator_end() template<class Iterator> class strided_iterator : public detail::strided_iterator_base<Iterator>::type { public: typedef typename detail::strided_iterator_base<Iterator>::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; strided_iterator(Iterator iterator, difference_type stride) : super_type(iterator), m_stride(static_cast<difference_type>(stride)) { // stride must be greater than zero BOOST_ASSERT_MSG(stride > 0, "Stride must be greater than zero"); } strided_iterator(const strided_iterator<Iterator> &other) : super_type(other.base()), m_stride(other.m_stride) { } strided_iterator<Iterator>& operator=(const strided_iterator<Iterator> &other) { if(this != &other){ super_type::operator=(other); m_stride = other.m_stride; } return *this; } ~strided_iterator() { } size_t get_index() const { return super_type::base().get_index(); } 
const buffer& get_buffer() const { return detail::get_base_iterator_buffer(*this); } template<class IndexExpression> detail::strided_iterator_index_expr<Iterator, difference_type, IndexExpression> operator[](const IndexExpression &expr) const { typedef typename detail::strided_iterator_index_expr<Iterator, difference_type, IndexExpression> StridedIndexExprType; return StridedIndexExprType(super_type::base(),m_stride, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } bool equal(const strided_iterator<Iterator> &other) const { return (other.m_stride == m_stride) && (other.base_reference() == this->base_reference()); } void increment() { std::advance(super_type::base_reference(), m_stride); } void decrement() { std::advance(super_type::base_reference(),-m_stride); } void advance(typename super_type::difference_type n) { std::advance(super_type::base_reference(), n * m_stride); } difference_type distance_to(const strided_iterator<Iterator> &other) const { return std::distance(this->base_reference(), other.base_reference()) / m_stride; } private: difference_type m_stride; }; /// Returns a strided_iterator for \p iterator with \p stride. /// /// \param iterator the underlying iterator /// \param stride the iteration step for strided_iterator /// /// \return a \c strided_iterator for \p iterator with \p stride. /// /// For example, to create an iterator which iterates over every other /// element in a \c vector<int>: /// \code /// auto strided_iterator = make_strided_iterator(vec.begin(), 2); /// \endcode template<class Iterator> inline strided_iterator<Iterator> make_strided_iterator(Iterator iterator, typename std::iterator_traits<Iterator>::difference_type stride) { return strided_iterator<Iterator>(iterator, stride); } /// Returns a strided_iterator which refers to element that would follow /// the last element accessible through strided_iterator for \p first iterator /// with \p stride. 
/// /// Parameter \p stride must be greater than zero. /// /// \param first the iterator referring to the first element accessible /// through strided_iterator for \p first with \p stride /// \param last the iterator referring to the last element that may be //// accessible through strided_iterator for \p first with \p stride /// \param stride the iteration step /// /// \return a \c strided_iterator referring to element that would follow /// the last element accessible through strided_iterator for \p first /// iterator with \p stride. /// /// It can be helpful when iterating over strided_iterator: /// \code /// // vec.size() may not be divisible by 3 /// auto strided_iterator_begin = make_strided_iterator(vec.begin(), 3); /// auto strided_iterator_end = make_strided_iterator_end(vec.begin(), vec.end(), 3); /// /// // copy every 3rd element to result /// boost::compute::copy( /// strided_iterator_begin, /// strided_iterator_end, /// result.begin(), /// queue /// ); /// \endcode template<class Iterator> strided_iterator<Iterator> make_strided_iterator_end(Iterator first, Iterator last, typename std::iterator_traits<Iterator>::difference_type stride) { typedef typename std::iterator_traits<Iterator>::difference_type difference_type; // calculate distance from end to the last element that would be // accessible through strided_iterator. 
difference_type range = std::distance(first, last); difference_type d = (range - 1) / stride; d *= stride; d -= range; // advance from end to the element that would follow the last // accessible element Iterator end_for_strided_iterator = last; std::advance(end_for_strided_iterator, d + stride); return strided_iterator<Iterator>(end_for_strided_iterator, stride); } /// \internal_ (is_device_iterator specialization for strided_iterator) template<class Iterator> struct is_device_iterator<strided_iterator<Iterator> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP iterator/constant_iterator.hpp 0000644 00000010224 15125510617 0012652 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP #include <string> #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_facade.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for constant_iterator<T> template<class T> class constant_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for constant_iterator<T> template<class T> class constant_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::constant_iterator<T>, T, ::std::random_access_iterator_tag > type; }; } // end detail namespace /// \class constant_iterator /// \brief An iterator with a constant value. /// /// The constant_iterator class provides an iterator which returns a constant /// value when dereferenced. 
/// /// For example, this could be used to implement the fill() algorithm in terms /// of the copy() algorithm by copying from a range of constant iterators: /// /// \snippet test/test_constant_iterator.cpp fill_with_copy /// /// \see make_constant_iterator() template<class T> class constant_iterator : public detail::constant_iterator_base<T>::type { public: typedef typename detail::constant_iterator_base<T>::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; constant_iterator(const T &value, size_t index = 0) : m_value(value), m_index(index) { } constant_iterator(const constant_iterator<T> &other) : m_value(other.m_value), m_index(other.m_index) { } constant_iterator<T>& operator=(const constant_iterator<T> &other) { if(this != &other){ m_value = other.m_value; m_index = other.m_index; } return *this; } ~constant_iterator() { } size_t get_index() const { return m_index; } /// \internal_ template<class Expr> detail::meta_kernel_literal<T> operator[](const Expr &expr) const { (void) expr; return detail::meta_kernel::make_lit<T>(m_value); } private: friend class ::boost::iterator_core_access; /// \internal_ reference dereference() const { return m_value; } /// \internal_ bool equal(const constant_iterator<T> &other) const { return m_value == other.m_value && m_index == other.m_index; } /// \internal_ void increment() { m_index++; } /// \internal_ void decrement() { m_index--; } /// \internal_ void advance(difference_type n) { m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); } /// \internal_ difference_type distance_to(const constant_iterator<T> &other) const { return static_cast<difference_type>(other.m_index - m_index); } private: T m_value; size_t m_index; }; /// Returns a new constant_iterator with \p value at \p index. 
/// /// \param value the constant value /// \param index the iterators index /// /// \return a \c constant_iterator with \p value template<class T> inline constant_iterator<T> make_constant_iterator(const T &value, size_t index = 0) { return constant_iterator<T>(value, index); } /// \internal_ (is_device_iterator specialization for constant_iterator) template<class T> struct is_device_iterator<constant_iterator<T> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP iterator/permutation_iterator.hpp 0000644 00000014251 15125510617 0013374 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP #include <string> #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_adaptor.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for transform_iterator template<class ElementIterator, class IndexIterator> class permutation_iterator; namespace detail { // helper class which defines the iterator_adaptor super-class // type for permutation_iterator template<class ElementIterator, class IndexIterator> class 
permutation_iterator_base { public: typedef ::boost::iterator_adaptor< ::boost::compute::permutation_iterator<ElementIterator, IndexIterator>, ElementIterator > type; }; template<class ElementIterator, class IndexIterator, class IndexExpr> struct permutation_iterator_access_expr { typedef typename std::iterator_traits<ElementIterator>::value_type result_type; permutation_iterator_access_expr(const ElementIterator &e, const IndexIterator &i, const IndexExpr &expr) : m_element_iter(e), m_index_iter(i), m_expr(expr) { } const ElementIterator m_element_iter; const IndexIterator m_index_iter; const IndexExpr m_expr; }; template<class ElementIterator, class IndexIterator, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const permutation_iterator_access_expr<ElementIterator, IndexIterator, IndexExpr> &expr) { return kernel << expr.m_element_iter[expr.m_index_iter[expr.m_expr]]; } } // end detail namespace /// \class permutation_iterator /// \brief The permutation_iterator class provides a permuation iterator /// /// A permutation iterator iterates over a value range and an index range. When /// dereferenced, it returns the value from the value range using the current /// index from the index range. 
/// /// For example, to reverse a range using the copy() algorithm and a permutation /// sequence: /// /// \snippet test/test_permutation_iterator.cpp reverse_range /// /// \see make_permutation_iterator() template<class ElementIterator, class IndexIterator> class permutation_iterator : public detail::permutation_iterator_base<ElementIterator, IndexIterator>::type { public: typedef typename detail::permutation_iterator_base<ElementIterator, IndexIterator>::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; typedef IndexIterator index_iterator; permutation_iterator(ElementIterator e, IndexIterator i) : super_type(e), m_map(i) { } permutation_iterator(const permutation_iterator<ElementIterator, IndexIterator> &other) : super_type(other), m_map(other.m_map) { } permutation_iterator<ElementIterator, IndexIterator>& operator=(const permutation_iterator<ElementIterator, IndexIterator> &other) { if(this != &other){ super_type::operator=(other); m_map = other.m_map; } return *this; } ~permutation_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return detail::get_base_iterator_buffer(*this); } template<class IndexExpr> detail::permutation_iterator_access_expr<ElementIterator, IndexIterator, IndexExpr> operator[](const IndexExpr &expr) const { return detail::permutation_iterator_access_expr<ElementIterator, IndexIterator, IndexExpr>(super_type::base(), m_map, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } private: IndexIterator m_map; }; /// Returns a permutation_iterator for \p e using indices from \p i. 
/// /// \param e the element range iterator /// \param i the index range iterator /// /// \return a \c permutation_iterator for \p e using \p i template<class ElementIterator, class IndexIterator> inline permutation_iterator<ElementIterator, IndexIterator> make_permutation_iterator(ElementIterator e, IndexIterator i) { return permutation_iterator<ElementIterator, IndexIterator>(e, i); } /// \internal_ (is_device_iterator specialization for permutation_iterator) template<class ElementIterator, class IndexIterator> struct is_device_iterator< permutation_iterator<ElementIterator, IndexIterator> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP iterator/detail/swizzle_iterator.hpp 0000644 00000013400 15125510617 0013771 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP #include <string> #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_adaptor.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/make_vector_type.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { // forward declaration for swizzle_iterator template<class InputIterator, size_t Size> class swizzle_iterator; // meta-function returing the value_type for a swizzle_iterator template<class InputIterator, size_t Size> struct make_swizzle_iterator_value_type { typedef typename make_vector_type< typename scalar_type< typename std::iterator_traits<InputIterator>::value_type >::type, Size >::type type; }; // helper class which defines the iterator_adaptor super-class // type for swizzle_iterator template<class InputIterator, size_t Size> class swizzle_iterator_base { public: typedef ::boost::iterator_adaptor< swizzle_iterator<InputIterator, Size>, InputIterator, typename make_swizzle_iterator_value_type<InputIterator, Size>::type, typename std::iterator_traits<InputIterator>::iterator_category, typename make_swizzle_iterator_value_type<InputIterator, Size>::type > type; }; template<class InputIterator, size_t Size, class IndexExpr> struct swizzle_iterator_index_expr { typedef typename make_swizzle_iterator_value_type<InputIterator, Size>::type result_type; swizzle_iterator_index_expr(const InputIterator &input_iter, const IndexExpr &index_expr, const std::string &components) : m_input_iter(input_iter), 
m_index_expr(index_expr), m_components(components) { } InputIterator m_input_iter; IndexExpr m_index_expr; std::string m_components; }; template<class InputIterator, size_t Size, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const swizzle_iterator_index_expr<InputIterator, Size, IndexExpr> &expr) { return kernel << expr.m_input_iter[expr.m_index_expr] << "." << expr.m_components; } template<class InputIterator, size_t Size> class swizzle_iterator : public swizzle_iterator_base<InputIterator, Size>::type { public: typedef typename swizzle_iterator_base<InputIterator, Size>::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; BOOST_STATIC_CONSTANT(size_t, vector_size = Size); swizzle_iterator(InputIterator iterator, const std::string &components) : super_type(iterator), m_components(components) { BOOST_ASSERT(components.size() == Size); } swizzle_iterator(const swizzle_iterator<InputIterator, Size> &other) : super_type(other.base()), m_components(other.m_components) { BOOST_ASSERT(m_components.size() == Size); } swizzle_iterator<InputIterator, Size>& operator=(const swizzle_iterator<InputIterator, Size> &other) { if(this != &other){ super_type::operator=(other); m_components = other.m_components; } return *this; } ~swizzle_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return get_base_iterator_buffer(*this); } template<class IndexExpression> swizzle_iterator_index_expr<InputIterator, Size, IndexExpression> operator[](const IndexExpression &expr) const { return swizzle_iterator_index_expr<InputIterator, Size, IndexExpression>(super_type::base(), expr, m_components); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } private: std::string 
m_components; }; template<size_t Size, class InputIterator> inline swizzle_iterator<InputIterator, Size> make_swizzle_iterator(InputIterator iterator, const std::string &components) { return swizzle_iterator<InputIterator, Size>(iterator, components); } } // end detail namespace // is_device_iterator specialization for swizzle_iterator template<size_t Size, class InputIterator> struct is_device_iterator<detail::swizzle_iterator<InputIterator, Size> > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_SWIZZLE_ITERATOR_HPP iterator/detail/get_base_iterator_buffer.hpp 0000644 00000003212 15125510617 0015364 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP #define BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP namespace boost { namespace compute { namespace detail { // returns the buffer for an iterator adaptor's base iterator if // it exists, otherwise returns a null buffer object. 
template<class Iterator> inline const buffer& get_base_iterator_buffer(const Iterator &iter, typename boost::enable_if< is_buffer_iterator< typename Iterator::base_type > >::type* = 0) { return iter.base().get_buffer(); } template<class Iterator> inline const buffer& get_base_iterator_buffer(const Iterator &iter, typename boost::disable_if< is_buffer_iterator< typename Iterator::base_type > >::type* = 0) { (void) iter; static buffer null_buffer; return null_buffer; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP iterator/zip_iterator.hpp 0000644 00000024303 15125510617 0011626 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/fusion/algorithm/iteration/for_each.hpp> #include <boost/iterator/iterator_facade.hpp> #include <boost/mpl/back_inserter.hpp> #include <boost/mpl/transform.hpp> #include <boost/mpl/vector.hpp> #include <boost/preprocessor/repetition.hpp> #include <boost/tuple/tuple.hpp> #include <boost/tuple/tuple_comparison.hpp> #include <boost/compute/config.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/mpl_vector_to_tuple.hpp> #include <boost/compute/types/tuple.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> #include <boost/compute/type_traits/type_name.hpp> namespace boost { namespace compute { // forward declaration for zip_iterator template<class IteratorTuple> class zip_iterator; namespace detail { namespace mpl = boost::mpl; // meta-function returning the value_type for an iterator template<class Iterator> struct make_iterator_value_type { typedef typename std::iterator_traits<Iterator>::value_type type; }; // meta-function returning the value_type for a zip_iterator template<class IteratorTuple> struct make_zip_iterator_value_type { typedef typename detail::mpl_vector_to_tuple< typename mpl::transform< IteratorTuple, make_iterator_value_type<mpl::_1>, mpl::back_inserter<mpl::vector<> > >::type >::type type; }; // helper class which defines the iterator_facade super-class // type for zip_iterator template<class IteratorTuple> class zip_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::zip_iterator<IteratorTuple>, typename make_zip_iterator_value_type<IteratorTuple>::type, ::std::random_access_iterator_tag, typename make_zip_iterator_value_type<IteratorTuple>::type > type; }; template<class 
IteratorTuple, class IndexExpr> struct zip_iterator_index_expr { typedef typename make_zip_iterator_value_type<IteratorTuple>::type result_type; zip_iterator_index_expr(const IteratorTuple &iterators, const IndexExpr &index_expr) : m_iterators(iterators), m_index_expr(index_expr) { } const IteratorTuple m_iterators; const IndexExpr m_index_expr; }; /// \internal_ #define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ BOOST_PP_EXPR_IF(n, << ", ") \ << boost::get<n>(expr.m_iterators)[expr.m_index_expr] /// \internal_ #define BOOST_COMPUTE_PRINT_ZIP_IDX(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class Iterator), class IndexExpr> \ inline meta_kernel& operator<<( \ meta_kernel &kernel, \ const zip_iterator_index_expr< \ boost::tuple<BOOST_PP_ENUM_PARAMS(n, Iterator)>, \ IndexExpr \ > &expr) \ { \ typedef typename \ boost::tuple<BOOST_PP_ENUM_PARAMS(n, Iterator)> \ tuple_type; \ typedef typename \ make_zip_iterator_value_type<tuple_type>::type \ value_type; \ kernel.inject_type<value_type>(); \ return kernel \ << "(" << type_name<value_type>() << ")" \ << "{ " \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ << "}"; \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_ZIP_IDX, ~) #undef BOOST_COMPUTE_PRINT_ZIP_IDX #undef BOOST_COMPUTE_PRINT_ELEM struct iterator_advancer { iterator_advancer(size_t n) : m_distance(n) { } template<class Iterator> void operator()(Iterator &i) const { std::advance(i, m_distance); } size_t m_distance; }; template<class Iterator> void increment_iterator(Iterator &i) { i++; } template<class Iterator> void decrement_iterator(Iterator &i) { i--; } } // end detail namespace /// \class zip_iterator /// \brief A zip iterator adaptor. /// /// The zip_iterator class combines values from multiple input iterators. When /// dereferenced it returns a tuple containing each value at the current /// position in each input range. 
/// /// \see make_zip_iterator() template<class IteratorTuple> class zip_iterator : public detail::zip_iterator_base<IteratorTuple>::type { public: typedef typename detail::zip_iterator_base<IteratorTuple>::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; typedef IteratorTuple iterator_tuple; zip_iterator(IteratorTuple iterators) : m_iterators(iterators) { } zip_iterator(const zip_iterator<IteratorTuple> &other) : m_iterators(other.m_iterators) { } zip_iterator<IteratorTuple>& operator=(const zip_iterator<IteratorTuple> &other) { if(this != &other){ super_type::operator=(other); m_iterators = other.m_iterators; } return *this; } ~zip_iterator() { } const IteratorTuple& get_iterator_tuple() const { return m_iterators; } template<class IndexExpression> detail::zip_iterator_index_expr<IteratorTuple, IndexExpression> operator[](const IndexExpression &expr) const { return detail::zip_iterator_index_expr<IteratorTuple, IndexExpression>(m_iterators, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } bool equal(const zip_iterator<IteratorTuple> &other) const { return m_iterators == other.m_iterators; } void increment() { boost::fusion::for_each(m_iterators, detail::increment_iterator); } void decrement() { boost::fusion::for_each(m_iterators, detail::decrement_iterator); } void advance(difference_type n) { boost::fusion::for_each(m_iterators, detail::iterator_advancer(n)); } difference_type distance_to(const zip_iterator<IteratorTuple> &other) const { return std::distance(boost::get<0>(m_iterators), boost::get<0>(other.m_iterators)); } private: IteratorTuple m_iterators; }; /// Creates a zip_iterator for \p iterators. 
/// /// \param iterators a tuple of input iterators to zip together /// /// \return a \c zip_iterator for \p iterators /// /// For example, to zip together iterators from three vectors (\c a, \c b, and /// \p c): /// \code /// auto zipped = boost::compute::make_zip_iterator( /// boost::make_tuple(a.begin(), b.begin(), c.begin()) /// ); /// \endcode template<class IteratorTuple> inline zip_iterator<IteratorTuple> make_zip_iterator(IteratorTuple iterators) { return zip_iterator<IteratorTuple>(iterators); } /// \internal_ (is_device_iterator specialization for zip_iterator) template<class IteratorTuple> struct is_device_iterator<zip_iterator<IteratorTuple> > : boost::true_type {}; namespace detail { // get<N>() specialization for zip_iterator /// \internal_ #define BOOST_COMPUTE_ZIP_GET_N(z, n, unused) \ template<size_t N, class IteratorTuple, class IndexExpr, \ BOOST_PP_ENUM_PARAMS(n, class T)> \ inline meta_kernel& \ operator<<(meta_kernel &kernel, \ const invoked_get< \ N, \ zip_iterator_index_expr<IteratorTuple, IndexExpr>, \ boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> \ > &expr) \ { \ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> Tuple; \ typedef typename boost::tuples::element<N, Tuple>::type T; \ BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<Tuple>::value)); \ kernel.inject_type<T>(); \ return kernel \ << boost::get<N>(expr.m_arg.m_iterators)[expr.m_arg.m_index_expr]; \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_ZIP_GET_N, ~) #undef BOOST_COMPUTE_ZIP_GET_N } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP iterator/buffer_iterator.hpp 0000644 00000020116 15125510617 0012273 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/type_traits.hpp> #include <boost/static_assert.hpp> #include <boost/utility/enable_if.hpp> #include <boost/iterator/iterator_facade.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/detail/buffer_value.hpp> #include <boost/compute/detail/is_buffer_iterator.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for buffer_iterator<T> template<class T> class buffer_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for buffer_iterator<T> template<class T> class buffer_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::buffer_iterator<T>, T, ::std::random_access_iterator_tag, ::boost::compute::detail::buffer_value<T> > type; }; template<class T, class IndexExpr> struct buffer_iterator_index_expr { typedef T result_type; buffer_iterator_index_expr(const buffer &buffer, size_t index, const memory_object::address_space address_space, const IndexExpr &expr) : m_buffer(buffer.get(), false), m_index(index), m_address_space(address_space), m_expr(expr) { } buffer_iterator_index_expr(const buffer_iterator_index_expr& other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index), m_address_space(other.m_address_space), m_expr(other.m_expr) { } ~buffer_iterator_index_expr() { // set buffer to null so that its reference count will // not be decremented when its destructor is called m_buffer.get() = 0; } operator T() const { 
BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value, "Index expression must be integral"); return buffer_value<T>(m_buffer, size_t(m_expr) * sizeof(T)); } const buffer m_buffer; const size_t m_index; const memory_object::address_space m_address_space; const IndexExpr m_expr; }; template<class T, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const buffer_iterator_index_expr<T, IndexExpr> &expr) { if(expr.m_index == 0){ return kernel << kernel.get_buffer_identifier<T>(expr.m_buffer, expr.m_address_space) << '[' << expr.m_expr << ']'; } else { return kernel << kernel.get_buffer_identifier<T>(expr.m_buffer, expr.m_address_space) << '[' << uint_(expr.m_index) << "+(" << expr.m_expr << ")]"; } } } // end detail namespace /// \class buffer_iterator /// \brief An iterator for values in a buffer. /// /// The buffer_iterator class iterates over values in a memory buffer on a /// compute device. It is the most commonly used iterator in Boost.Compute /// and is used by the \ref vector "vector<T>" and \ref array "array<T, N>" /// container classes. /// /// Buffer iterators store a reference to a memory buffer along with an index /// into that memory buffer. /// /// The buffer_iterator class allows for arbitrary OpenCL memory objects /// (including those created outside of Boost.Compute) to be used with the /// Boost.Compute algorithms (such as transform() and sort()). 
For example, /// to reverse the contents of an OpenCL memory buffer containing a set of /// integers: /// /// \snippet test/test_buffer_iterator.cpp reverse_external_buffer /// /// \see buffer, make_buffer_iterator() template<class T> class buffer_iterator : public detail::buffer_iterator_base<T>::type { public: typedef typename detail::buffer_iterator_base<T>::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; buffer_iterator() : m_index(0) { } buffer_iterator(const buffer &buffer, size_t index) : m_buffer(buffer.get(), false), m_index(index) { } buffer_iterator(const buffer_iterator<T> &other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { } buffer_iterator<T>& operator=(const buffer_iterator<T> &other) { if(this != &other){ m_buffer.get() = other.m_buffer.get(); m_index = other.m_index; } return *this; } ~buffer_iterator() { // set buffer to null so that its reference count will // not be decremented when its destructor is called m_buffer.get() = 0; } const buffer& get_buffer() const { return m_buffer; } size_t get_index() const { return m_index; } T read(command_queue &queue) const { BOOST_ASSERT(m_buffer.get()); BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T)); return detail::read_single_value<T>(m_buffer, m_index, queue); } void write(const T &value, command_queue &queue) { BOOST_ASSERT(m_buffer.get()); BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T)); detail::write_single_value<T>(value, m_buffer, m_index, queue); } /// \internal_ template<class Expr> detail::buffer_iterator_index_expr<T, Expr> operator[](const Expr &expr) const { BOOST_ASSERT(m_buffer.get()); return detail::buffer_iterator_index_expr<T, Expr>( m_buffer, m_index, memory_object::global_memory, expr ); } private: friend class ::boost::iterator_core_access; /// \internal_ reference dereference() const { return detail::buffer_value<T>(m_buffer, m_index * sizeof(T)); } /// \internal_ bool 
equal(const buffer_iterator<T> &other) const { return m_buffer.get() == other.m_buffer.get() && m_index == other.m_index; } /// \internal_ void increment() { m_index++; } /// \internal_ void decrement() { m_index--; } /// \internal_ void advance(difference_type n) { m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); } /// \internal_ difference_type distance_to(const buffer_iterator<T> &other) const { return static_cast<difference_type>(other.m_index - m_index); } private: const buffer m_buffer; size_t m_index; }; /// Creates a new \ref buffer_iterator for \p buffer at \p index. /// /// \param buffer the \ref buffer object /// \param index the index in the buffer /// /// \return a \c buffer_iterator for \p buffer at \p index template<class T> inline buffer_iterator<T> make_buffer_iterator(const buffer &buffer, size_t index = 0) { return buffer_iterator<T>(buffer, index); } /// \internal_ (is_device_iterator specialization for buffer_iterator) template<class T> struct is_device_iterator<buffer_iterator<T> > : boost::true_type {}; namespace detail { // is_buffer_iterator specialization for buffer_iterator template<class Iterator> struct is_buffer_iterator< Iterator, typename boost::enable_if< boost::is_same< buffer_iterator<typename Iterator::value_type>, typename boost::remove_const<Iterator>::type > >::type > : public boost::true_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP iterator/discard_iterator.hpp 0000644 00000007504 15125510617 0012441 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP #include <string> #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_facade.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for discard_iterator class discard_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for discard_iterator struct discard_iterator_base { typedef ::boost::iterator_facade< ::boost::compute::discard_iterator, void, ::std::random_access_iterator_tag, void * > type; }; template<class IndexExpr> struct discard_iterator_index_expr { typedef void result_type; discard_iterator_index_expr(const IndexExpr &expr) : m_expr(expr) { } IndexExpr m_expr; }; template<class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const discard_iterator_index_expr<IndexExpr> &expr) { (void) expr; return kernel; } } // end detail namespace /// \class discard_iterator /// \brief An iterator which discards all values written to it. 
/// /// \see make_discard_iterator(), constant_iterator class discard_iterator : public detail::discard_iterator_base::type { public: typedef detail::discard_iterator_base::type super_type; typedef super_type::reference reference; typedef super_type::difference_type difference_type; discard_iterator(size_t index = 0) : m_index(index) { } discard_iterator(const discard_iterator &other) : m_index(other.m_index) { } discard_iterator& operator=(const discard_iterator &other) { if(this != &other){ m_index = other.m_index; } return *this; } ~discard_iterator() { } /// \internal_ template<class Expr> detail::discard_iterator_index_expr<Expr> operator[](const Expr &expr) const { return detail::discard_iterator_index_expr<Expr>(expr); } private: friend class ::boost::iterator_core_access; /// \internal_ reference dereference() const { return 0; } /// \internal_ bool equal(const discard_iterator &other) const { return m_index == other.m_index; } /// \internal_ void increment() { m_index++; } /// \internal_ void decrement() { m_index--; } /// \internal_ void advance(difference_type n) { m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); } /// \internal_ difference_type distance_to(const discard_iterator &other) const { return static_cast<difference_type>(other.m_index - m_index); } private: size_t m_index; }; /// Returns a new discard_iterator with \p index. 
/// /// \param index the index of the iterator /// /// \return a \c discard_iterator at \p index inline discard_iterator make_discard_iterator(size_t index = 0) { return discard_iterator(index); } /// internal_ (is_device_iterator specialization for discard_iterator) template<> struct is_device_iterator<discard_iterator> : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP iterator/counting_iterator.hpp 0000644 00000010772 15125510617 0012657 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP #include <string> #include <cstddef> #include <iterator> #include <boost/config.hpp> #include <boost/iterator/iterator_facade.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { // forward declaration for counting_iterator<T> template<class T> class counting_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for counting_iterator<T> template<class T> class counting_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::counting_iterator<T>, T, ::std::random_access_iterator_tag > type; }; template<class T, class IndexExpr> struct counting_iterator_index_expr { typedef T result_type; counting_iterator_index_expr(const T init, const IndexExpr &expr) : m_init(init), m_expr(expr) { } const T m_init; const IndexExpr m_expr; }; template<class T, 
class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const counting_iterator_index_expr<T, IndexExpr> &expr) { return kernel << '(' << expr.m_init << '+' << expr.m_expr << ')'; } } // end detail namespace /// \class counting_iterator /// \brief The counting_iterator class implements a counting iterator. /// /// A counting iterator returns an internal value (initialized with \p init) /// which is incremented each time the iterator is incremented. /// /// For example, this could be used to implement the iota() algorithm in terms /// of the copy() algorithm by copying from a range of counting iterators: /// /// \snippet test/test_counting_iterator.cpp iota_with_copy /// /// \see make_counting_iterator() template<class T> class counting_iterator : public detail::counting_iterator_base<T>::type { public: typedef typename detail::counting_iterator_base<T>::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; counting_iterator(const T &init) : m_init(init) { } counting_iterator(const counting_iterator<T> &other) : m_init(other.m_init) { } counting_iterator<T>& operator=(const counting_iterator<T> &other) { if(this != &other){ m_init = other.m_init; } return *this; } ~counting_iterator() { } size_t get_index() const { return 0; } template<class Expr> detail::counting_iterator_index_expr<T, Expr> operator[](const Expr &expr) const { return detail::counting_iterator_index_expr<T, Expr>(m_init, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return m_init; } bool equal(const counting_iterator<T> &other) const { return m_init == other.m_init; } void increment() { m_init++; } void decrement() { m_init--; } void advance(difference_type n) { m_init += static_cast<T>(n); } difference_type distance_to(const counting_iterator<T> &other) const { return difference_type(other.m_init) - difference_type(m_init); } private: T m_init; }; /// Returns a 
/// new counting_iterator starting at \p init.
///
/// \param init the initial value
///
/// \return a counting_iterator with \p init.
///
/// For example, to create a counting iterator which returns unsigned integers
/// and increments from one:
/// \code
/// auto iter = make_counting_iterator<uint_>(1);
/// \endcode
template<class T>
inline counting_iterator<T> make_counting_iterator(const T &init)
{
    return counting_iterator<T>(init);
}

/// \internal_ (is_device_iterator specialization for counting_iterator)
template<class T>
struct is_device_iterator<counting_iterator<T> > : boost::true_type {};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP
image2d.hpp 0000644 00000001045 15125510617 0006570 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

// deprecated, use <boost/compute/image/image2d.hpp> instead
// (this header only forwards to the new location)
#include <boost/compute/image/image2d.hpp>
cl_ext.hpp 0000644 00000001230 15125510617 0006532 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CL_EXT_HPP #define BOOST_COMPUTE_CL_EXT_HPP #include "detail/cl_versions.hpp" #if defined(__APPLE__) #include <OpenCL/cl_ext.h> #else #include <CL/cl_ext.h> #endif #endif // BOOST_COMPUTE_CL_EXT_HPP platform.hpp 0000644 00000016374 15125510617 0007117 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_PLATFORM_HPP #define BOOST_COMPUTE_PLATFORM_HPP #include <algorithm> #include <string> #include <vector> #include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/classification.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/device.hpp> #include <boost/compute/detail/get_object_info.hpp> namespace boost { namespace compute { /// \class platform /// \brief A compute platform. /// /// The platform class provides an interface to an OpenCL platform. /// /// To obtain a list of all platforms on the system use the /// system::platforms() method. /// /// \see device, context class platform { public: /// Creates a new platform object for \p id. explicit platform(cl_platform_id id) : m_platform(id) { } /// Creates a new platform as a copy of \p other. platform(const platform &other) : m_platform(other.m_platform) { } /// Copies the platform id from \p other. platform& operator=(const platform &other) { if(this != &other){ m_platform = other.m_platform; } return *this; } /// Destroys the platform object. ~platform() { } /// Returns the ID of the platform. 
cl_platform_id id() const { return m_platform; } /// Returns the name of the platform. std::string name() const { return get_info<std::string>(CL_PLATFORM_NAME); } /// Returns the name of the vendor for the platform. std::string vendor() const { return get_info<std::string>(CL_PLATFORM_VENDOR); } /// Returns the profile string for the platform. std::string profile() const { return get_info<std::string>(CL_PLATFORM_PROFILE); } /// Returns the version string for the platform. std::string version() const { return get_info<std::string>(CL_PLATFORM_VERSION); } /// Returns a list of extensions supported by the platform. std::vector<std::string> extensions() const { std::string extensions_string = get_info<std::string>(CL_PLATFORM_EXTENSIONS); std::vector<std::string> extensions_vector; boost::split(extensions_vector, extensions_string, boost::is_any_of("\t "), boost::token_compress_on); return extensions_vector; } /// Returns \c true if the platform supports the extension with /// \p name. bool supports_extension(const std::string &name) const { const std::vector<std::string> extensions = this->extensions(); return std::find( extensions.begin(), extensions.end(), name) != extensions.end(); } /// Returns a list of devices on the platform. std::vector<device> devices(cl_device_type type = CL_DEVICE_TYPE_ALL) const { size_t count = device_count(type); if(count == 0){ // no devices for this platform return std::vector<device>(); } std::vector<cl_device_id> device_ids(count); cl_int ret = clGetDeviceIDs(m_platform, type, static_cast<cl_uint>(count), &device_ids[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } std::vector<device> devices; for(cl_uint i = 0; i < count; i++){ devices.push_back(device(device_ids[i])); } return devices; } /// Returns the number of devices on the platform. 
size_t device_count(cl_device_type type = CL_DEVICE_TYPE_ALL) const { cl_uint count = 0; cl_int ret = clGetDeviceIDs(m_platform, type, 0, 0, &count); if(ret != CL_SUCCESS){ if(ret == CL_DEVICE_NOT_FOUND){ // no devices for this platform return 0; } else { // something else went wrong BOOST_THROW_EXCEPTION(opencl_error(ret)); } } return count; } /// Returns information about the platform. /// /// \see_opencl_ref{clGetPlatformInfo} template<class T> T get_info(cl_platform_info info) const { return detail::get_object_info<T>(clGetPlatformInfo, m_platform, info); } /// \overload template<int Enum> typename detail::get_object_info_type<platform, Enum>::type get_info() const; /// Returns the address of the \p function_name extension /// function. Returns \c 0 if \p function_name is invalid. void* get_extension_function_address(const char *function_name) const { #ifdef BOOST_COMPUTE_CL_VERSION_1_2 return clGetExtensionFunctionAddressForPlatform(m_platform, function_name); #else return clGetExtensionFunctionAddress(function_name); #endif } /// Requests that the platform unload any compiler resources. void unload_compiler() { #ifdef BOOST_COMPUTE_CL_VERSION_1_2 clUnloadPlatformCompiler(m_platform); #else clUnloadCompiler(); #endif } /// Returns \c true if the platform is the same at \p other. bool operator==(const platform &other) const { return m_platform == other.m_platform; } /// Returns \c true if the platform is different from \p other. bool operator!=(const platform &other) const { return m_platform != other.m_platform; } /// Returns \c true if the platform OpenCL version is major.minor /// or newer; otherwise returns \c false. bool check_version(int major, int minor) const { std::stringstream stream; stream << version(); int actual_major, actual_minor; stream.ignore(7); // 'OpenCL ' stream >> actual_major; stream.ignore(1); // '.' 
stream >> actual_minor; return actual_major > major || (actual_major == major && actual_minor >= minor); } private: cl_platform_id m_platform; }; /// \internal_ define get_info() specializations for platform BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(platform, ((std::string, CL_PLATFORM_PROFILE)) ((std::string, CL_PLATFORM_VERSION)) ((std::string, CL_PLATFORM_NAME)) ((std::string, CL_PLATFORM_VENDOR)) ((std::string, CL_PLATFORM_EXTENSIONS)) ) #ifdef BOOST_COMPUTE_CL_VERSION_2_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(platform, ((cl_ulong, CL_PLATFORM_HOST_TIMER_RESOLUTION)) ) #endif // BOOST_COMPUTE_CL_VERSION_2_1 inline boost::compute::platform device::platform() const { return boost::compute::platform(get_info<CL_DEVICE_PLATFORM>()); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_PLATFORM_HPP random.hpp 0000644 00000002174 15125510617 0006544 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_HPP #define BOOST_COMPUTE_RANDOM_HPP /// \file /// /// Meta-header to include all Boost.Compute random headers. 
#include <boost/compute/random/bernoulli_distribution.hpp> #include <boost/compute/random/default_random_engine.hpp> #include <boost/compute/random/discrete_distribution.hpp> #include <boost/compute/random/linear_congruential_engine.hpp> #include <boost/compute/random/mersenne_twister_engine.hpp> #include <boost/compute/random/threefry_engine.hpp> #include <boost/compute/random/normal_distribution.hpp> #include <boost/compute/random/uniform_int_distribution.hpp> #include <boost/compute/random/uniform_real_distribution.hpp> #endif // BOOST_COMPUTE_RANDOM_HPP algorithm/transform_reduce.hpp 0000644 00000007143 15125510617 0012615 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/reduce.hpp> #include <boost/compute/iterator/transform_iterator.hpp> #include <boost/compute/iterator/zip_iterator.hpp> #include <boost/compute/functional/detail/unpack.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Transforms each value in the range [\p first, \p last) with the unary /// \p transform_function and then reduces each transformed value with /// \p reduce_function. 
/// /// For example, to calculate the sum of the absolute values of a vector /// of integers: /// /// \snippet test/test_transform_reduce.cpp sum_abs_int /// /// Space complexity on GPUs: \Omega(n)<br> /// Space complexity on CPUs: \Omega(1) /// /// \see reduce(), inner_product() template<class InputIterator, class OutputIterator, class UnaryTransformFunction, class BinaryReduceFunction> inline void transform_reduce(InputIterator first, InputIterator last, OutputIterator result, UnaryTransformFunction transform_function, BinaryReduceFunction reduce_function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); ::boost::compute::reduce( ::boost::compute::make_transform_iterator(first, transform_function), ::boost::compute::make_transform_iterator(last, transform_function), result, reduce_function, queue ); } /// \overload template<class InputIterator1, class InputIterator2, class OutputIterator, class BinaryTransformFunction, class BinaryReduceFunction> inline void transform_reduce(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryTransformFunction transform_function, BinaryReduceFunction reduce_function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::difference_type difference_type; difference_type n = std::distance(first1, last1); ::boost::compute::transform_reduce( ::boost::compute::make_zip_iterator( boost::make_tuple(first1, first2) ), ::boost::compute::make_zip_iterator( boost::make_tuple(last1, first2 + n) ), result, detail::unpack(transform_function), reduce_function, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP algorithm/count_if.hpp 
0000644 00000004524 15125510617 0011061 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP #define BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP #include <boost/static_assert.hpp> #include <boost/compute/device.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/count_if_with_ballot.hpp> #include <boost/compute/algorithm/detail/count_if_with_reduce.hpp> #include <boost/compute/algorithm/detail/count_if_with_threads.hpp> #include <boost/compute/algorithm/detail/serial_count_if.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns the number of elements in the range [\p first, \p last) /// for which \p predicate returns \c true. 
/// /// Space complexity on CPUs: \Omega(1)<br> /// Space complexity on GPUs: \Omega(n) template<class InputIterator, class Predicate> inline size_t count_if(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); const device &device = queue.get_device(); size_t input_size = detail::iterator_range_size(first, last); if(input_size == 0){ return 0; } if(device.type() & device::cpu){ if(input_size < 1024){ return detail::serial_count_if(first, last, predicate, queue); } else { return detail::count_if_with_threads(first, last, predicate, queue); } } else { if(input_size < 32){ return detail::serial_count_if(first, last, predicate, queue); } else { return detail::count_if_with_reduce(first, last, predicate, queue); } } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP algorithm/includes.hpp 0000644 00000012757 15125510617 0011070 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP #define BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/algorithm/detail/balanced_path.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Serial includes kernel class /// /// Subclass of meta_kernel to perform includes operation after tiling /// class serial_includes_kernel : meta_kernel { public: serial_includes_kernel() : meta_kernel("includes") { } template<class InputIterator1, class InputIterator2, class InputIterator3, class InputIterator4, class OutputIterator> void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator result) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << "uint includes = 1;\n" << "while(start1<end1 && start2<end2)\n" << "{\n" << " if(" << first1[expr<uint_>("start1")] << " == " << first2[expr<uint_>("start2")] << ")\n" << " {\n" << " start1++; start2++;\n" << " }\n" << " else if(" << first1[expr<uint_>("start1")] << " < " << first2[expr<uint_>("start2")] << ")\n" << " start1++;\n" << " else\n" << " {\n" << " includes = 0;\n" << " 
break;\n" << " }\n" << "}\n" << "if(start2<end2)\n" << " includes = 0;\n" << result[expr<uint_>("i")] << " = includes;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace /// /// \brief Includes algorithm /// /// Finds if the sorted range [first1, last1) includes the sorted /// range [first2, last2). In other words, it checks if [first1, last1) is /// a superset of [first2, last2). /// /// \return True, if [first1, last1) includes [first2, last2). False otherwise. /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param queue Queue on which to execute /// /// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2)) template<class InputIterator1, class InputIterator2> inline bool includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); size_t tile_size = 1024; size_t count1 = detail::iterator_range_size(first1, last1); size_t count2 = detail::iterator_range_size(first2, last2); vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::balanced_path_kernel tiling_kernel; tiling_kernel.tile_size = static_cast<unsigned int>(tile_size); tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, uint_(0), queue); fill_n(tile_b.begin(), 1, uint_(0), queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), 
queue); fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue); vector<uint_> result((count1+count2+tile_size-1)/tile_size, queue.get_context()); // Find individually detail::serial_includes_kernel includes_kernel; includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), result.begin()); includes_kernel.exec(queue); return find(result.begin(), result.end(), 0, queue) == result.end(); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP algorithm/stable_partition.hpp 0000644 00000005261 15125510617 0012615 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP #define BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/context.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy_if.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// /// \brief Partitioning algorithm /// /// Partitions the elements in the range [\p first, \p last) according to /// \p predicate. The order of the elements is preserved. 
/// \return Iterator pointing to end of true values /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param predicate Unary predicate to be applied on each element /// \param queue Queue on which to execute /// /// Space complexity: \Omega(3n) /// /// \see is_partitioned() and partition() /// template<class Iterator, class UnaryPredicate> inline Iterator stable_partition(Iterator first, Iterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); typedef typename std::iterator_traits<Iterator>::value_type value_type; // make temporary copy of the input ::boost::compute::vector<value_type> tmp(first, last, queue); // copy true values Iterator last_true = ::boost::compute::copy_if(tmp.begin(), tmp.end(), first, predicate, queue); // copy false values Iterator last_false = ::boost::compute::copy_if(tmp.begin(), tmp.end(), last_true, not1(predicate), queue); // return iterator pointing to the last true value return last_true; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP algorithm/inclusive_scan.hpp 0000644 00000006512 15125510617 0012257 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP #include <boost/static_assert.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/scan.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Performs an inclusive scan of the elements in the range [\p first, \p last) /// and stores the results in the range beginning at \p result. /// /// Each element in the output is assigned to the sum of the current value in /// the input with the sum of every previous value in the input. /// /// \param first first element in the range to scan /// \param last last element in the range to scan /// \param result first element in the result range /// \param binary_op associative binary operator /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// The default operation is to add the elements up. /// /// \snippet test/test_scan.cpp inclusive_scan_int /// /// But different associative operation can be specified as \p binary_op /// instead (e.g., multiplication, maximum, minimum). 
/// /// \snippet test/test_scan.cpp inclusive_scan_int_multiplies /// /// Space complexity on GPUs: \Omega(n)<br> /// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br> /// Space complexity on CPUs: \Omega(1) /// /// \see exclusive_scan() template<class InputIterator, class OutputIterator, class BinaryOperator> inline OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, BinaryOperator binary_op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<OutputIterator>::value_type output_type; return detail::scan(first, last, result, false, output_type(0), binary_op, queue); } /// \overload template<class InputIterator, class OutputIterator> inline OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<OutputIterator>::value_type output_type; return detail::scan(first, last, result, false, output_type(0), boost::compute::plus<output_type>(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP algorithm/any_of.hpp 0000644 00000003010 15125510617 0010513 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP #define BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if \p predicate returns \c true for any of the elements in /// the range [\p first, \p last). /// /// For example, to test if a vector contains any negative values: /// /// \snippet test/test_any_all_none_of.cpp any_of /// /// Space complexity: \Omega(1) /// /// \see all_of(), none_of() template<class InputIterator, class UnaryPredicate> inline bool any_of(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return ::boost::compute::find_if(first, last, predicate, queue) != last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP algorithm/minmax_element.hpp 0000644 00000005154 15125510617 0012255 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP #include <utility> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/max_element.hpp> #include <boost/compute/algorithm/min_element.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns a pair of iterators with the first pointing to the minimum /// element and the second pointing to the maximum element in the range /// [\p first, \p last). /// /// \param first first element in the input range /// \param last last element in the input range /// \param compare comparison function object which returns true if the first /// argument is less than (i.e. is ordered before) the second. /// \param queue command queue to perform the operation /// /// Space complexity on CPUs: \Omega(1)<br> /// Space complexity on GPUs: \Omega(N) /// /// \see max_element(), min_element() template<class InputIterator, class Compare> inline std::pair<InputIterator, InputIterator> minmax_element(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); if(first == last){ // empty range return std::make_pair(first, first); } return std::make_pair(min_element(first, last, compare, queue), max_element(first, last, compare, queue)); } ///\overload template<class InputIterator> inline std::pair<InputIterator, InputIterator> minmax_element(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); if(first == last){ // empty range return std::make_pair(first, first); } return std::make_pair(min_element(first, last, queue), max_element(first, last, queue)); } } // 
end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP algorithm/mismatch.hpp 0000644 00000006521 15125510617 0011057 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP #define BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP #include <iterator> #include <utility> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/iterator/transform_iterator.hpp> #include <boost/compute/iterator/zip_iterator.hpp> #include <boost/compute/functional/detail/unpack.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns a pair of iterators pointing to the first position where the /// range [\p first1, \p last1) and the range starting at \p first2 /// differ. 
/// /// Space complexity: \Omega(1) template<class InputIterator1, class InputIterator2> inline std::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); typedef typename std::iterator_traits<InputIterator1>::value_type value_type; ::boost::compute::equal_to<value_type> op; InputIterator2 last2 = first2 + std::distance(first1, last1); InputIterator1 iter = boost::get<0>( ::boost::compute::find( ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(first1, first2) ), detail::unpack(op) ), ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(last1, last2) ), detail::unpack(op) ), false, queue ).base().get_iterator_tuple() ); return std::make_pair(iter, first2 + std::distance(first1, iter)); } /// \overload template<class InputIterator1, class InputIterator2> inline std::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); if(std::distance(first1, last1) < std::distance(first2, last2)){ return ::boost::compute::mismatch(first1, last1, first2, queue); } else { return ::boost::compute::mismatch( first1, first1 + std::distance(first2, last2), first2, queue ); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP algorithm/swap_ranges.hpp 0000644 00000003516 15125510617 0011564 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under 
the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP #define BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Swaps the elements in the range [\p first1, \p last1) with the /// elements in the range beginning at \p first2. /// /// Space complexity: \Omega(distance(\p first1, \p last1)) template<class Iterator1, class Iterator2> inline Iterator2 swap_ranges(Iterator1 first1, Iterator1 last1, Iterator2 first2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<Iterator2>::value); typedef typename std::iterator_traits<Iterator1>::value_type value_type; Iterator2 last2 = first2 + std::distance(first1, last1); ::boost::compute::vector<value_type> tmp(first1, last1, queue); ::boost::compute::copy(first2, last2, first1, queue); ::boost::compute::copy(tmp.begin(), tmp.end(), first2, queue); return last2; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP algorithm/fill_n.hpp 0000644 00000002527 15125510617 0010517 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more 
information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FILL_N_HPP #define BOOST_COMPUTE_ALGORITHM_FILL_N_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/fill.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Fills the range [\p first, \p first + count) with \p value. /// /// Space complexity: \Omega(1) /// /// \see fill() template<class BufferIterator, class Size, class T> inline void fill_n(BufferIterator first, Size count, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value); ::boost::compute::fill(first, first + count, value, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FILL_N_HPP algorithm/iota.hpp 0000644 00000003307 15125510617 0010205 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IOTA_HPP #define BOOST_COMPUTE_ALGORITHM_IOTA_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/iterator/counting_iterator.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Fills the range [\p first, \p last) with sequential values starting at /// \p value. 
/// /// For example, the following code: /// \snippet test/test_iota.cpp iota /// /// Will fill \c vec with the values (\c 0, \c 1, \c 2, \c ...). /// /// Space complexity: \Omega(1) template<class BufferIterator, class T> inline void iota(BufferIterator first, BufferIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value); T count = static_cast<T>(detail::iterator_range_size(first, last)); copy( ::boost::compute::make_counting_iterator(value), ::boost::compute::make_counting_iterator(value + count), first, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_IOTA_HPP algorithm/exclusive_scan.hpp 0000644 00000007712 15125510617 0012270 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP #include <boost/static_assert.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/scan.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Performs an exclusive scan of the elements in the range [\p first, \p last) /// and stores the results in the range beginning at \p result. /// /// Each element in the output is assigned to the sum of all the previous /// values in the input. 
/// /// \param first first element in the range to scan /// \param last last element in the range to scan /// \param result first element in the result range /// \param init value used to initialize the scan sequence /// \param binary_op associative binary operator /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// The default operation is to add the elements up. /// /// \snippet test/test_scan.cpp exclusive_scan_int /// /// But different associative operation can be specified as \p binary_op /// instead (e.g., multiplication, maximum, minimum). Also value used to /// initialized the scan sequence can be specified. /// /// \snippet test/test_scan.cpp exclusive_scan_int_multiplies /// /// Space complexity on GPUs: \Omega(n)<br> /// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br> /// Space complexity on CPUs: \Omega(1) /// /// \see inclusive_scan() template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, BinaryOperator binary_op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); return detail::scan(first, last, result, true, init, binary_op, queue); } /// \overload template<class InputIterator, class OutputIterator, class T> inline OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<OutputIterator>::value_type output_type; return detail::scan(first, last, result, true, init, boost::compute::plus<output_type>(), queue); } /// \overload template<class 
InputIterator, class OutputIterator> inline OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<OutputIterator>::value_type output_type; return detail::scan(first, last, result, true, output_type(0), boost::compute::plus<output_type>(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP algorithm/set_union.hpp 0000644 00000016330 15125510617 0011254 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP
#define BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP

#include <iterator>

#include <boost/static_assert.hpp>

#include <boost/compute/algorithm/detail/balanced_path.hpp>
#include <boost/compute/algorithm/detail/compact.hpp>
#include <boost/compute/algorithm/exclusive_scan.hpp>
#include <boost/compute/algorithm/fill_n.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/detail/meta_kernel.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Serial set union kernel class
///
/// Subclass of meta_kernel to perform serial set union after tiling
///
/// Each work-item \c i merges one tile: the elements of the first input in
/// [tile_first1[i], tile_first1[i+1]) with the elements of the second input
/// in [tile_first2[i], tile_first2[i+1]). The merged values are written to
/// \c result starting at index \c i*tile_size and the number of values
/// written is stored in \c counts[i].
///
class serial_set_union_kernel : meta_kernel
{
public:
    // stride between the output slots of consecutive work-items; defaults
    // to 4 and is expected to be overwritten by the caller before set_range()
    unsigned int tile_size;

    serial_set_union_kernel() : meta_kernel("set_union")
    {
        tile_size = 4;
    }

    /// Generates the kernel source for the given ranges.
    ///
    /// \param first1 start of the first sorted input
    /// \param first2 start of the second sorted input
    /// \param tile_first1 start of the tile boundaries into the first input
    /// \param tile_last1 end of the tile boundaries into the first input
    /// \param tile_first2 start of the tile boundaries into the second input
    /// \param result start of the (uncompacted) output
    /// \param counts start of the per-tile output counts
    template<class InputIterator1, class InputIterator2,
             class InputIterator3, class InputIterator4,
             class OutputIterator1, class OutputIterator2>
    void set_range(InputIterator1 first1,
                   InputIterator2 first2,
                   InputIterator3 tile_first1,
                   InputIterator3 tile_last1,
                   InputIterator4 tile_first2,
                   OutputIterator1 result,
                   OutputIterator2 counts)
    {
        // n boundaries delimit n-1 tiles, one work-item per tile
        m_count = iterator_range_size(tile_first1, tile_last1) - 1;

        *this <<
        "uint i = get_global_id(0);\n" <<
        "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
        "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
        "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
        "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
        "uint index = i*" << tile_size << ";\n" <<
        "uint count = 0;\n" <<
        // merge while both tiles have elements left; equal elements are
        // emitted once and consumed from both inputs
        "while(start1<end1 && start2<end2)\n" <<
        "{\n" <<
        " if(" << first1[expr<uint_>("start1")] << " == " <<
                    first2[expr<uint_>("start2")] << ")\n" <<
        " {\n" <<
                    result[expr<uint_>("index")] <<
                        " = " << first1[expr<uint_>("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++; start2++;\n" <<
        " }\n" <<
        " else if(" << first1[expr<uint_>("start1")] << " < " <<
                        first2[expr<uint_>("start2")] << ")\n" <<
        " {\n" <<
                    result[expr<uint_>("index")] <<
                        " = " << first1[expr<uint_>("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++;\n" <<
        " }\n" <<
        " else\n" <<
        " {\n" <<
                    result[expr<uint_>("index")] <<
                        " = " << first2[expr<uint_>("start2")] << ";\n" <<
        " index++; count++;\n" <<
        " start2++;\n" <<
        " }\n" <<
        "}\n" <<
        // copy whatever is left of the first tile
        "while(start1<end1)\n" <<
        "{\n" <<
                result[expr<uint_>("index")] <<
                    " = " << first1[expr<uint_>("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++;\n" <<
        "}\n" <<
        // copy whatever is left of the second tile
        "while(start2<end2)\n" <<
        "{\n" <<
                result[expr<uint_>("index")] <<
                    " = " << first2[expr<uint_>("start2")] << ";\n" <<
        " index++; count++;\n" <<
        " start2++;\n" <<
        "}\n" <<
        counts[expr<uint_>("i")] << " = count;\n";
    }

    /// Runs the kernel with one work-item per tile. Returns a
    /// default-constructed event without launching when there are no tiles.
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        return exec_1d(queue, 0, m_count);
    }

private:
    // number of tiles; only assigned by set_range()
    size_t m_count;
};

} //end detail namespace

///
/// \brief Set union algorithm
///
/// Finds the union of the sorted range [first1, last1) with the sorted
/// range [first2, last2) and stores it in range starting at result
/// \return Iterator pointing to end of union
///
/// \param first1 Iterator pointing to start of first set
/// \param last1 Iterator pointing to end of first set
/// \param first2 Iterator pointing to start of second set
/// \param last2 Iterator pointing to end of second set
/// \param result Iterator pointing to start of range in which the union
/// will be stored
/// \param queue Queue on which to execute
///
/// Space complexity:
/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2)))
template<class InputIterator1, class InputIterator2, class OutputIterator>
inline OutputIterator set_union(InputIterator1 first1,
                                InputIterator1 last1,
                                InputIterator2 first2,
                                InputIterator2 last2,
                                OutputIterator result,
                                command_queue &queue = system::default_queue())
{
    BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value);
    BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value);
    BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value);

    typedef typename std::iterator_traits<InputIterator1>::value_type value_type;

    int tile_size = 1024;

    int count1 = detail::iterator_range_size(first1, last1);
    int count2 = detail::iterator_range_size(first2, last2);

    // one boundary per tile of the combined input (rounded up) plus one:
    // slot 0 is set to 0 and the last slot to the input length below
    vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
    vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());

    // Tile the sets
    detail::balanced_path_kernel tiling_kernel;
    tiling_kernel.tile_size = tile_size;
    tiling_kernel.set_range(first1, last1, first2, last2,
                            tile_a.begin()+1, tile_b.begin()+1);
    fill_n(tile_a.begin(), 1, 0, queue);
    fill_n(tile_b.begin(), 1, 0, queue);
    tiling_kernel.exec(queue);

    fill_n(tile_a.end()-1, 1, count1, queue);
    fill_n(tile_b.end()-1, 1, count2, queue);

    // uncompacted output: each tile writes into its own tile_size-wide slot
    vector<value_type> temp_result(count1+count2, queue.get_context());
    vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
    // the trailing slot is not written by the union kernel; after the
    // exclusive scan below it holds the sum of all per-tile counts
    fill_n(counts.end()-1, 1, 0, queue);

    // Find individual unions
    detail::serial_set_union_kernel union_kernel;
    union_kernel.tile_size = tile_size;
    union_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
                           tile_b.begin(), temp_result.begin(), counts.begin());

    union_kernel.exec(queue);

    exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue);

    // Compact the results
    detail::compact_kernel compact_kernel;
    compact_kernel.tile_size = tile_size;
    compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);

    compact_kernel.exec(queue);

    // the last scanned count is read back from the device to locate the end
    return result + (counts.end() - 1).read(queue);
}

} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP
algorithm/fill.hpp 0000644 00000023456 15125510617 0010206 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013
Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
#define BOOST_COMPUTE_ALGORITHM_FILL_HPP

#include <iterator>

#include <boost/static_assert.hpp>
#include <boost/mpl/int.hpp>
#include <boost/mpl/vector.hpp>
#include <boost/mpl/contains.hpp>
#include <boost/utility/enable_if.hpp>

#include <boost/compute/cl.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/async/future.hpp>
#include <boost/compute/iterator/constant_iterator.hpp>
#include <boost/compute/iterator/discard_iterator.hpp>
#include <boost/compute/detail/is_buffer_iterator.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>

namespace boost {
namespace compute {
namespace detail {

namespace mpl = boost::mpl;

// fills the range [first, first + count) with value using copy()
template<class BufferIterator, class T>
inline void fill_with_copy(BufferIterator first,
                           size_t count,
                           const T &value,
                           command_queue &queue)
{
    // the source is a pair of constant iterators over value, count apart
    ::boost::compute::copy(
        ::boost::compute::make_constant_iterator(value, 0),
        ::boost::compute::make_constant_iterator(value, count),
        first,
        queue
    );
}

// fills the range [first, first + count) with value using copy_async()
template<class BufferIterator, class T>
inline future<void> fill_async_with_copy(BufferIterator first,
                                         size_t count,
                                         const T &value,
                                         command_queue &queue)
{
    return ::boost::compute::copy_async(
               ::boost::compute::make_constant_iterator(value, 0),
               ::boost::compute::make_constant_iterator(value, count),
               first,
               queue
           );
}

#if defined(BOOST_COMPUTE_CL_VERSION_1_2)

// meta-function returning true if Iterator points to a range of values
// that can be filled using clEnqueueFillBuffer(). to meet these criteria
// it must have a buffer accessible through iter.get_buffer() and the
// size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}.
template<class Iterator>
struct is_valid_fill_buffer_iterator :
    public mpl::and_<
               is_buffer_iterator<Iterator>,
               mpl::contains<
                   mpl::vector<
                       mpl::int_<1>,
                       mpl::int_<2>,
                       mpl::int_<4>,
                       mpl::int_<8>,
                       mpl::int_<16>,
                       mpl::int_<32>,
                       mpl::int_<64>,
                       mpl::int_<128>
                   >,
                   mpl::int_<
                       sizeof(typename std::iterator_traits<Iterator>::value_type)
                   >
               >
           >::type { };

// discard_iterator is explicitly excluded from the fill-buffer path
template<>
struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};

// specialization which uses clEnqueueFillBuffer for buffer iterators
template<class BufferIterator, class T>
inline void
dispatch_fill(BufferIterator first,
              size_t count,
              const T &value,
              command_queue &queue,
              typename boost::enable_if<
                 is_valid_fill_buffer_iterator<BufferIterator>
              >::type* = 0)
{
    typedef typename std::iterator_traits<BufferIterator>::value_type value_type;

    if(count == 0){
        // nothing to do
        return;
    }

    // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
    if(!queue.check_device_version(1, 2)){
        return fill_with_copy(first, count, value, queue);
    }

    // the fill pattern is the value converted to the buffer's element type;
    // offsets and sizes below are expressed in bytes
    value_type pattern = static_cast<value_type>(value);
    size_t offset = static_cast<size_t>(first.get_index());

    if(count == 1){
        // use clEnqueueWriteBuffer() directly when writing a single value
        // to the device buffer. this is potentially more efficient and also
        // works around a bug in the intel opencl driver.
        queue.enqueue_write_buffer(
            first.get_buffer(),
            offset * sizeof(value_type),
            sizeof(value_type),
            &pattern
        );
    }
    else {
        queue.enqueue_fill_buffer(
            first.get_buffer(),
            &pattern,
            sizeof(value_type),
            offset * sizeof(value_type),
            count * sizeof(value_type)
        );
    }
}

// asynchronous counterpart of the fill-buffer specialization above. unlike
// the synchronous version it has no count == 0 check and no single-element
// special case; fill_async() filters out empty ranges before calling it.
template<class BufferIterator, class T>
inline future<void>
dispatch_fill_async(BufferIterator first,
                    size_t count,
                    const T &value,
                    command_queue &queue,
                    typename boost::enable_if<
                       is_valid_fill_buffer_iterator<BufferIterator>
                    >::type* = 0)
{
    typedef typename std::iterator_traits<BufferIterator>::value_type value_type;

    // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
    if(!queue.check_device_version(1, 2)){
        return fill_async_with_copy(first, count, value, queue);
    }

    value_type pattern = static_cast<value_type>(value);
    size_t offset = static_cast<size_t>(first.get_index());

    event event_ =
        queue.enqueue_fill_buffer(first.get_buffer(),
                                  &pattern,
                                  sizeof(value_type),
                                  offset * sizeof(value_type),
                                  count * sizeof(value_type));

    return future<void>(event_);
}

#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// specializations for svm_ptr<T>
template<class T>
inline void dispatch_fill(svm_ptr<T> first,
                          size_t count,
                          const T &value,
                          command_queue &queue)
{
    if(count == 0){
        return;
    }

    queue.enqueue_svm_fill(
        first.get(), &value, sizeof(T), count * sizeof(T)
    );
}

template<class T>
inline future<void> dispatch_fill_async(svm_ptr<T> first,
                                        size_t count,
                                        const T &value,
                                        command_queue &queue)
{
    if(count == 0){
        return future<void>();
    }

    event event_ = queue.enqueue_svm_fill(
        first.get(), &value, sizeof(T), count * sizeof(T)
    );

    return future<void>(event_);
}
#endif // BOOST_COMPUTE_CL_VERSION_2_0

// default implementations
template<class BufferIterator, class T>
inline void
dispatch_fill(BufferIterator first,
              size_t count,
              const T &value,
              command_queue &queue,
              typename boost::disable_if<
                  is_valid_fill_buffer_iterator<BufferIterator>
              >::type* = 0)
{
    fill_with_copy(first, count, value, queue);
}

template<class BufferIterator, class T>
inline future<void>
dispatch_fill_async(BufferIterator first,
                    size_t count,
                    const T &value,
                    command_queue &queue,
                    typename boost::disable_if<
                        is_valid_fill_buffer_iterator<BufferIterator>
                    >::type* = 0)
{
    return fill_async_with_copy(first, count, value, queue);
}
#else
// without OpenCL 1.2 support at compile time every fill goes through copy()
template<class BufferIterator, class T>
inline void dispatch_fill(BufferIterator first,
                          size_t count,
                          const T &value,
                          command_queue &queue)
{
    fill_with_copy(first, count, value, queue);
}

template<class BufferIterator, class T>
inline future<void> dispatch_fill_async(BufferIterator first,
                                        size_t count,
                                        const T &value,
                                        command_queue &queue)
{
    return fill_async_with_copy(first, count, value, queue);
}
#endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2)

} // end detail namespace

/// Fills the range [\p first, \p last) with \p value.
///
/// \param first first element in the range to fill
/// \param last last element in the range to fill
/// \param value value to copy to each element
/// \param queue command queue to perform the operation
///
/// For example, to fill a vector on the device with sevens:
/// \code
/// // vector on the device
/// boost::compute::vector<int> vec(10, context);
///
/// // fill vector with sevens
/// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
/// \endcode
///
/// Space complexity: \Omega(1)
///
/// \see boost::compute::fill_n()
template<class BufferIterator, class T>
inline void fill(BufferIterator first,
                 BufferIterator last,
                 const T &value,
                 command_queue &queue = system::default_queue())
{
    BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value);
    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return;
    }

    detail::dispatch_fill(first, count, value, queue);
}

/// Asynchronous variant of fill(): fills the range [\p first, \p last)
/// with \p value and returns a \c future<void> for the operation. An empty
/// range yields a default-constructed future and nothing is enqueued.
///
/// Note that this overload statically requires a buffer iterator
/// (\c detail::is_buffer_iterator), whereas fill() accepts any device
/// iterator.
template<class BufferIterator, class T>
inline future<void> fill_async(BufferIterator first,
                               BufferIterator last,
                               const T &value,
                               command_queue &queue = system::default_queue())
{
    BOOST_STATIC_ASSERT(detail::is_buffer_iterator<BufferIterator>::value);
    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return future<void>();
    }

    return detail::dispatch_fill_async(first, count, value, queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP
algorithm/upper_bound.hpp 0000644 00000003031 15125510617 0011565 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP
#define BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP

#include <boost/static_assert.hpp>

#include <boost/compute/lambda.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/detail/binary_find.hpp>
#include <boost/compute/type_traits/is_device_iterator.hpp>

namespace boost {
namespace compute {

/// Returns an iterator pointing to the first element in the sorted
/// range [\p first, \p last) that is not less than or equal to
/// \p value.
/// /// Space complexity: \Omega(1) template<class InputIterator, class T> inline InputIterator upper_bound(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); using ::boost::compute::_1; InputIterator position = detail::binary_find(first, last, _1 > value, queue); return position; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP algorithm/lower_bound.hpp 0000644 00000003045 15125510617 0011567 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP #define BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP #include <boost/static_assert.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/binary_find.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the sorted /// range [\p first, \p last) that is not less than \p value. 
/// /// Space complexity: \Omega(1) /// /// \see upper_bound() template<class InputIterator, class T> inline InputIterator lower_bound(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); using ::boost::compute::_1; InputIterator position = detail::binary_find(first, last, _1 >= value, queue); return position; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP algorithm/for_each_n.hpp 0000644 00000002617 15125510617 0011337 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP #define BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP #include <boost/static_assert.hpp> #include <boost/compute/algorithm/for_each.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Calls \p function on each element in the range [\p first, \p first /// \c + \p count). 
/// /// Space complexity: \Omega(1) /// /// \see for_each() template<class InputIterator, class Size, class UnaryFunction> inline UnaryFunction for_each_n(InputIterator first, Size count, UnaryFunction function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return ::boost::compute::for_each(first, first + count, function, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP algorithm/accumulate.hpp 0000644 00000014347 15125510617 0011402 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP #include <boost/static_assert.hpp> #include <boost/preprocessor/seq/for_each.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/reduce.hpp> #include <boost/compute/algorithm/detail/serial_accumulate.hpp> #include <boost/compute/container/array.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { // Space complexity O(1) template<class InputIterator, class T, class BinaryFunction> inline T generic_accumulate(InputIterator first, InputIterator last, T init, BinaryFunction function, command_queue &queue) { const context &context = queue.get_context(); size_t size = iterator_range_size(first, last); if(size 
== 0){ return init; } // accumulate on device array<T, 1> device_result(context); detail::serial_accumulate( first, last, device_result.begin(), init, function, queue ); // copy result to host T result; ::boost::compute::copy_n(device_result.begin(), 1, &result, queue); return result; } // returns true if we can use reduce() instead of accumulate() when // accumulate() this is true when the function is commutative (such as // addition of integers) and the initial value is the identity value // for the operation (zero for addition, one for multiplication). template<class T, class F> inline bool can_accumulate_with_reduce(T init, F function) { (void) init; (void) function; return false; } /// \internal_ #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \ inline bool can_accumulate_with_reduce(type init, plus<type>) \ { \ return init == type(0); \ } \ inline bool can_accumulate_with_reduce(type init, multiplies<type>) \ { \ return init == type(1); \ } BOOST_PP_SEQ_FOR_EACH( BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE, _, (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_) ) template<class T> inline bool can_accumulate_with_reduce(T init, min<T>) { return init == (std::numeric_limits<T>::max)(); } template<class T> inline bool can_accumulate_with_reduce(T init, max<T>) { return init == (std::numeric_limits<T>::min)(); } #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE template<class InputIterator, class T, class BinaryFunction> inline T dispatch_accumulate(InputIterator first, InputIterator last, T init, BinaryFunction function, command_queue &queue) { size_t size = iterator_range_size(first, last); if(size == 0){ return init; } if(can_accumulate_with_reduce(init, function)){ T result; reduce(first, last, &result, function, queue); return result; } else { return generic_accumulate(first, last, init, function, queue); } } } // end detail namespace /// Returns the result of applying \p function to the 
elements in the /// range [\p first, \p last) and \p init. /// /// If no function is specified, \c plus will be used. /// /// \param first first element in the input range /// \param last last element in the input range /// \param init initial value /// \param function binary reduction function /// \param queue command queue to perform the operation /// /// \return the accumulated result value /// /// In specific situations the call to \c accumulate() can be automatically /// optimized to a call to the more efficient \c reduce() algorithm. This /// occurs when the binary reduction function is recognized as associative /// (such as the \c plus<int> function). /// /// Note that because floating-point addition is not associative, calling /// \c accumulate() with \c plus<float> results in a less efficient serial /// reduction algorithm being executed. If a slight loss in precision is /// acceptable, the more efficient parallel \c reduce() algorithm should be /// used instead. /// /// For example: /// \code /// // with vec = boost::compute::vector<int> /// accumulate(vec.begin(), vec.end(), 0, plus<int>()); // fast /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast /// /// // with vec = boost::compute::vector<float> /// accumulate(vec.begin(), vec.end(), 0, plus<float>()); // slow /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast /// \endcode /// /// Space complexity: \Omega(1)<br> /// Space complexity when optimized to \c reduce(): \Omega(n) /// /// \see reduce() template<class InputIterator, class T, class BinaryFunction> inline T accumulate(InputIterator first, InputIterator last, T init, BinaryFunction function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return detail::dispatch_accumulate(first, last, init, function, queue); } /// \overload template<class InputIterator, class T> inline T accumulate(InputIterator first, InputIterator last, T init, command_queue 
&queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type IT; return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP algorithm/binary_search.hpp 0000644 00000002710 15125510617 0012057 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP #define BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/lower_bound.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if \p value is in the sorted range [\p first, /// \p last). 
/// /// Space complexity: \Omega(1) template<class InputIterator, class T> inline bool binary_search(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); InputIterator position = lower_bound(first, last, value, queue); return position != last && position.read(queue) == value; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP algorithm/transform.hpp 0000644 00000006264 15125510617 0011271 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP #define BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/iterator/transform_iterator.hpp> #include <boost/compute/iterator/zip_iterator.hpp> #include <boost/compute/functional/detail/unpack.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Transforms the elements in the range [\p first, \p last) using /// operator \p op and stores the results in the range beginning at /// \p result. 
/// /// For example, to calculate the absolute value for each element in a vector: /// /// \snippet test/test_transform.cpp transform_abs /// /// Space complexity: \Omega(1) /// /// \see copy() template<class InputIterator, class OutputIterator, class UnaryOperator> inline OutputIterator transform(InputIterator first, InputIterator last, OutputIterator result, UnaryOperator op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); return copy( ::boost::compute::make_transform_iterator(first, op), ::boost::compute::make_transform_iterator(last, op), result, queue ); } /// \overload template<class InputIterator1, class InputIterator2, class OutputIterator, class BinaryOperator> inline OutputIterator transform(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryOperator op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::difference_type difference_type; difference_type n = std::distance(first1, last1); return transform( ::boost::compute::make_zip_iterator(boost::make_tuple(first1, first2)), ::boost::compute::make_zip_iterator(boost::make_tuple(last1, first2 + n)), result, detail::unpack(op), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP algorithm/generate.hpp 0000644 00000003557 15125510617 0011052 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt 
// // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_HPP #define BOOST_COMPUTE_ALGORITHM_GENERATE_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/iterator/function_input_iterator.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Stores the result of \p generator for each element in the range /// [\p first, \p last). /// /// Space complexity: \Omega(1) template<class OutputIterator, class Generator> inline void generate(OutputIterator first, OutputIterator last, Generator generator, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } ::boost::compute::copy( ::boost::compute::make_function_input_iterator(generator, first.get_index()), ::boost::compute::make_function_input_iterator(generator, last.get_index()), first, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_GENERATE_HPP algorithm/reverse_copy.hpp 0000644 00000005166 15125510617 0011763 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator, class OutputIterator> struct reverse_copy_kernel : public meta_kernel { reverse_copy_kernel(Iterator first, Iterator last, OutputIterator result) : meta_kernel("reverse_copy") { // store size of the range m_size = detail::iterator_range_size(first, last); add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size)); *this << decl<cl_uint>("i") << " = get_global_id(0);\n" << decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" << result[var<cl_uint>("j")] << "=" << first[var<cl_uint>("i")] << ";\n"; } void exec(command_queue &queue) { exec_1d(queue, 0, m_size); } size_t m_size; }; } // end detail namespace /// Copies the elements in the range [\p first, \p last) in reversed /// order to the range beginning at \p result. 
/// /// Space complexity: \Omega(1) /// /// \see reverse() template<class InputIterator, class OutputIterator> inline OutputIterator reverse_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type; difference_type count = std::distance(first, last); detail::reverse_copy_kernel<InputIterator, OutputIterator> kernel(first, last, result); // run kernel kernel.exec(queue); // return iterator to the end of result return result + count; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP algorithm/replace_copy.hpp 0000644 00000004266 15125510617 0011723 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/replace.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Copies the value in the range [\p first, \p last) to the range /// beginning at \p result while replacing each instance of \p old_value /// with \p new_value. 
/// /// Space complexity: \Omega(1) /// /// \see replace() template<class InputIterator, class OutputIterator, class T> inline OutputIterator replace_copy(InputIterator first, InputIterator last, OutputIterator result, const T &old_value, const T &new_value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type; difference_type count = std::distance(first, last); if(count == 0){ return result; } // copy data to result ::boost::compute::copy(first, last, result, queue); // replace in result ::boost::compute::replace(result, result + count, old_value, new_value, queue); // return iterator to the end of result return result + count; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP algorithm/partition_point.hpp 0000644 00000003502 15125510617 0012470 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP #define BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/binary_find.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// /// \brief Partition point algorithm /// /// Finds the end of true values in the partitioned range [first, last) /// \return Iterator pointing to end of true values /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param predicate Unary predicate to be applied on each element /// \param queue Queue on which to execute /// /// Space complexity: \Omega(1) /// /// \see partition() and stable_partition() /// template<class InputIterator, class UnaryPredicate> inline InputIterator partition_point(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return detail::binary_find(first, last, not1(predicate), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP algorithm/is_permutation.hpp 0000644 00000005270 15125510617 0012314 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP #define BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/algorithm/equal.hpp> #include <boost/compute/algorithm/sort.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// /// \brief Permutation checking algorithm /// /// Checks if the range [first1, last1) can be permuted into the /// range [first2, last2) /// \return True, if it can be permuted. False, otherwise. /// /// \param first1 Iterator pointing to start of first range /// \param last1 Iterator pointing to end of first range /// \param first2 Iterator pointing to start of second range /// \param last2 Iterator pointing to end of second range /// \param queue Queue on which to execute /// /// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2)) template<class InputIterator1, class InputIterator2> inline bool is_permutation(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); typedef typename std::iterator_traits<InputIterator1>::value_type value_type1; typedef typename std::iterator_traits<InputIterator2>::value_type value_type2; size_t count1 = detail::iterator_range_size(first1, last1); size_t count2 = detail::iterator_range_size(first2, last2); if(count1 != count2) return false; vector<value_type1> temp1(first1, last1, queue); vector<value_type2> temp2(first2, last2, queue); sort(temp1.begin(), temp1.end(), queue); 
sort(temp2.begin(), temp2.end(), queue); return equal(temp1.begin(), temp1.end(), temp2.begin(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP algorithm/find_if.hpp 0000644 00000002740 15125510617 0010647 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/find_if_with_atomics.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) for which \p predicate returns \c true. 
/// /// Space complexity: \Omega(1) template<class InputIterator, class UnaryPredicate> inline InputIterator find_if(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return detail::find_if_with_atomics(first, last, predicate, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP algorithm/generate_n.hpp 0000644 00000002375 15125510617 0011364 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP #define BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/generate.hpp> namespace boost { namespace compute { /// Stores the result of \p generator for each element in the range /// [\p first, \p first + \p count). 
/// /// Space complexity: \Omega(1) template<class OutputIterator, class Size, class Generator> inline void generate_n(OutputIterator first, Size count, Generator generator, command_queue &queue = system::default_queue()) { ::boost::compute::generate(first, first + count, generator, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP algorithm/partial_sum.hpp 0000644 00000003214 15125510617 0011566 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP #define BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/inclusive_scan.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Calculates the cumulative sum of the elements in the range [\p first, /// \p last) and writes the resulting values to the range beginning at /// \p result. 
/// /// Space complexity on GPUs: \Omega(n)<br> /// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br> /// Space complexity on CPUs: \Omega(1) template<class InputIterator, class OutputIterator> inline OutputIterator partial_sum(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); return ::boost::compute::inclusive_scan(first, last, result, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP algorithm/set_difference.hpp 0000644 00000015435 15125510617 0012223 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP #define BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/algorithm/detail/compact.hpp> #include <boost/compute/algorithm/detail/balanced_path.hpp> #include <boost/compute/algorithm/exclusive_scan.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Serial set difference kernel class /// /// Subclass of meta_kernel to perform serial set difference after tiling /// class serial_set_difference_kernel : meta_kernel { public: unsigned int tile_size; serial_set_difference_kernel() : meta_kernel("set_difference") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class InputIterator3, class InputIterator4, class OutputIterator1, class OutputIterator2> void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator1 result, OutputIterator2 counts) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << "uint index = i*" << tile_size << ";\n" << "uint count = 0;\n" << "while(start1<end1 && start2<end2)\n" << "{\n" << " if(" << first1[expr<uint_>("start1")] << " == " << first2[expr<uint_>("start2")] << ")\n" << " {\n" << " start1++; start2++;\n" << " }\n" << " 
else if(" << first1[expr<uint_>("start1")] << " < " << first2[expr<uint_>("start2")] << ")\n" << " {\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++; count++;\n" << " start1++;\n" << " }\n" << " else\n" << " {\n" << " start2++;\n" << " }\n" << "}\n" << "while(start1<end1)\n" << "{\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++; count++;\n" << " start1++;\n" << "}\n" << counts[expr<uint_>("i")] << " = count;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace /// /// \brief Set difference algorithm /// /// Finds the difference of the sorted range [first2, last2) from the sorted /// range [first1, last1) and stores it in range starting at result /// \return Iterator pointing to end of difference /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param result Iterator pointing to start of range in which the difference /// will be stored /// \param queue Queue on which to execute /// /// Space complexity: /// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::value_type value_type; int tile_size = 1024; int count1 = 
detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::balanced_path_kernel tiling_kernel; tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); fill_n(tile_b.begin(), 1, 0, queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); vector<value_type> temp_result(count1+count2, queue.get_context()); vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual differences detail::serial_set_difference_kernel difference_kernel; difference_kernel.tile_size = tile_size; difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); difference_kernel.exec(queue); exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); // Compact the results detail::compact_kernel compact_kernel; compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue); return result + (counts.end() - 1).read(queue); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP algorithm/inner_product.hpp 0000644 00000007411 15125510617 0012124 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP #define BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/accumulate.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/iterator/transform_iterator.hpp> #include <boost/compute/iterator/zip_iterator.hpp> #include <boost/compute/functional/detail/unpack.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns the inner product of the elements in the range /// [\p first1, \p last1) with the elements in the range beginning /// at \p first2. /// /// Space complexity: \Omega(1)<br> /// Space complexity when binary operator is recognized as associative: \Omega(n) template<class InputIterator1, class InputIterator2, class T> inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); typedef typename std::iterator_traits<InputIterator1>::value_type input_type; ptrdiff_t n = std::distance(first1, last1); return ::boost::compute::accumulate( ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(first1, first2) ), detail::unpack(multiplies<input_type>()) ), ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(last1, first2 + n) ), detail::unpack(multiplies<input_type>()) ), init, queue ); } /// \overload template<class InputIterator1, class InputIterator2, class T, class BinaryAccumulateFunction, class BinaryTransformFunction> inline T inner_product(InputIterator1 first1, InputIterator1 
last1, InputIterator2 first2, T init, BinaryAccumulateFunction accumulate_function, BinaryTransformFunction transform_function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); typedef typename std::iterator_traits<InputIterator1>::value_type value_type; size_t count = detail::iterator_range_size(first1, last1); vector<value_type> result(count, queue.get_context()); transform(first1, last1, first2, result.begin(), transform_function, queue); return ::boost::compute::accumulate(result.begin(), result.end(), init, accumulate_function, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP algorithm/copy.hpp 0000644 00000075373 15125510617 0010237 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_COPY_HPP #include <algorithm> #include <iterator> #include <boost/utility/enable_if.hpp> #include <boost/mpl/and.hpp> #include <boost/mpl/not.hpp> #include <boost/mpl/or.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/copy_on_device.hpp> #include <boost/compute/algorithm/detail/copy_to_device.hpp> #include <boost/compute/algorithm/detail/copy_to_host.hpp> #include <boost/compute/async/future.hpp> #include <boost/compute/container/mapped_view.hpp> #include <boost/compute/detail/device_ptr.hpp> #include <boost/compute/detail/is_contiguous_iterator.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/parameter_cache.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { namespace mpl = boost::mpl; // meta-function returning true if copy() between InputIterator and // OutputIterator can be implemented with clEnqueueCopyBuffer(). 
template<class InputIterator, class OutputIterator>
struct can_copy_with_copy_buffer :
    mpl::and_<
        // input is a buffer_iterator or a device_ptr
        mpl::or_<
            boost::is_same<
                InputIterator,
                buffer_iterator<typename InputIterator::value_type>
            >,
            boost::is_same<
                InputIterator,
                detail::device_ptr<typename InputIterator::value_type>
            >
        >,
        // output is a buffer_iterator or a device_ptr
        mpl::or_<
            boost::is_same<
                OutputIterator,
                buffer_iterator<typename OutputIterator::value_type>
            >,
            boost::is_same<
                OutputIterator,
                detail::device_ptr<typename OutputIterator::value_type>
            >
        >,
        // both iterators have exactly the same value_type
        boost::is_same<
            typename InputIterator::value_type,
            typename OutputIterator::value_type
        >
    >::type {};

// meta-function returning true if the value_types of HostIterator and
// DeviceIterator are the same (ignoring cv-qualifiers)
template<class HostIterator, class DeviceIterator>
struct is_same_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        typename boost::remove_cv<
            typename DeviceIterator::value_type
        >::type
    >::type {};

// meta-function returning true if value_type of HostIterator is bool
// (ignoring cv-qualifiers)
template<class HostIterator>
struct is_bool_value_type :
    boost::is_same<
        typename boost::remove_cv<
            typename std::iterator_traits<HostIterator>::value_type
        >::type,
        bool
    >::type {};

// host -> device (async)
// Selected when the host and device value_types match. The host iterator
// must be contiguous (checked at compile time).
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            is_same_value_type<InputIterator, OutputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_device_async(first, last, result, queue, events);
}

// host -> device (async)
// Type mismatch between InputIterator and OutputIterator value_types
//
// The host range is wrapped in a mapped_view and copied (with conversion)
// by copy_on_device_async(). For an empty range a default-constructed
// future is returned.
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            mpl::not_<
                                is_device_iterator<InputIterator>
                            >,
                            is_device_iterator<OutputIterator>,
                            mpl::not_<
                                is_same_value_type<InputIterator, OutputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<InputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);

    // empty range: nothing to enqueue
    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map [first; last) to device and run copy kernel
    // on device for copying & casting
    ::boost::compute::mapped_view<input_type> mapped_host(
        // make sure it's a pointer to constant data
        // to force read only mapping
        const_cast<const input_type*>(
            ::boost::addressof(*first)
        ),
        count,
        context
    );
    return copy_on_device_async(
        mapped_host.begin(), mapped_host.end(), result, queue, events
    );
}

// host -> device
// InputIterator is a contiguous iterator
// (value_types match: forwards directly to copy_to_device())
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      is_same_value_type<InputIterator, OutputIterator>,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    return copy_to_device(first, last, result, queue, events);
}

// host -> device
// Type mismatch between InputIterator and OutputIterator value_types
// InputIterator is a contiguous iterator
//
// Picks one of three strategies by comparing the input size in bytes
// against two thresholds. The thresholds default per device type and can
// be overridden through the device's parameter_cache.
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_same_value_type<InputIterator, OutputIterator>
                      >,
                      is_contiguous_iterator<InputIterator>
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    // (the cache key is specific to the input/output type pair)
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t count = iterator_range_size(first, last);
    size_t input_size_bytes = count * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    if(input_size_bytes < map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue, events);
    }
    // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
    // on host and then perform copy_to_device()
    else if(input_size_bytes < direct_copy_threshold) {
        std::vector<output_type> vector(first, last);
        return copy_to_device(
            vector.begin(), vector.end(), result, queue, events
        );
    }

    // [direct_copy_threshold; inf) -> map [first; last) to device and
    // run copy kernel on device for copying & casting
    //
    // Perform async copy to device, wait for it to be finished and
    // return the result.
    //
    // With the default thresholds the range cannot be empty here
    // (input_size_bytes >= map_copy_threshold > 0), so the future returned
    // by dispatch_copy_async() is expected to carry a valid event.
    // NOTE(review): if both thresholds loaded from the parameter cache are
    // 0, an empty range reaches this point and dispatch_copy_async()
    // returns a default-constructed future - confirm that case is handled.
    return dispatch_copy_async(first, last, result, queue, events).get();
}

// host -> device
// InputIterator is NOT a contiguous iterator
//
// Chooses between copy_to_device_map() and a host-side conversion followed
// by copy_to_device(), using the same thresholds as the contiguous
// type-mismatch overload.
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      mpl::not_<
                          is_device_iterator<InputIterator>
                      >,
                      is_device_iterator<OutputIterator>,
                      mpl::not_<
                          is_contiguous_iterator<InputIterator>
                      >
                  >
              >::type* = 0)
{
    typedef typename OutputIterator::value_type output_type;
    typedef typename std::iterator_traits<InputIterator>::value_type input_type;

    const device &device = queue.get_device();

    // loading parameters
    std::string cache_key =
        std::string("__boost_compute_copy_to_device_")
            + type_name<input_type>() + "_" + type_name<output_type>();
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    uint_ map_copy_threshold;
    uint_ direct_copy_threshold;

    // calculate default values of thresholds
    if (device.type() & device::gpu) {
        // GPUs
        map_copy_threshold = 524288; // 0.5 MB
        direct_copy_threshold = 52428800; // 50 MB
    }
    else {
        // CPUs and other devices
        map_copy_threshold = 134217728; // 128 MB
        direct_copy_threshold = 0; // it's never efficient for CPUs
    }

    // load thresholds
    map_copy_threshold =
        parameters->get(
            cache_key, "map_copy_threshold", map_copy_threshold
        );
    direct_copy_threshold =
        parameters->get(
            cache_key, "direct_copy_threshold", direct_copy_threshold
        );

    // select copy method based on thresholds & input_size_bytes
    size_t input_size = iterator_range_size(first, last);
    size_t input_size_bytes = input_size * sizeof(input_type);

    // [0; map_copy_threshold) -> copy_to_device_map()
    //
    // if direct_copy_threshold is less than or equal to
    // map_copy_threshold, copy_to_device_map() is used for every input
    if(input_size_bytes < map_copy_threshold
        || direct_copy_threshold <= map_copy_threshold) {
        return copy_to_device_map(first, last, result, queue, events);
    }
    // [map_copy_threshold; inf) -> convert [first; last)
    // on host and then perform copy_to_device()
    std::vector<output_type> vector(first, last);
    return copy_to_device(vector.begin(), vector.end(), result, queue, events);
}

// device -> host (async)
// Selected when the host and device value_types match. The host iterator
// must be contiguous (checked at compile time).
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            is_same_value_type<OutputIterator, InputIterator>
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_host_async(first, last, result, queue, events);
}

// device -> host (async)
// Type mismatch between InputIterator and OutputIterator value_types
//
// Wraps the host destination in a use_host_ptr buffer, converts into it
// with copy_on_device_async(), then maps and unmaps the buffer. The
// returned future is tied to the unmap event. For an empty range a
// default-constructed future is returned.
template<class InputIterator, class OutputIterator>
inline future<OutputIterator>
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    const wait_list &events,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator<InputIterator>,
                            mpl::not_<
                                is_device_iterator<OutputIterator>
                            >,
                            mpl::not_<
                                is_same_value_type<OutputIterator, InputIterator>
                            >
                        >
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator<OutputIterator>::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
    const context &context = queue.get_context();
    size_t count = iterator_range_size(first, last);

    // empty range: nothing to enqueue
    if(count < size_t(1)) {
        return future<OutputIterator>();
    }

    // map host memory to device
    buffer mapped_host(
        context,
        count * sizeof(output_type),
        buffer::write_only | buffer::use_host_ptr,
        static_cast<void*>(
            ::boost::addressof(*result)
        )
    );
    // copy async on device
    ::boost::compute::future<buffer_iterator<output_type> > future =
        copy_on_device_async(
            first,
            last,
            make_buffer_iterator<output_type>(mapped_host),
            queue,
            events
        );
    // update host memory asynchronously by mapping and unmapping memory
    // (the map waits on the copy's event; the unmap waits on the map)
    event map_event;
    void* ptr = queue.enqueue_map_buffer_async(
        mapped_host,
        CL_MAP_READ,
        0,
        count * sizeof(output_type),
        map_event,
        future.get_event()
    );
    event unmap_event =
        queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);
    return make_future(result + count, unmap_event);
}

// device -> host
// OutputIterator is a contiguous iterator
// (value_types match and are not bool: forwards directly to copy_to_host())
template<class InputIterator, class OutputIterator>
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              const wait_list &events,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator<InputIterator>,
                      mpl::not_<
                          is_device_iterator<OutputIterator>
                      >,
                      is_same_value_type<OutputIterator, InputIterator>,
                      is_contiguous_iterator<OutputIterator>,
                      mpl::not_<
                          is_bool_value_type<OutputIterator>
                      >
                  >
              >::type* = 0)
{
    return copy_to_host(first, last, result, queue, events);
}

// device -> host
// Type mismatch between InputIterator and OutputIterator value_types
// OutputIterator is NOT a contiguous iterator or value_type of OutputIterator
// is a boolean type.
template<class InputIterator, class OutputIterator> inline OutputIterator dispatch_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if< mpl::and_< is_device_iterator<InputIterator>, mpl::not_< is_device_iterator<OutputIterator> >, mpl::or_< mpl::not_< is_contiguous_iterator<OutputIterator> >, is_bool_value_type<OutputIterator> > > >::type* = 0) { typedef typename std::iterator_traits<OutputIterator>::value_type output_type; typedef typename InputIterator::value_type input_type; const device &device = queue.get_device(); // loading parameters std::string cache_key = std::string("__boost_compute_copy_to_host_") + type_name<input_type>() + "_" + type_name<output_type>(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); uint_ map_copy_threshold; uint_ direct_copy_threshold; // calculate default values of thresholds if (device.type() & device::gpu) { // GPUs map_copy_threshold = 33554432; // 30 MB direct_copy_threshold = 0; // it's never efficient for GPUs } else { // CPUs and other devices map_copy_threshold = 134217728; // 128 MB direct_copy_threshold = 0; // it's never efficient for CPUs } // load thresholds map_copy_threshold = parameters->get( cache_key, "map_copy_threshold", map_copy_threshold ); direct_copy_threshold = parameters->get( cache_key, "direct_copy_threshold", direct_copy_threshold ); // select copy method based on thresholds & input_size_bytes size_t count = iterator_range_size(first, last); size_t input_size_bytes = count * sizeof(input_type); // [0; map_copy_threshold) -> copy_to_host_map() // // if direct_copy_threshold is less than map_copy_threshold // copy_to_host_map() is used for every input if(input_size_bytes < map_copy_threshold || direct_copy_threshold <= map_copy_threshold) { return copy_to_host_map(first, last, result, queue, events); } // [map_copy_threshold; inf) -> copy [first;last) to temporary 
vector // then copy (and convert) to result using std::copy() std::vector<input_type> vector(count); copy_to_host(first, last, vector.begin(), queue, events); return std::copy(vector.begin(), vector.end(), result); } // device -> host // Type mismatch between InputIterator and OutputIterator value_types // OutputIterator is a contiguous iterator // value_type of OutputIterator is NOT a boolean type template<class InputIterator, class OutputIterator> inline OutputIterator dispatch_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if< mpl::and_< is_device_iterator<InputIterator>, mpl::not_< is_device_iterator<OutputIterator> >, mpl::not_< is_same_value_type<OutputIterator, InputIterator> >, is_contiguous_iterator<OutputIterator>, mpl::not_< is_bool_value_type<OutputIterator> > > >::type* = 0) { typedef typename std::iterator_traits<OutputIterator>::value_type output_type; typedef typename InputIterator::value_type input_type; const device &device = queue.get_device(); // loading parameters std::string cache_key = std::string("__boost_compute_copy_to_host_") + type_name<input_type>() + "_" + type_name<output_type>(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); uint_ map_copy_threshold; uint_ direct_copy_threshold; // calculate default values of thresholds if (device.type() & device::gpu) { // GPUs map_copy_threshold = 524288; // 0.5 MB direct_copy_threshold = 52428800; // 50 MB } else { // CPUs and other devices map_copy_threshold = 134217728; // 128 MB direct_copy_threshold = 0; // it's never efficient for CPUs } // load thresholds map_copy_threshold = parameters->get( cache_key, "map_copy_threshold", map_copy_threshold ); direct_copy_threshold = parameters->get( cache_key, "direct_copy_threshold", direct_copy_threshold ); // select copy method based on thresholds & input_size_bytes size_t count = iterator_range_size(first, 
last); size_t input_size_bytes = count * sizeof(input_type); // [0; map_copy_threshold) -> copy_to_host_map() if(input_size_bytes < map_copy_threshold) { return copy_to_host_map(first, last, result, queue, events); } // [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to // temporary vector then copy (and convert) to result using std::copy() else if(input_size_bytes < direct_copy_threshold) { std::vector<input_type> vector(count); copy_to_host(first, last, vector.begin(), queue, events); return std::copy(vector.begin(), vector.end(), result); } // [direct_copy_threshold; inf) -> map [result; result + input_size) to // device and run copy kernel on device for copying & casting // map host memory to device. // Perform async copy to host, wait for it to be finished and // return the result. // At this point we are sure that count > 1 (first != last), so event // returned by dispatch_copy_async() must be valid. return dispatch_copy_async(first, last, result, queue, events).get(); } // device -> device template<class InputIterator, class OutputIterator> inline OutputIterator dispatch_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if< mpl::and_< is_device_iterator<InputIterator>, is_device_iterator<OutputIterator>, mpl::not_< can_copy_with_copy_buffer< InputIterator, OutputIterator > > > >::type* = 0) { return copy_on_device(first, last, result, queue, events); } // device -> device (specialization for buffer iterators) template<class InputIterator, class OutputIterator> inline OutputIterator dispatch_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if< mpl::and_< is_device_iterator<InputIterator>, is_device_iterator<OutputIterator>, can_copy_with_copy_buffer< InputIterator, OutputIterator > > >::type* = 0) { typedef typename std::iterator_traits<InputIterator>::value_type 
value_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; difference_type n = std::distance(first, last); if(n < 1){ // nothing to copy return result; } queue.enqueue_copy_buffer(first.get_buffer(), result.get_buffer(), first.get_index() * sizeof(value_type), result.get_index() * sizeof(value_type), static_cast<size_t>(n) * sizeof(value_type), events); return result + n; } // device -> device (async) template<class InputIterator, class OutputIterator> inline future<OutputIterator> dispatch_copy_async(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if< mpl::and_< is_device_iterator<InputIterator>, is_device_iterator<OutputIterator>, mpl::not_< can_copy_with_copy_buffer< InputIterator, OutputIterator > > > >::type* = 0) { return copy_on_device_async(first, last, result, queue, events); } // device -> device (async, specialization for buffer iterators) template<class InputIterator, class OutputIterator> inline future<OutputIterator> dispatch_copy_async(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if< mpl::and_< is_device_iterator<InputIterator>, is_device_iterator<OutputIterator>, can_copy_with_copy_buffer< InputIterator, OutputIterator > > >::type* = 0) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; difference_type n = std::distance(first, last); if(n < 1){ // nothing to copy return make_future(result, event()); } event event_ = queue.enqueue_copy_buffer( first.get_buffer(), result.get_buffer(), first.get_index() * sizeof(value_type), result.get_index() * sizeof(value_type), static_cast<size_t>(n) * sizeof(value_type), events ); return make_future(result + n, event_); } // host -> host template<class InputIterator, class OutputIterator> 
inline OutputIterator dispatch_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events, typename boost::enable_if_c< !is_device_iterator<InputIterator>::value && !is_device_iterator<OutputIterator>::value >::type* = 0) { (void) queue; (void) events; return std::copy(first, last, result); } } // end detail namespace /// Copies the values in the range [\p first, \p last) to the range /// beginning at \p result. /// /// The generic copy() function can be used for a variety of data /// transfer tasks and provides a standard interface to the following /// OpenCL functions: /// /// \li \c clEnqueueReadBuffer() /// \li \c clEnqueueWriteBuffer() /// \li \c clEnqueueCopyBuffer() /// /// Unlike the aforementioned OpenCL functions, copy() will also work /// with non-contiguous data-structures (e.g. \c std::list<T>) as /// well as with "fancy" iterators (e.g. transform_iterator). /// /// \param first first element in the range to copy /// \param last last element in the range to copy /// \param result first element in the result range /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// For example, to copy an array of \c int values on the host to a vector on /// the device: /// \code /// // array on the host /// int data[] = { 1, 2, 3, 4 }; /// /// // vector on the device /// boost::compute::vector<int> vec(4, context); /// /// // copy values to the device vector /// boost::compute::copy(data, data + 4, vec.begin(), queue); /// \endcode /// /// The copy algorithm can also be used with standard containers such as /// \c std::vector<T>: /// \code /// std::vector<int> host_vector = ... /// boost::compute::vector<int> device_vector = ... 
/// /// // copy from the host to the device /// boost::compute::copy( /// host_vector.begin(), host_vector.end(), device_vector.begin(), queue /// ); /// /// // copy from the device to the host /// boost::compute::copy( /// device_vector.begin(), device_vector.end(), host_vector.begin(), queue /// ); /// \endcode /// /// Space complexity: \Omega(1) /// /// \see copy_n(), copy_if(), copy_async() template<class InputIterator, class OutputIterator> inline OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue(), const wait_list &events = wait_list()) { return detail::dispatch_copy(first, last, result, queue, events); } /// Copies the values in the range [\p first, \p last) to the range /// beginning at \p result. The copy is performed asynchronously. /// /// \see copy() template<class InputIterator, class OutputIterator> inline future<OutputIterator> copy_async(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue(), const wait_list &events = wait_list()) { return detail::dispatch_copy_async(first, last, result, queue, events); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP algorithm/partition_copy.hpp 0000644 00000005040 15125510617 0012310 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy_if.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Copies all of the elements in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p first_true /// and all of the elements for which \p predicate returns \c false to /// the range beginning at \p first_false. /// /// Space complexity: \Omega(2n) /// /// \see partition() template<class InputIterator, class OutputIterator1, class OutputIterator2, class UnaryPredicate> inline std::pair<OutputIterator1, OutputIterator2> partition_copy(InputIterator first, InputIterator last, OutputIterator1 first_true, OutputIterator2 first_false, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator2>::value); // copy true values OutputIterator1 last_true = ::boost::compute::copy_if(first, last, first_true, predicate, queue); // copy false values OutputIterator2 last_false = ::boost::compute::copy_if(first, last, first_false, not1(predicate), queue); // return iterators to the end of the true and the false ranges return std::make_pair(last_true, last_false); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP algorithm/set_intersection.hpp 0000644 00000014766 15125510617 0012645 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan 
<thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP #define BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/algorithm/detail/compact.hpp> #include <boost/compute/algorithm/detail/balanced_path.hpp> #include <boost/compute/algorithm/exclusive_scan.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Serial set intersection kernel class /// /// Subclass of meta_kernel to perform serial set intersection after tiling /// class serial_set_intersection_kernel : meta_kernel { public: unsigned int tile_size; serial_set_intersection_kernel() : meta_kernel("set_intersection") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class InputIterator3, class InputIterator4, class OutputIterator1, class OutputIterator2> void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator1 result, OutputIterator2 counts) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << "uint end2 = " << tile_first2[expr<uint_>("i+1")] << 
";\n" << "uint index = i*" << tile_size << ";\n" << "uint count = 0;\n" << "while(start1<end1 && start2<end2)\n" << "{\n" << " if(" << first1[expr<uint_>("start1")] << " == " << first2[expr<uint_>("start2")] << ")\n" << " {\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++; count++;\n" << " start1++; start2++;\n" << " }\n" << " else if(" << first1[expr<uint_>("start1")] << " < " << first2[expr<uint_>("start2")] << ")\n" << " start1++;\n" << " else start2++;\n" << "}\n" << counts[expr<uint_>("i")] << " = count;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace /// /// \brief Set intersection algorithm /// /// Finds the intersection of the sorted range [first1, last1) with the sorted /// range [first2, last2) and stores it in range starting at result /// \return Iterator pointing to end of intersection /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param result Iterator pointing to start of range in which the intersection /// will be stored /// \param queue Queue on which to execute /// /// Space complexity: /// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::value_type 
value_type; int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::balanced_path_kernel tiling_kernel; tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); fill_n(tile_b.begin(), 1, 0, queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); vector<value_type> temp_result(count1+count2, queue.get_context()); vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual intersections detail::serial_set_intersection_kernel intersection_kernel; intersection_kernel.tile_size = tile_size; intersection_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); intersection_kernel.exec(queue); exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); // Compact the results detail::compact_kernel compact_kernel; compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue); return result + (counts.end() - 1).read(queue); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP algorithm/copy_if.hpp 0000644 00000005016 15125510617 0010700 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP #define BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP #include <boost/static_assert.hpp> #include <boost/compute/algorithm/transform_if.hpp> #include <boost/compute/functional/identity.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { // like the copy_if() algorithm but writes the indices of the values for which // predicate returns true. template<class InputIterator, class OutputIterator, class Predicate> inline OutputIterator copy_index_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type T; return detail::transform_if_impl( first, last, result, identity<T>(), predicate, true, queue ); } } // end detail namespace /// Copies each element in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p result. 
/// /// Space complexity: \Omega(2n) template<class InputIterator, class OutputIterator, class Predicate> inline OutputIterator copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type T; return ::boost::compute::transform_if( first, last, result, identity<T>(), predicate, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP algorithm/rotate_copy.hpp 0000644 00000002771 15125510617 0011605 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP #include <boost/compute/system.hpp> #include <boost/compute/algorithm/copy.hpp> namespace boost { namespace compute { /// Performs left rotation such that element at n_first comes to the /// beginning and the output is stored in range starting at result. 
/// /// Space complexity: \Omega(1) /// /// \see rotate() template<class InputIterator, class OutputIterator> inline void rotate_copy(InputIterator first, InputIterator n_first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, n_first); size_t count2 = detail::iterator_range_size(n_first, last); ::boost::compute::copy(first+count, last, result, queue); ::boost::compute::copy(first, first+count, result+count2, queue); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP algorithm/inplace_merge.hpp 0000644 00000003777 15125510617 0012056 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP #define BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/merge.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Merges the sorted values in the range [\p first, \p middle) with /// the sorted values in the range [\p middle, \p last) in-place. 
/// /// Space complexity: \Omega(n) template<class Iterator> inline void inplace_merge(Iterator first, Iterator middle, Iterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); BOOST_ASSERT(first < middle && middle < last); typedef typename std::iterator_traits<Iterator>::value_type T; const context &context = queue.get_context(); ptrdiff_t left_size = std::distance(first, middle); ptrdiff_t right_size = std::distance(middle, last); vector<T> left(left_size, context); vector<T> right(right_size, context); copy(first, middle, left.begin(), queue); copy(middle, last, right.begin(), queue); ::boost::compute::merge( left.begin(), left.end(), right.begin(), right.end(), first, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP algorithm/all_of.hpp 0000644 00000002623 15125510617 0010505 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP #define BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/find_if_not.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if \p predicate returns \c true for all of the elements in /// the range [\p first, \p last). 
/// /// Space complexity: \Omega(1) /// /// \see any_of(), none_of() template<class InputIterator, class UnaryPredicate> inline bool all_of(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return ::boost::compute::find_if_not(first, last, predicate, queue) == last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP algorithm/search.hpp 0000644 00000005627 15125510617 0010525 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SEARCH_HPP #define BOOST_COMPUTE_ALGORITHM_SEARCH_HPP #include <boost/static_assert.hpp> #include <boost/compute/algorithm/detail/search_all.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// /// \brief Substring matching algorithm /// /// Searches for the first match of the pattern [p_first, p_last) /// in text [t_first, t_last). 
/// \return Iterator pointing to beginning of first occurrence /// /// \param t_first Iterator pointing to start of text /// \param t_last Iterator pointing to end of text /// \param p_first Iterator pointing to start of pattern /// \param p_last Iterator pointing to end of pattern /// \param queue Queue on which to execute /// /// Space complexity: \Omega(distance(\p t_first, \p t_last)) template<class TextIterator, class PatternIterator> inline TextIterator search(TextIterator t_first, TextIterator t_last, PatternIterator p_first, PatternIterator p_last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<TextIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<PatternIterator>::value); // there is no need to check if pattern starts at last n - 1 indices vector<uint_> matching_indices( detail::iterator_range_size(t_first, t_last) - detail::iterator_range_size(p_first, p_last) + 1, queue.get_context() ); // search_kernel puts value 1 at every index in vector where pattern starts at detail::search_kernel<PatternIterator, TextIterator, vector<uint_>::iterator> kernel; kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin()); kernel.exec(queue); vector<uint_>::iterator index = ::boost::compute::find( matching_indices.begin(), matching_indices.end(), uint_(1), queue ); // pattern was not found if(index == matching_indices.end()) return t_last; return t_first + detail::iterator_range_size(matching_indices.begin(), index); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SEARCH_HPP algorithm/remove.hpp 0000644 00000003722 15125510617 0010547 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_HPP #define BOOST_COMPUTE_ALGORITHM_REMOVE_HPP #include <boost/static_assert.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/remove_if.hpp> #include <boost/compute/type_traits/vector_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Removes each element equal to \p value in the range [\p first, /// \p last). /// /// Space complexity: \Omega(3n) /// /// \see remove_if() template<class Iterator, class T> inline Iterator remove(Iterator first, Iterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); typedef typename std::iterator_traits<Iterator>::value_type value_type; using ::boost::compute::_1; using ::boost::compute::lambda::all; if(vector_size<value_type>::value == 1){ return ::boost::compute::remove_if(first, last, _1 == value, queue); } else { return ::boost::compute::remove_if(first, last, all(_1 == value), queue); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REMOVE_HPP algorithm/partition.hpp 0000644 00000002763 15125510617 0011267 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_HPP #define BOOST_COMPUTE_ALGORITHM_PARTITION_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/stable_partition.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// /// Partitions the elements in the range [\p first, \p last) according to /// \p predicate. Order of the elements need not be preserved. /// /// Space complexity: \Omega(3n) /// /// \see is_partitioned() and stable_partition() /// template<class Iterator, class UnaryPredicate> inline Iterator partition(Iterator first, Iterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); return stable_partition(first, last, predicate, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP algorithm/find.hpp 0000644 00000003724 15125510617 0010174 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_HPP #include <boost/static_assert.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/type_traits/vector_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) that equals \p value. /// /// Space complexity: \Omega(1) template<class InputIterator, class T> inline InputIterator find(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; using ::boost::compute::_1; using ::boost::compute::lambda::all; if(vector_size<value_type>::value == 1){ return ::boost::compute::find_if( first, last, _1 == value, queue ); } else { return ::boost::compute::find_if( first, last, all(_1 == value), queue ); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_HPP algorithm/adjacent_difference.hpp 0000644 00000010500 15125510617 0013165 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP #define BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/functional/operator.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class BinaryFunction> inline OutputIterator dispatch_adjacent_difference(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction op, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); detail::meta_kernel k("adjacent_difference"); k << "const uint i = get_global_id(0);\n" << "if(i == 0){\n" << " " << result[k.var<uint_>("0")] << " = " << first[k.var<uint_>("0")] << ";\n" << "}\n" << "else {\n" << " " << result[k.var<uint_>("i")] << " = " << op(first[k.var<uint_>("i")], first[k.var<uint_>("i-1")]) << ";\n" << "}\n"; k.exec_1d(queue, 0, count, 1); return result + count; } } // end detail namespace /// Stores the difference of each pair of consecutive values in the range /// [\p first, \p last) to the range beginning at \p result. If \p op is not /// provided, \c minus<T> is used. 
/// /// \param first first element in the input range /// \param last last element in the input range /// \param result first element in the output range /// \param op binary difference function /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// Space complexity: \Omega(1)<br> /// Space complexity when \p result == \p first: \Omega(n) /// /// \see adjacent_find() template<class InputIterator, class OutputIterator, class BinaryFunction> inline OutputIterator adjacent_difference(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; if(first == last) { return result; } if (first == result) { vector<value_type> temp(detail::iterator_range_size(first, last), queue.get_context()); copy(first, last, temp.begin(), queue); return ::boost::compute::detail::dispatch_adjacent_difference( temp.begin(), temp.end(), result, op, queue ); } else { return ::boost::compute::detail::dispatch_adjacent_difference( first, last, result, op, queue ); } } /// \overload template<class InputIterator, class OutputIterator> inline OutputIterator adjacent_difference(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; return ::boost::compute::adjacent_difference( first, last, result, ::boost::compute::minus<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP algorithm/replace.hpp 0000644 00000005034 
15125510617 0010663 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_HPP #define BOOST_COMPUTE_ALGORITHM_REPLACE_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator, class T> class replace_kernel : public meta_kernel { public: replace_kernel() : meta_kernel("replace") { m_count = 0; } void set_range(Iterator first, Iterator last) { m_count = detail::iterator_range_size(first, last); *this << "const uint i = get_global_id(0);\n" << "if(" << first[var<cl_uint>("i")] << " == " << var<T>("old_value") << ")\n" << " " << first[var<cl_uint>("i")] << '=' << var<T>("new_value") << ";\n"; } void set_old_value(const T &old_value) { add_set_arg<T>("old_value", old_value); } void set_new_value(const T &new_value) { add_set_arg<T>("new_value", new_value); } void exec(command_queue &queue) { if(m_count == 0){ // nothing to do return; } exec_1d(queue, 0, m_count); } private: size_t m_count; }; } // end detail namespace /// Replaces each instance of \p old_value in the range [\p first, /// \p last) with \p new_value. 
/// /// Space complexity: \Omega(1) template<class Iterator, class T> inline void replace(Iterator first, Iterator last, const T &old_value, const T &new_value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); detail::replace_kernel<Iterator, T> kernel; kernel.set_range(first, last); kernel.set_old_value(old_value); kernel.set_new_value(new_value); kernel.exec(queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REPLACE_HPP algorithm/sort_by_key.hpp 0000644 00000014014 15125510617 0011577 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/utility/enable_if.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp> #include <boost/compute/algorithm/detail/merge_sort_on_gpu.hpp> #include <boost/compute/algorithm/detail/insertion_sort.hpp> #include <boost/compute/algorithm/detail/radix_sort.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class KeyIterator, class ValueIterator> inline void dispatch_gpu_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, less<typename 
std::iterator_traits<KeyIterator>::value_type> compare, command_queue &queue, typename boost::enable_if_c< is_radix_sortable< typename std::iterator_traits<KeyIterator>::value_type >::value >::type* = 0) { size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 32){ detail::serial_insertion_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } else { detail::radix_sort_by_key( keys_first, keys_last, values_first, queue ); } } template<class KeyIterator, class ValueIterator> inline void dispatch_gpu_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, greater<typename std::iterator_traits<KeyIterator>::value_type> compare, command_queue &queue, typename boost::enable_if_c< is_radix_sortable< typename std::iterator_traits<KeyIterator>::value_type >::value >::type* = 0) { size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 32){ detail::serial_insertion_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } else { // radix sorts in descending order detail::radix_sort_by_key( keys_first, keys_last, values_first, false, queue ); } } template<class KeyIterator, class ValueIterator, class Compare> inline void dispatch_gpu_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 32){ detail::serial_insertion_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } else { detail::merge_sort_by_key_on_gpu( keys_first, keys_last, values_first, compare, queue ); } } template<class KeyIterator, class ValueIterator, class Compare> inline void dispatch_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { if(queue.get_device().type() & device::gpu) { dispatch_gpu_sort_by_key(keys_first, keys_last, values_first, compare, queue); return; } 
::boost::compute::detail::merge_sort_by_key_on_cpu( keys_first, keys_last, values_first, compare, queue ); } } // end detail namespace /// Performs a key-value sort using the keys in the range [\p keys_first, /// \p keys_last) on the values in the range [\p values_first, /// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare. /// /// If no compare function is specified, \c less is used. /// /// Space complexity: \Omega(2n) /// /// \see sort() template<class KeyIterator, class ValueIterator, class Compare> inline void sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<KeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<ValueIterator>::value); ::boost::compute::detail::dispatch_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } /// \overload template<class KeyIterator, class ValueIterator> inline void sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<KeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<ValueIterator>::value); typedef typename std::iterator_traits<KeyIterator>::value_type key_type; ::boost::compute::sort_by_key( keys_first, keys_last, values_first, less<key_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP algorithm/equal_range.hpp 0000644 00000003144 15125510617 0011533 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP #define BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP #include <utility> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/lower_bound.hpp> #include <boost/compute/algorithm/upper_bound.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns a pair of iterators containing the range of values equal /// to \p value in the sorted range [\p first, \p last). /// /// Space complexity: \Omega(1) template<class InputIterator, class T> inline std::pair<InputIterator, InputIterator> equal_range(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return std::make_pair( ::boost::compute::lower_bound(first, last, value, queue), ::boost::compute::upper_bound(first, last, value, queue) ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP algorithm/set_symmetric_difference.hpp 0000644 00000016555 15125510617 0014323 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP #define BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/algorithm/detail/compact.hpp> #include <boost/compute/algorithm/detail/balanced_path.hpp> #include <boost/compute/algorithm/exclusive_scan.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Serial set symmetric difference kernel class /// /// Subclass of meta_kernel to perform serial set symmetric /// difference after tiling /// class serial_set_symmetric_difference_kernel : meta_kernel { public: unsigned int tile_size; serial_set_symmetric_difference_kernel() : meta_kernel("set_symmetric_difference") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class InputIterator3, class InputIterator4, class OutputIterator1, class OutputIterator2> void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator1 result, OutputIterator2 counts) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << "uint index = i*" << tile_size << ";\n" << "uint count = 0;\n" << "while(start1<end1 && start2<end2)\n" << "{\n" << " if(" << first1[expr<uint_>("start1")] << " == " << 
first2[expr<uint_>("start2")] << ")\n" << " {\n" << " start1++; start2++;\n" << " }\n" << " else if(" << first1[expr<uint_>("start1")] << " < " << first2[expr<uint_>("start2")] << ")\n" << " {\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++; count++;\n" << " start1++;\n" << " }\n" << " else\n" << " {\n" << result[expr<uint_>("index")] << " = " << first2[expr<uint_>("start2")] << ";\n" << " index++; count++;\n" << " start2++;\n" << " }\n" << "}\n" << "while(start1<end1)\n" << "{\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++; count++;\n" << " start1++;\n" << "}\n" << "while(start2<end2)\n" << "{\n" << result[expr<uint_>("index")] << " = " << first2[expr<uint_>("start2")] << ";\n" << " index++; count++;\n" << " start2++;\n" << "}\n" << counts[expr<uint_>("i")] << " = count;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace /// /// \brief Set symmetric difference algorithm /// /// Finds the symmetric difference of the sorted range [first2, last2) from /// the sorted range [first1, last1) and stores it in range starting at result /// \return Iterator pointing to end of symmetric difference /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param result Iterator pointing to start of range in which the symmetric /// difference will be stored /// \param queue Queue on which to execute /// /// Space complexity: /// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, 
InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::value_type value_type; int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::balanced_path_kernel tiling_kernel; tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); fill_n(tile_b.begin(), 1, 0, queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); vector<value_type> temp_result(count1+count2, queue.get_context()); vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); fill_n(counts.end()-1, 1, 0, queue); // Find individual symmetric differences detail::serial_set_symmetric_difference_kernel symmetric_difference_kernel; symmetric_difference_kernel.tile_size = tile_size; symmetric_difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), temp_result.begin(), counts.begin()); symmetric_difference_kernel.exec(queue); exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); // Compact the results detail::compact_kernel compact_kernel; compact_kernel.tile_size = tile_size; compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); compact_kernel.exec(queue); return result + (counts.end() - 1).read(queue); } } //end compute namespace } //end boost namespace #endif // 
BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP algorithm/adjacent_find.hpp 0000644 00000012301 15125510617 0012014 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP #define BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/functional/operator.hpp> #include <boost/compute/type_traits/vector_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class Compare> inline InputIterator serial_adjacent_find(InputIterator first, InputIterator last, Compare compare, command_queue &queue) { if(first == last){ return last; } const context &context = queue.get_context(); detail::scalar<uint_> output(context); detail::meta_kernel k("serial_adjacent_find"); size_t size_arg = k.add_arg<const uint_>("size"); size_t output_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output"); k << k.decl<uint_>("result") << " = size;\n" << "for(uint i = 0; i < size - 1; i++){\n" << " if(" << compare(first[k.expr<uint_>("i")], first[k.expr<uint_>("i+1")]) << "){\n" << " result = i;\n" << " break;\n" << " }\n" << "}\n" << "*output = result;\n"; k.set_arg<const uint_>( size_arg, 
static_cast<uint_>(detail::iterator_range_size(first, last)) ); k.set_arg(output_arg, output.get_buffer()); k.exec_1d(queue, 0, 1, 1); return first + output.read(queue); } template<class InputIterator, class Compare> inline InputIterator adjacent_find_with_atomics(InputIterator first, InputIterator last, Compare compare, command_queue &queue) { if(first == last){ return last; } const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); // initialize output to the last index detail::scalar<uint_> output(context); output.write(static_cast<uint_>(count), queue); detail::meta_kernel k("adjacent_find_with_atomics"); size_t output_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output"); k << "const uint i = get_global_id(0);\n" << "if(" << compare(first[k.expr<uint_>("i")], first[k.expr<uint_>("i+1")]) << "){\n" << " atomic_min(output, i);\n" << "}\n"; k.set_arg(output_arg, output.get_buffer()); k.exec_1d(queue, 0, count - 1, 1); return first + output.read(queue); } } // end detail namespace /// Searches the range [\p first, \p last) for two identical adjacent /// elements and returns an iterator pointing to the first. /// /// \param first first element in the range to search /// \param last last element in the range to search /// \param compare binary comparison function /// \param queue command queue to perform the operation /// /// \return \c InputIteratorm to the first element which compares equal /// to the following element. If none are equal, returns \c last. 
/// /// Space complexity: \Omega(1) /// /// \see find(), adjacent_difference() template<class InputIterator, class Compare> inline InputIterator adjacent_find(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); size_t count = detail::iterator_range_size(first, last); if(count < 32){ return detail::serial_adjacent_find(first, last, compare, queue); } else { return detail::adjacent_find_with_atomics(first, last, compare, queue); } } /// \overload template<class InputIterator> inline InputIterator adjacent_find(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; using ::boost::compute::lambda::_1; using ::boost::compute::lambda::_2; using ::boost::compute::lambda::all; if(vector_size<value_type>::value == 1){ return ::boost::compute::adjacent_find( first, last, _1 == _2, queue ); } else { return ::boost::compute::adjacent_find( first, last, all(_1 == _2), queue ); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP algorithm/is_sorted.hpp 0000644 00000004604 15125510617 0011245 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP #define BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP #include <boost/static_assert.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional/bind.hpp> #include <boost/compute/functional/operator.hpp> #include <boost/compute/algorithm/adjacent_find.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if the values in the range [\p first, \p last) /// are in sorted order. /// /// \param first first element in the range to check /// \param last last element in the range to check /// \param compare comparison function (by default \c less) /// \param queue command queue to perform the operation /// /// \return \c true if the range [\p first, \p last) is sorted /// /// Space complexity: \Omega(1) /// /// \see sort() template<class InputIterator, class Compare> inline bool is_sorted(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); using ::boost::compute::placeholders::_1; using ::boost::compute::placeholders::_2; return ::boost::compute::adjacent_find( first, last, ::boost::compute::bind(compare, _2, _1), queue ) == last; } /// \overload template<class InputIterator> inline bool is_sorted(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; return ::boost::compute::is_sorted( first, last, ::boost::compute::less<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP algorithm/search_n.hpp 0000644 00000010514 15125510617 0011031 0 ustar 00 
//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Search kernel class /// /// Subclass of meta_kernel which is capable of performing search_n /// template<class TextIterator, class OutputIterator> class search_n_kernel : public meta_kernel { public: typedef typename std::iterator_traits<TextIterator>::value_type value_type; search_n_kernel() : meta_kernel("search_n") {} void set_range(TextIterator t_first, TextIterator t_last, value_type value, size_t n, OutputIterator result) { m_n = n; m_n_arg = add_arg<uint_>("n"); m_value = value; m_value_arg = add_arg<value_type>("value"); m_count = iterator_range_size(t_first, t_last); m_count = m_count + 1 - m_n; *this << "uint i = get_global_id(0);\n" << "uint i1 = i;\n" << "uint j;\n" << "for(j = 0; j<n; j++,i++)\n" << "{\n" << " if(value != " << t_first[expr<uint_>("i")] << ")\n" << " j = n + 1;\n" << "}\n" << "if(j == n)\n" << result[expr<uint_>("i1")] << " = 1;\n" << "else\n" << result[expr<uint_>("i1")] << " = 0;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } set_arg(m_n_arg, uint_(m_n)); 
set_arg(m_value_arg, m_value); return exec_1d(queue, 0, m_count); } private: size_t m_n; size_t m_n_arg; size_t m_count; value_type m_value; size_t m_value_arg; }; } //end detail namespace /// /// \brief Substring matching algorithm /// /// Searches for the first occurrence of n consecutive occurrences of /// value in text [t_first, t_last). /// \return Iterator pointing to beginning of first occurrence /// /// \param t_first Iterator pointing to start of text /// \param t_last Iterator pointing to end of text /// \param n Number of times value repeats /// \param value Value which repeats /// \param queue Queue on which to execute /// /// Space complexity: \Omega(distance(\p t_first, \p t_last)) template<class TextIterator, class ValueType> inline TextIterator search_n(TextIterator t_first, TextIterator t_last, size_t n, ValueType value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<TextIterator>::value); // there is no need to check if pattern starts at last n - 1 indices vector<uint_> matching_indices( detail::iterator_range_size(t_first, t_last) + 1 - n, queue.get_context() ); // search_n_kernel puts value 1 at every index in vector where pattern // of n values starts at detail::search_n_kernel<TextIterator, vector<uint_>::iterator> kernel; kernel.set_range(t_first, t_last, value, n, matching_indices.begin()); kernel.exec(queue); vector<uint_>::iterator index = ::boost::compute::find( matching_indices.begin(), matching_indices.end(), uint_(1), queue ); // pattern was not found if(index == matching_indices.end()) return t_last; return t_first + detail::iterator_range_size(matching_indices.begin(), index); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP algorithm/rotate.hpp 0000644 00000003425 15125510617 0010550 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // 
// Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_HPP #define BOOST_COMPUTE_ALGORITHM_ROTATE_HPP #include <boost/compute/system.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/container/vector.hpp> namespace boost { namespace compute { /// Performs left rotation such that element at \p n_first comes to the /// beginning. /// /// Space complexity: \Omega(distance(\p first, \p last)) /// /// \see rotate_copy() template<class InputIterator> inline void rotate(InputIterator first, InputIterator n_first, InputIterator last, command_queue &queue = system::default_queue()) { //Handle trivial cases if (n_first==first || n_first==last) { return; } //Handle others typedef typename std::iterator_traits<InputIterator>::value_type T; size_t count = detail::iterator_range_size(first, n_first); size_t count2 = detail::iterator_range_size(first, last); const context &context = queue.get_context(); vector<T> temp(count2, context); ::boost::compute::copy(first, last, temp.begin(), queue); ::boost::compute::copy(temp.begin()+count, temp.end(), first, queue); ::boost::compute::copy(temp.begin(), temp.begin()+count, last-count, queue); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ROTATE_HPP algorithm/remove_if.hpp 0000644 00000003472 15125510617 0011227 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP #define BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/copy_if.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/functional/logical.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Removes each element for which \p predicate returns \c true in the /// range [\p first, \p last). /// /// Space complexity: \Omega(3n) /// /// \see remove() template<class Iterator, class Predicate> inline Iterator remove_if(Iterator first, Iterator last, Predicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); typedef typename std::iterator_traits<Iterator>::value_type value_type; // temporary storage for the input data ::boost::compute::vector<value_type> tmp(first, last, queue); return ::boost::compute::copy_if(tmp.begin(), tmp.end(), first, not1(predicate), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP algorithm/stable_sort.hpp 0000644 00000007341 15125510617 0011574 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP #define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp> #include <boost/compute/algorithm/detail/merge_sort_on_gpu.hpp> #include <boost/compute/algorithm/detail/radix_sort.hpp> #include <boost/compute/algorithm/detail/insertion_sort.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/functional/operator.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator, class Compare> inline void dispatch_gpu_stable_sort(Iterator first, Iterator last, Compare compare, command_queue &queue) { size_t count = detail::iterator_range_size(first, last); if(count < 32){ detail::serial_insertion_sort( first, last, compare, queue ); } else { detail::merge_sort_on_gpu( first, last, compare, true /* stable */, queue ); } } template<class T> inline typename boost::enable_if_c<is_radix_sortable<T>::value>::type dispatch_gpu_stable_sort(buffer_iterator<T> first, buffer_iterator<T> last, less<T>, command_queue &queue) { ::boost::compute::detail::radix_sort(first, last, queue); } template<class T> inline typename boost::enable_if_c<is_radix_sortable<T>::value>::type dispatch_gpu_stable_sort(buffer_iterator<T> first, buffer_iterator<T> last, greater<T>, command_queue &queue) { // radix sorts in descending order ::boost::compute::detail::radix_sort(first, last, false, queue); } } // end detail namespace /// Sorts the values in the range [\p first, \p last) according to /// \p compare. The relative order of identical values is preserved. 
/// /// Space complexity: \Omega(n) /// /// \see sort(), is_sorted() template<class Iterator, class Compare> inline void stable_sort(Iterator first, Iterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); if(queue.get_device().type() & device::gpu) { ::boost::compute::detail::dispatch_gpu_stable_sort( first, last, compare, queue ); return; } ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue); } /// \overload template<class Iterator> inline void stable_sort(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); typedef typename std::iterator_traits<Iterator>::value_type value_type; ::boost::compute::less<value_type> less; ::boost::compute::stable_sort(first, last, less, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP algorithm/unique_copy.hpp 0000644 00000014435 15125510617 0011615 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
namespace detail {

/// Serial implementation of unique_copy().
///
/// Generates a meta_kernel whose body walks the input sequentially: the
/// first element is always written to \p result, and each following element
/// is written only when \p op(current, next) is false. The number of
/// elements written is stored in a one-element device vector and read back
/// to the host.
///
/// \return \p result advanced by the number of elements written
///         (\p result itself for an empty input range)
template<class InputIterator, class OutputIterator, class BinaryPredicate>
inline OutputIterator serial_unique_copy(InputIterator first,
                                         InputIterator last,
                                         OutputIterator result,
                                         BinaryPredicate op,
                                         command_queue &queue)
{
    // the generated kernel unconditionally reads element 0, so an empty
    // range must be handled on the host
    if(first == last){
        return result;
    }

    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    const context &context = queue.get_context();

    size_t count = detail::iterator_range_size(first, last);

    detail::meta_kernel k("serial_unique_copy");

    // single-element device buffer receiving the number of unique values
    vector<uint_> unique_count_vector(1, context);

    size_t size_arg = k.add_arg<const uint_>("size");
    size_t unique_count_arg = k.add_arg<uint_ *>(memory_object::global_memory, "unique_count");

    // kernel body: sequential scan keeping "current" as the last value
    // written and "index" as the position of the last write
    k << k.decl<uint_>("index") << " = 0;\n"
      << k.decl<value_type>("current") << " = " << first[k.var<uint_>("0")] << ";\n"
      << result[k.var<uint_>("0")] << " = current;\n"
      << "for(uint i = 1; i < size; i++){\n"
      << " " << k.decl<value_type>("next") << " = " << first[k.var<uint_>("i")] << ";\n"
      << " if(!" << op(k.var<value_type>("current"), k.var<value_type>("next")) << "){\n"
      << " " << result[k.var<uint_>("++index")] << " = next;\n"
      << " " << "current = next;\n"
      << " }\n"
      << "}\n"
      << "*unique_count = index + 1;\n";

    k.set_arg<const uint_>(size_arg, count);
    k.set_arg(unique_count_arg, unique_count_vector.get_buffer());

    // NOTE(review): presumably (queue, offset, global size, local size),
    // i.e. a single work-item -- confirm against meta_kernel::exec_1d
    k.exec_1d(queue, 0, 1, 1);

    // read the number of unique elements back to the host
    uint_ unique_count;
    copy_n(unique_count_vector.begin(), 1, &unique_count, queue);

    return result + unique_count;
}

/// Parallel implementation of unique_copy().
///
/// Marks every element that starts a new run with a flag of one, collects
/// the indices of the flagged elements and gathers those elements from the
/// input into \p result.
///
/// \return \p result advanced by the number of gathered elements
///         (\p result itself for an empty input range)
template<class InputIterator, class OutputIterator, class BinaryPredicate>
inline OutputIterator unique_copy(InputIterator first,
                                  InputIterator last,
                                  OutputIterator result,
                                  BinaryPredicate op,
                                  command_queue &queue)
{
    // "last - 1" below requires a non-empty range
    if(first == last){
        return result;
    }

    const context &context = queue.get_context();
    size_t count = detail::iterator_range_size(first, last);

    // flags marking unique elements
    vector<uint_> flags(count, context);

    // find each unique element and mark it with a one
    // (flags[i + 1] = !op(first[i], first[i + 1]))
    transform(
        first, last - 1, first + 1, flags.begin() + 1, not2(op), queue
    );

    // first element is always unique
    fill_n(flags.begin(), 1, 1, queue);

    // storage for destination indices
    vector<uint_> indices(count, context);

    // copy indices for each unique element
    vector<uint_>::iterator last_index = detail::copy_index_if(
        flags.begin(), flags.end(), indices.begin(), lambda::_1 == 1, queue
    );

    // copy unique values from input to output using the computed indices
    gather(indices.begin(), last_index, first, result, queue);

    // return an iterator to the end of the unique output range
    return result + std::distance(indices.begin(), last_index);
}

} // end detail namespace
/// /// \param first first element in the input range /// \param last last element in the input range /// \param result first element in the result range /// \param op binary operator used to check for uniqueness /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// Space complexity: \Omega(4n) /// /// \see unique() template<class InputIterator, class OutputIterator, class BinaryPredicate> inline OutputIterator unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); size_t count = detail::iterator_range_size(first, last); if(count < 32){ return detail::serial_unique_copy(first, last, result, op, queue); } else { return detail::unique_copy(first, last, result, op, queue); } } /// \overload template<class InputIterator, class OutputIterator> inline OutputIterator unique_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; return ::boost::compute::unique_copy( first, last, result, ::boost::compute::equal_to<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP algorithm/find_if_not.hpp 0000644 00000003164 15125510617 0011530 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) for which \p predicate returns \c false. /// /// Space complexity: \Omega(1) /// /// \see find_if() template<class InputIterator, class UnaryPredicate> inline InputIterator find_if_not(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return ::boost::compute::find_if( first, last, not1(predicate), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP algorithm/lexicographical_compare.hpp 0000644 00000011526 15125510617 0014117 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Mageswaran.D <mageswaran1989@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
namespace detail {

// OpenCL source of the comparison kernel. T1 and T2 are supplied as -D
// build options. One work-item handles one index i and writes a flag into
// result_buf[i]; a flag of 0 is what the host side later searches for.
//
//  * i different from both sizes: flag is 0 when range1[i] < range2[i],
//    otherwise 1
//  * i equal to size1 (and not size2): flag is 0
//  * i equal to size2: flag is 1
//
// NOTE(review): an index strictly between the two sizes takes the first
// branch and reads range1[i] / range2[i] beyond the end of the shorter
// range -- looks like an out-of-bounds read when the sizes differ by more
// than one; confirm.
const char lexicographical_compare_source[] =
"__kernel void lexicographical_compare(const uint size1,\n"
" const uint size2,\n"
" __global const T1 *range1,\n"
" __global const T2 *range2,\n"
" __global bool *result_buf)\n"
"{\n"
" const uint i = get_global_id(0);\n"
" if((i != size1) && (i != size2)){\n"
    //Individual elements are compared and results are stored in parallel.
    //0 is true
" if(range1[i] < range2[i])\n"
" result_buf[i] = 0;\n"
" else\n"
" result_buf[i] = 1;\n"
" }\n"
" else\n"
" result_buf[i] = !((i == size1) && (i != size2));\n"
"}\n";

/// Builds (or fetches from the global program cache) the comparison kernel
/// for the two value types, runs it over max(size1, size2) work-items and
/// returns whether any resulting flag equals 0.
///
/// \return \c false when both ranges are empty; otherwise the result of
///         any_of() over the flag vector
///
/// NOTE(review): the flags are combined with any_of(), so a 0 at any index
/// yields \c true regardless of what earlier indices compared to -- this
/// does not look like a position-ordered lexicographical comparison; confirm
/// the intended semantics.
/// NOTE(review): the kernel receives first1.get_buffer() /
/// first2.get_buffer() and indexes them from 0, so iterators that do not
/// point at the start of their buffer appear not to be honoured; confirm.
template<class InputIterator1, class InputIterator2>
inline bool dispatch_lexicographical_compare(InputIterator1 first1,
                                             InputIterator1 last1,
                                             InputIterator2 first2,
                                             InputIterator2 last2,
                                             command_queue &queue)
{
    const boost::compute::context &context = queue.get_context();

    boost::shared_ptr<program_cache> cache =
        program_cache::get_global_cache(context);

    size_t iterator_size1 = iterator_range_size(first1, last1);
    size_t iterator_size2 = iterator_range_size(first2, last2);
    size_t max_size = (std::max)(iterator_size1, iterator_size2);

    // two empty ranges: the first is not less than the second
    if(max_size == 0){
        return false;
    }

    // one flag per work-item
    boost::compute::vector<bool> result_vector(max_size, context);

    typedef typename std::iterator_traits<InputIterator1>::value_type value_type1;
    typedef typename std::iterator_traits<InputIterator2>::value_type value_type2;

    // load (or create) lexicographical compare program
    // (the cache key carries both type names, so each type pair gets its
    // own program)
    std::string cache_key =
        std::string("__boost_lexicographical_compare")
            + type_name<value_type1>() + type_name<value_type2>();

    std::stringstream options;
    options << " -DT1=" << type_name<value_type1>();
    options << " -DT2=" << type_name<value_type2>();

    program lexicographical_compare_program = cache->get_or_build(
        cache_key, options.str(), lexicographical_compare_source, context
    );

    kernel lexicographical_compare_kernel(lexicographical_compare_program,
                                          "lexicographical_compare");

    // argument order matches the kernel signature above
    lexicographical_compare_kernel.set_arg<uint_>(0, iterator_size1);
    lexicographical_compare_kernel.set_arg<uint_>(1, iterator_size2);
    lexicographical_compare_kernel.set_arg(2, first1.get_buffer());
    lexicographical_compare_kernel.set_arg(3, first2.get_buffer());
    lexicographical_compare_kernel.set_arg(4, result_vector.get_buffer());

    queue.enqueue_1d_range_kernel(lexicographical_compare_kernel,
                                  0,
                                  max_size,
                                  0);

    // a flag of 0 anywhere makes the result true
    return boost::compute::any_of(result_vector.begin(),
                                  result_vector.end(),
                                  _1 == 0,
                                  queue);
}

} // end detail namespace
/// /// Space complexity: /// \Omega(max(distance(\p first1, \p last1), distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2> inline bool lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); return detail::dispatch_lexicographical_compare(first1, last1, first2, last2, queue); } } // end compute namespace } // end boost namespac algorithm/detail/compact.hpp 0000644 00000004214 15125510617 0012137 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP #include <iterator> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Compact kernel class /// /// Subclass of meta_kernel to compact the result of set kernels to /// get actual sets /// class compact_kernel : public meta_kernel { public: unsigned int tile_size; compact_kernel() : meta_kernel("compact") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class OutputIterator> void set_range(InputIterator1 start, InputIterator2 counts_begin, InputIterator2 counts_end, OutputIterator result) { m_count = iterator_range_size(counts_begin, counts_end) - 1; *this << "uint i = get_global_id(0);\n" << "uint count = i*" << tile_size << ";\n" << "for(uint j = " << counts_begin[expr<uint_>("i")] << "; j<" << counts_begin[expr<uint_>("i+1")] << "; j++, count++)\n" << "{\n" << result[expr<uint_>("j")] << " = " << start[expr<uint_>("count")] << ";\n" << "}\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP algorithm/detail/merge_path.hpp 0000644 00000007442 15125510617 0012632 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP #include <iterator> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Merge Path kernel class /// /// Subclass of meta_kernel to break two sets into tiles according /// to their merge path /// class merge_path_kernel : public meta_kernel { public: unsigned int tile_size; merge_path_kernel() : meta_kernel("merge_path") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class OutputIterator1, class OutputIterator2, class Compare> void set_range(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result_a, OutputIterator2 result_b, Compare comp) { m_a_count = iterator_range_size(first1, last1); m_a_count_arg = add_arg<uint_>("a_count"); m_b_count = iterator_range_size(first2, last2); m_b_count_arg = add_arg<uint_>("b_count"); *this << "uint i = get_global_id(0);\n" << "uint target = (i+1)*" << tile_size << ";\n" << "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" << "uint end = min(target,a_count);\n" << "uint a_index, b_index;\n" << "while(start<end)\n" << "{\n" << " a_index = (start + end)/2;\n" << " b_index = target - a_index - 1;\n" << " if(!(" << comp(first2[expr<uint_>("b_index")], first1[expr<uint_>("a_index")]) << "))\n" << " start = a_index + 1;\n" << " else end = a_index;\n" << "}\n" << result_a[expr<uint_>("i")] << " = start;\n" << result_b[expr<uint_>("i")] << " = target - start;\n"; } template<class InputIterator1, class InputIterator2, class OutputIterator1, class OutputIterator2> 
void set_range(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result_a, OutputIterator2 result_b) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; ::boost::compute::less<value_type> less_than; set_range(first1, last1, first2, last2, result_a, result_b, less_than); } event exec(command_queue &queue) { if((m_a_count + m_b_count)/tile_size == 0) { return event(); } set_arg(m_a_count_arg, uint_(m_a_count)); set_arg(m_b_count_arg, uint_(m_b_count)); return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size); } private: size_t m_a_count; size_t m_a_count_arg; size_t m_b_count; size_t m_b_count_arg; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP algorithm/detail/find_extrema.hpp 0000644 00000004611 15125510617 0013157 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/algorithm/detail/find_extrema_on_cpu.hpp> #include <boost/compute/algorithm/detail/find_extrema_with_reduce.hpp> #include <boost/compute/algorithm/detail/find_extrema_with_atomics.hpp> #include <boost/compute/algorithm/detail/serial_find_extrema.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class Compare> inline InputIterator find_extrema(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { size_t count = iterator_range_size(first, last); // handle trivial cases if(count == 0 || count == 1){ return first; } const device &device = queue.get_device(); // CPU if(device.type() & device::cpu) { return find_extrema_on_cpu(first, last, compare, find_minimum, queue); } // GPU // use serial method for small inputs if(count < 512) { return serial_find_extrema(first, last, compare, find_minimum, queue); } // find_extrema_with_reduce() is used only if requirements are met if(find_extrema_with_reduce_requirements_met(first, last, queue)) { return find_extrema_with_reduce(first, last, compare, find_minimum, queue); } // use serial method for OpenCL version 1.0 due to // problems with atomic_cmpxchg() #ifndef BOOST_COMPUTE_CL_VERSION_1_1 return serial_find_extrema(first, last, compare, find_minimum, queue); #endif return find_extrema_with_atomics(first, last, compare, find_minimum, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP algorithm/detail/random_fill.hpp 0000644 00000003530 15125510617 0012777 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle 
Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP #include <iterator> #include <boost/compute/command_queue.hpp> #include <boost/compute/random/default_random_engine.hpp> #include <boost/compute/random/uniform_real_distribution.hpp> namespace boost { namespace compute { namespace detail { template<class OutputIterator, class Generator> inline void random_fill(OutputIterator first, OutputIterator last, Generator &g, command_queue &queue) { g.fill(first, last, queue); } template<class OutputIterator> inline void random_fill(OutputIterator first, OutputIterator last, typename std::iterator_traits<OutputIterator>::value_type lo, typename std::iterator_traits<OutputIterator>::value_type hi, command_queue &queue) { typedef typename std::iterator_traits<OutputIterator>::value_type value_type; typedef typename boost::compute::default_random_engine engine_type; typedef typename boost::compute::uniform_real_distribution<value_type> distribution_type; engine_type engine(queue); distribution_type generator(lo, hi); generator.fill(first, last, engine, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP algorithm/detail/count_if_with_threads.hpp 0000644 00000010221 15125510617 0015057 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP #include <numeric> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/container/vector.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class Predicate> class count_if_with_threads_kernel : meta_kernel { public: typedef typename std::iterator_traits<InputIterator>::value_type value_type; count_if_with_threads_kernel() : meta_kernel("count_if_with_threads") { } void set_args(InputIterator first, InputIterator last, Predicate predicate) { typedef typename std::iterator_traits<InputIterator>::value_type T; m_size = detail::iterator_range_size(first, last); m_size_arg = add_arg<const ulong_>("size"); m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts"); *this << // thread parameters "const uint gid = get_global_id(0);\n" << "const uint block_size = size / get_global_size(0);\n" << "const uint start = block_size * gid;\n" << "uint end = 0;\n" << "if(gid == get_global_size(0) - 1)\n" << " end = size;\n" << "else\n" << " end = block_size * gid + block_size;\n" << // count values "uint count = 0;\n" << "for(uint i = start; i < end; i++){\n" << decl<const T>("value") << "=" << first[expr<uint_>("i")] << ";\n" << if_(predicate(var<const T>("value"))) << "{\n" << "count++;\n" << "}\n" << "}\n" << // write count "counts[gid] = count;\n"; } size_t exec(command_queue &queue) { const device &device = queue.get_device(); const context &context = queue.get_context(); size_t threads = device.compute_units(); const size_t minimum_block_size = 2048; if(m_size / threads < minimum_block_size){ threads = static_cast<size_t>( (std::max)( std::ceil(float(m_size) / minimum_block_size), 1.0f ) ); } // storage for counts 
::boost::compute::vector<ulong_> counts(threads, context); // exec kernel set_arg(m_size_arg, static_cast<ulong_>(m_size)); set_arg(m_counts_arg, counts.get_buffer()); exec_1d(queue, 0, threads, 1); // copy counts to the host std::vector<ulong_> host_counts(threads); ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue); // return sum of counts return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0)); } private: size_t m_size; size_t m_size_arg; size_t m_counts_arg; }; // counts values that match the predicate using one thread per block. this is // optimized for cpu-type devices with a small number of compute units. template<class InputIterator, class Predicate> inline size_t count_if_with_threads(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { count_if_with_threads_kernel<InputIterator, Predicate> kernel; kernel.set_args(first, last, predicate); return kernel.exec(queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP algorithm/detail/balanced_path.hpp 0000644 00000014173 15125510617 0013263 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP #include <iterator> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Balanced Path kernel class /// /// Subclass of meta_kernel to break two sets into tiles according /// to their balanced path. /// class balanced_path_kernel : public meta_kernel { public: unsigned int tile_size; balanced_path_kernel() : meta_kernel("balanced_path") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class OutputIterator1, class OutputIterator2, class Compare> void set_range(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result_a, OutputIterator2 result_b, Compare comp) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; m_a_count = iterator_range_size(first1, last1); m_a_count_arg = add_arg<uint_>("a_count"); m_b_count = iterator_range_size(first2, last2); m_b_count_arg = add_arg<uint_>("b_count"); *this << "uint i = get_global_id(0);\n" << "uint target = (i+1)*" << tile_size << ";\n" << "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" << "uint end = min(target,a_count);\n" << "uint a_index, b_index;\n" << "while(start<end)\n" << "{\n" << " a_index = (start + end)/2;\n" << " b_index = target - a_index - 1;\n" << " if(!(" << comp(first2[expr<uint_>("b_index")], first1[expr<uint_>("a_index")]) << "))\n" << " start = a_index + 1;\n" << " else end = a_index;\n" << "}\n" << "a_index = start;\n" << "b_index = target - start;\n" << "if(b_index < b_count)\n" << "{\n" << " " << 
decl<const value_type>("x") << " = " << first2[expr<uint_>("b_index")] << ";\n" << " uint a_start = 0, a_end = a_index, a_mid;\n" << " uint b_start = 0, b_end = b_index, b_mid;\n" << " while(a_start<a_end)\n" << " {\n" << " a_mid = (a_start + a_end)/2;\n" << " if(" << comp(first1[expr<uint_>("a_mid")], expr<value_type>("x")) << ")\n" << " a_start = a_mid+1;\n" << " else a_end = a_mid;\n" << " }\n" << " while(b_start<b_end)\n" << " {\n" << " b_mid = (b_start + b_end)/2;\n" << " if(" << comp(first2[expr<uint_>("b_mid")], expr<value_type>("x")) << ")\n" << " b_start = b_mid+1;\n" << " else b_end = b_mid;\n" << " }\n" << " uint a_run = a_index - a_start;\n" << " uint b_run = b_index - b_start;\n" << " uint x_count = a_run + b_run;\n" << " uint b_advance = max(x_count / 2, x_count - a_run);\n" << " b_end = min(b_count, b_start + b_advance + 1);\n" << " uint temp_start = b_index, temp_end = b_end, temp_mid;" << " while(temp_start < temp_end)\n" << " {\n" << " temp_mid = (temp_start + temp_end + 1)/2;\n" << " if(" << comp(expr<value_type>("x"), first2[expr<uint_>("temp_mid")]) << ")\n" << " temp_end = temp_mid-1;\n" << " else temp_start = temp_mid;\n" << " }\n" << " b_run = temp_start - b_start + 1;\n" << " b_advance = min(b_advance, b_run);\n" << " uint a_advance = x_count - b_advance;\n" << " uint star = convert_uint((a_advance == b_advance + 1) " << "&& (b_advance < b_run));\n" << " a_index = a_start + a_advance;\n" << " b_index = target - a_index + star;\n" << "}\n" << result_a[expr<uint_>("i")] << " = a_index;\n" << result_b[expr<uint_>("i")] << " = b_index;\n"; } template<class InputIterator1, class InputIterator2, class OutputIterator1, class OutputIterator2> void set_range(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result_a, OutputIterator2 result_b) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; ::boost::compute::less<value_type> less_than; set_range(first1, last1, 
first2, last2, result_a, result_b, less_than); } event exec(command_queue &queue) { if((m_a_count + m_b_count)/tile_size == 0) { return event(); } set_arg(m_a_count_arg, uint_(m_a_count)); set_arg(m_b_count_arg, uint_(m_b_count)); return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size); } private: size_t m_a_count; size_t m_a_count_arg; size_t m_b_count; size_t m_b_count_arg; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP algorithm/detail/find_extrema_on_cpu.hpp 0000644 00000012325 15125510617 0014523 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_ON_CPU_HPP #include <algorithm> #include <boost/compute/algorithm/detail/find_extrema_with_reduce.hpp> #include <boost/compute/algorithm/detail/find_extrema_with_atomics.hpp> #include <boost/compute/algorithm/detail/serial_find_extrema.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class Compare> inline InputIterator find_extrema_on_cpu(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; size_t count = iterator_range_size(first, last); const device 
&device = queue.get_device(); const uint_ compute_units = queue.get_device().compute_units(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); std::string cache_key = "__boost_find_extrema_cpu_" + boost::lexical_cast<std::string>(sizeof(input_type)); // for inputs smaller than serial_find_extrema_threshold // serial_find_extrema algorithm is used uint_ serial_find_extrema_threshold = parameters->get( cache_key, "serial_find_extrema_threshold", 16384 * sizeof(input_type) ); serial_find_extrema_threshold = (std::max)(serial_find_extrema_threshold, uint_(2 * compute_units)); const context &context = queue.get_context(); if(count < serial_find_extrema_threshold) { return serial_find_extrema(first, last, compare, find_minimum, queue); } meta_kernel k("find_extrema_on_cpu"); buffer output(context, sizeof(input_type) * compute_units); buffer output_idx( context, sizeof(uint_) * compute_units, buffer::read_write | buffer::alloc_host_ptr ); size_t count_arg = k.add_arg<uint_>("count"); size_t output_arg = k.add_arg<input_type *>(memory_object::global_memory, "output"); size_t output_idx_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output_idx"); k << "uint block = " << "(uint)ceil(((float)count)/get_global_size(0));\n" << "uint index = get_global_id(0) * block;\n" << "uint end = min(count, index + block);\n" << "uint value_index = index;\n" << k.decl<input_type>("value") << " = " << first[k.var<uint_>("index")] << ";\n" << "index++;\n" << "while(index < end){\n" << k.decl<input_type>("candidate") << " = " << first[k.var<uint_>("index")] << ";\n" << "#ifndef BOOST_COMPUTE_FIND_MAXIMUM\n" << "bool compare = " << compare(k.var<input_type>("candidate"), k.var<input_type>("value")) << ";\n" << "#else\n" << "bool compare = " << compare(k.var<input_type>("value"), k.var<input_type>("candidate")) << ";\n" << "#endif\n" << "value = compare ? candidate : value;\n" << "value_index = compare ? 
index : value_index;\n" << "index++;\n" << "}\n" << "output[get_global_id(0)] = value;\n" << "output_idx[get_global_id(0)] = value_index;\n"; size_t global_work_size = compute_units; std::string options; if(!find_minimum){ options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; } kernel kernel = k.compile(context, options); kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(output_arg, output); kernel.set_arg(output_idx_arg, output_idx); queue.enqueue_1d_range_kernel(kernel, 0, global_work_size, 0); buffer_iterator<input_type> result = serial_find_extrema( make_buffer_iterator<input_type>(output), make_buffer_iterator<input_type>(output, global_work_size), compare, find_minimum, queue ); uint_* output_idx_host_ptr = static_cast<uint_*>( queue.enqueue_map_buffer( output_idx, command_queue::map_read, 0, global_work_size * sizeof(uint_) ) ); difference_type extremum_idx = static_cast<difference_type>(*(output_idx_host_ptr + result.get_index())); return first + extremum_idx; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_ON_CPU_HPP algorithm/detail/count_if_with_reduce.hpp 0000644 00000005130 15125510617 0014677 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP #include <boost/compute/algorithm/reduce.hpp> #include <boost/compute/iterator/transform_iterator.hpp> #include <boost/compute/types/fundamental.hpp> namespace boost { namespace compute { namespace detail { template<class Predicate, class Arg> struct invoked_countable_predicate { invoked_countable_predicate(Predicate p, Arg a) : predicate(p), arg(a) { } Predicate predicate; Arg arg; }; template<class Predicate, class Arg> inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_countable_predicate<Predicate, Arg> &expr) { return kernel << "(" << expr.predicate(expr.arg) << " ? 1 : 0)"; } // the countable_predicate wraps Predicate and converts its result from // bool to ulong so that it can be used with reduce() template<class Predicate> struct countable_predicate { typedef ulong_ result_type; countable_predicate(Predicate predicate) : m_predicate(predicate) { } template<class Arg> invoked_countable_predicate<Predicate, Arg> operator()(const Arg &arg) const { return invoked_countable_predicate<Predicate, Arg>(m_predicate, arg); } Predicate m_predicate; }; // counts the number of elements matching predicate using reduce() template<class InputIterator, class Predicate> inline size_t count_if_with_reduce(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { countable_predicate<Predicate> reduce_predicate(predicate); ulong_ count = 0; ::boost::compute::reduce( ::boost::compute::make_transform_iterator(first, reduce_predicate), ::boost::compute::make_transform_iterator(last, reduce_predicate), &count, ::boost::compute::plus<ulong_>(), queue ); return static_cast<size_t>(count); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP 
algorithm/detail/serial_count_if.hpp 0000644 00000004303 15125510617 0013655 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP #include <iterator> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { // counts values that match the predicate using a single thread template<class InputIterator, class Predicate> inline size_t serial_count_if(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; const context &context = queue.get_context(); size_t size = iterator_range_size(first, last); meta_kernel k("serial_count_if"); k.add_set_arg("size", static_cast<uint_>(size)); size_t result_arg = k.add_arg<uint_ *>(memory_object::global_memory, "result"); k << "uint count = 0;\n" << "for(uint i = 0; i < size; i++){\n" << k.decl<const value_type>("value") << "=" << first[k.var<uint_>("i")] << ";\n" << "if(" << predicate(k.var<const value_type>("value")) << "){\n" << "count++;\n" << "}\n" "}\n" "*result = count;\n"; kernel kernel = k.compile(context); // setup result buffer scalar<uint_> result(context); kernel.set_arg(result_arg, result.get_buffer()); // run kernel queue.enqueue_task(kernel); // read index return result.read(queue); } } // end detail namespace } // end compute namespace } // end boost 
namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP algorithm/detail/count_if_with_ballot.hpp 0000644 00000005055 15125510617 0014713 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP #include <boost/compute/context.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/algorithm/reduce.hpp> #include <boost/compute/functional/detail/nvidia_ballot.hpp> #include <boost/compute/functional/detail/nvidia_popcount.hpp> #include <boost/compute/detail/meta_kernel.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class Predicate> inline size_t count_if_with_ballot(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { size_t count = iterator_range_size(first, last); size_t block_size = 32; size_t block_count = count / block_size; if(block_count * block_size != count){ block_count++; } const ::boost::compute::context &context = queue.get_context(); ::boost::compute::vector<uint_> counts(block_count, context); ::boost::compute::detail::nvidia_popcount<uint_> popc; ::boost::compute::detail::nvidia_ballot<uint_> ballot; meta_kernel k("count_if_with_ballot"); k << "const uint gid = get_global_id(0);\n" << "bool value = false;\n" << "if(gid < count)\n" << " value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" << "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" << "if(get_local_id(0) 
== 0)\n" << counts.begin()[k.var<uint_>("get_group_id(0)") ] << " = " << popc(k.var<uint_>("bits")) << ";\n"; k.add_set_arg<const uint_>("count", count); k.exec_1d(queue, 0, block_size * block_count, block_size); uint_ result; ::boost::compute::reduce( counts.begin(), counts.end(), &result, queue ); return result; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP algorithm/detail/copy_to_host.hpp 0000644 00000015002 15125510617 0013217 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP #include <iterator> #include <boost/utility/addressof.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/async/future.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/memory/svm_ptr.hpp> #include <boost/compute/detail/iterator_plus_distance.hpp> namespace boost { namespace compute { namespace detail { template<class DeviceIterator, class HostIterator> inline HostIterator copy_to_host(DeviceIterator first, DeviceIterator last, HostIterator result, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<DeviceIterator>::value_type value_type; size_t count = iterator_range_size(first, last); if(count == 0){ return result; } const buffer &buffer = first.get_buffer(); size_t offset = first.get_index(); queue.enqueue_read_buffer(buffer, offset * sizeof(value_type), count * sizeof(value_type), 
::boost::addressof(*result), events); return iterator_plus_distance(result, count); } template<class DeviceIterator, class HostIterator> inline HostIterator copy_to_host_map(DeviceIterator first, DeviceIterator last, HostIterator result, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<DeviceIterator>::value_type value_type; typedef typename std::iterator_traits<DeviceIterator>::difference_type difference_type; size_t count = iterator_range_size(first, last); if(count == 0){ return result; } size_t offset = first.get_index(); // map [first; last) buffer to host value_type *pointer = static_cast<value_type*>( queue.enqueue_map_buffer( first.get_buffer(), CL_MAP_READ, offset * sizeof(value_type), count * sizeof(value_type), events ) ); // copy [first; last) to result buffer std::copy( pointer, pointer + static_cast<difference_type>(count), result ); // unmap [first; last) boost::compute::event unmap_event = queue.enqueue_unmap_buffer( first.get_buffer(), static_cast<void*>(pointer) ); unmap_event.wait(); return iterator_plus_distance(result, count); } template<class DeviceIterator, class HostIterator> inline future<HostIterator> copy_to_host_async(DeviceIterator first, DeviceIterator last, HostIterator result, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<DeviceIterator>::value_type value_type; size_t count = iterator_range_size(first, last); if(count == 0){ return future<HostIterator>(); } const buffer &buffer = first.get_buffer(); size_t offset = first.get_index(); event event_ = queue.enqueue_read_buffer_async(buffer, offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*result), events); return make_future(iterator_plus_distance(result, count), event_); } #ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_to_host() specialization for svm_ptr template<class T, class HostIterator> inline HostIterator copy_to_host(svm_ptr<T> first, svm_ptr<T> last, HostIterator result, 
command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } queue.enqueue_svm_memcpy( ::boost::addressof(*result), first.get(), count * sizeof(T), events ); return result + count; } template<class T, class HostIterator> inline future<HostIterator> copy_to_host_async(svm_ptr<T> first, svm_ptr<T> last, HostIterator result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return future<HostIterator>(); } event event_ = queue.enqueue_svm_memcpy_async( ::boost::addressof(*result), first.get(), count * sizeof(T), events ); return make_future(iterator_plus_distance(result, count), event_); } template<class T, class HostIterator> inline HostIterator copy_to_host_map(svm_ptr<T> first, svm_ptr<T> last, HostIterator result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } // map queue.enqueue_svm_map(first.get(), count * sizeof(T), CL_MAP_READ, events); // copy [first; last) to result std::copy( static_cast<T*>(first.get()), static_cast<T*>(last.get()), result ); // unmap [first; last) queue.enqueue_svm_unmap(first.get()).wait(); return iterator_plus_distance(result, count); } #endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP algorithm/detail/serial_reduce_by_key.hpp 0000644 00000010220 15125510617 0014653 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP #include <iterator> #include <boost/compute/command_queue.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/result_of.hpp> namespace boost { namespace compute { namespace detail { template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction, class BinaryPredicate> inline size_t serial_reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits<InputValueIterator>::value_type value_type; typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; typedef typename ::boost::compute::result_of<BinaryFunction(value_type, value_type)>::type result_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 1){ return count; } meta_kernel k("serial_reduce_by_key"); size_t count_arg = k.add_arg<uint_>("count"); size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory, "result_size"); k << k.decl<result_type>("result") << " = " << values_first[0] << ";\n" << k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" << k.decl<result_type>("value") << ";\n" << k.decl<key_type>("key") << ";\n" << k.decl<uint_>("size") << " = 1;\n" << keys_result[0] << " = previous_key;\n" << values_result[0] << " = result;\n" << "for(ulong i = 1; i < count; 
i++) {\n" << " value = " << values_first[k.var<uint_>("i")] << ";\n" << " key = " << keys_first[k.var<uint_>("i")] << ";\n" << " if (" << predicate(k.var<key_type>("previous_key"), k.var<key_type>("key")) << ") {\n" << " result = " << function(k.var<result_type>("result"), k.var<result_type>("value")) << ";\n" << " }\n " << " else { \n" << keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" << values_result[k.var<uint_>("size - 1")] << " = result;\n" << " result = value;\n" << " size++;\n" << " } \n" << " previous_key = key;\n" << "}\n" << keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" << values_result[k.var<uint_>("size - 1")] << " = result;\n" << "*result_size = size;"; kernel kernel = k.compile(context); scalar<uint_> result_size(context); kernel.set_arg(result_size_arg, result_size.get_buffer()); kernel.set_arg(count_arg, static_cast<uint_>(count)); queue.enqueue_task(kernel); return static_cast<size_t>(result_size.read(queue)); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP algorithm/detail/find_if_with_atomics.hpp 0000644 00000020475 15125510617 0014670 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP #include <iterator> #include <boost/compute/types.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/parameter_cache.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class UnaryPredicate> inline InputIterator find_if_with_atomics_one_vpt(InputIterator first, InputIterator last, UnaryPredicate predicate, const size_t count, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; const context &context = queue.get_context(); detail::meta_kernel k("find_if"); size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); atomic_min<uint_> atomic_min_uint; k << k.decl<const uint_>("i") << " = get_global_id(0);\n" << k.decl<const value_type>("value") << "=" << first[k.var<const uint_>("i")] << ";\n" << "if(" << predicate(k.var<const value_type>("value")) << "){\n" << " " << atomic_min_uint(k.var<uint_ *>("index"), k.var<uint_>("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar<uint_> index(context); kernel.set_arg(index_arg, index.get_buffer()); // initialize index to the last iterator's index index.write(static_cast<uint_>(count), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); // read index and return iterator return first + static_cast<difference_type>(index.read(queue)); } template<class InputIterator, class UnaryPredicate> 
inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first, InputIterator last, UnaryPredicate predicate, const size_t count, const size_t vpt, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; const context &context = queue.get_context(); const device &device = queue.get_device(); detail::meta_kernel k("find_if"); size_t index_arg = k.add_arg<uint_ *>(memory_object::global_memory, "index"); size_t count_arg = k.add_arg<const uint_>("count"); size_t vpt_arg = k.add_arg<const uint_>("vpt"); atomic_min<uint_> atomic_min_uint; // for GPUs reads from global memory are coalesced if(device.type() & device::gpu) { k << k.decl<const uint_>("lsize") << " = get_local_size(0);\n" << k.decl<uint_>("id") << " = get_local_id(0) + get_group_id(0) * lsize * vpt;\n" << k.decl<const uint_>("end") << " = min(" << "id + (lsize *" << k.var<uint_>("vpt") << ")," << "count" << ");\n" << // checking if the index is already found "__local uint local_index;\n" << "if(get_local_id(0) == 0){\n" << " local_index = *index;\n " << "};\n" << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "if(local_index < id){\n" << " return;\n" << "}\n" << "while(id < end){\n" << " " << k.decl<const value_type>("value") << " = " << first[k.var<const uint_>("id")] << ";\n" " if(" << predicate(k.var<const value_type>("value")) << "){\n" << " " << atomic_min_uint(k.var<uint_ *>("index"), k.var<uint_>("id")) << ";\n" << " return;\n" " }\n" << " id+=lsize;\n" << "}\n"; // for CPUs (and other devices) reads are ordered so the big cache is // efficiently used. 
} else { k << k.decl<uint_>("id") << " = get_global_id(0) * " << k.var<uint_>("vpt") << ";\n" << k.decl<const uint_>("end") << " = min(" << "id + " << k.var<uint_>("vpt") << "," << "count" << ");\n" << "while(id < end && (*index) > id){\n" << " " << k.decl<const value_type>("value") << " = " << first[k.var<const uint_>("id")] << ";\n" " if(" << predicate(k.var<const value_type>("value")) << "){\n" << " " << atomic_min_uint(k.var<uint_ *>("index"), k.var<uint_>("id")) << ";\n" << " return;\n" << " }\n" << " id++;\n" << "}\n"; } kernel kernel = k.compile(context); scalar<uint_> index(context); kernel.set_arg(index_arg, index.get_buffer()); kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(vpt_arg, static_cast<uint_>(vpt)); // initialize index to the last iterator's index index.write(static_cast<uint_>(count), queue); const size_t global_wg_size = static_cast<size_t>( std::ceil(float(count) / vpt) ); queue.enqueue_1d_range_kernel(kernel, 0, global_wg_size, 0); // read index and return iterator return first + static_cast<difference_type>(index.read(queue)); } // Space complexity: O(1) template<class InputIterator, class UnaryPredicate> inline InputIterator find_if_with_atomics(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const device &device = queue.get_device(); // load cached parameters std::string cache_key = std::string("__boost_find_if_with_atomics_") + type_name<value_type>(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); // for relatively small inputs on GPUs kernel checking one value per thread // (work-item) is more efficient than its multiple values per thread version if(device.type() & device::gpu){ const size_t one_vpt_threshold = parameters->get(cache_key, "one_vpt_threshold", 
1048576); if(count <= one_vpt_threshold){ return find_if_with_atomics_one_vpt( first, last, predicate, count, queue ); } } // values per thread size_t vpt; if(device.type() & device::gpu){ // get vpt parameter vpt = parameters->get(cache_key, "vpt", 32); } else { // for CPUs work is split equally between compute units const size_t max_compute_units = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); vpt = static_cast<size_t>( std::ceil(float(count) / max_compute_units) ); } return find_if_with_atomics_multiple_vpt( first, last, predicate, count, vpt, queue ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP algorithm/detail/merge_sort_on_cpu.hpp 0000644 00000034476 15125510617 0014237 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP #include <boost/compute/kernel.hpp> #include <boost/compute/program.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/merge_with_merge_path.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { template<class KeyIterator, class ValueIterator, class Compare> inline void merge_blocks(KeyIterator keys_first, ValueIterator values_first, KeyIterator keys_result, ValueIterator values_result, Compare compare, size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { (void) values_result; (void) values_first; meta_kernel k("merge_sort_on_cpu_merge_blocks"); size_t count_arg = k.add_arg<const uint_>("count"); size_t block_size_arg = k.add_arg<uint_>("block_size"); k << k.decl<uint_>("b1_start") << " = get_global_id(0) * block_size * 2;\n" << k.decl<uint_>("b1_end") << " = min(count, b1_start + block_size);\n" << k.decl<uint_>("b2_start") << " = min(count, b1_start + block_size);\n" << k.decl<uint_>("b2_end") << " = min(count, b2_start + block_size);\n" << k.decl<uint_>("result_idx") << " = b1_start;\n" << // merging block 1 and block 2 (stable) "while(b1_start < b1_end && b2_start < b2_end){\n" << " if( " << compare(keys_first[k.var<uint_>("b2_start")], keys_first[k.var<uint_>("b1_start")]) << "){\n" << " " << keys_result[k.var<uint_>("result_idx")] << " = " << keys_first[k.var<uint_>("b2_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var<uint_>("result_idx")] << " = " << values_first[k.var<uint_>("b2_start")] << ";\n"; } k << " b2_start++;\n" << " }\n" << " else {\n" << " " << keys_result[k.var<uint_>("result_idx")] << " = " << 
keys_first[k.var<uint_>("b1_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var<uint_>("result_idx")] << " = " << values_first[k.var<uint_>("b1_start")] << ";\n"; } k << " b1_start++;\n" << " }\n" << " result_idx++;\n" << "}\n" << "while(b1_start < b1_end){\n" << " " << keys_result[k.var<uint_>("result_idx")] << " = " << keys_first[k.var<uint_>("b1_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var<uint_>("result_idx")] << " = " << values_first[k.var<uint_>("b1_start")] << ";\n"; } k << " b1_start++;\n" << " result_idx++;\n" << "}\n" << "while(b2_start < b2_end){\n" << " " << keys_result[k.var<uint_>("result_idx")] << " = " << keys_first[k.var<uint_>("b2_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var<uint_>("result_idx")] << " = " << values_first[k.var<uint_>("b2_start")] << ";\n"; } k << " b2_start++;\n" << " result_idx++;\n" << "}\n"; const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast<const uint_>(count)); kernel.set_arg(block_size_arg, static_cast<uint_>(block_size)); const size_t global_size = static_cast<size_t>( std::ceil(float(count) / (2 * block_size)) ); queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0); } template<class Iterator, class Compare> inline void merge_blocks(Iterator first, Iterator result, Compare compare, size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { // dummy iterator as it's not sort by key Iterator dummy; merge_blocks(first, dummy, result, dummy, compare, count, block_size, false, queue); } template<class Iterator, class Compare> inline void dispatch_merge_blocks(Iterator first, Iterator result, Compare compare, size_t count, const size_t block_size, const size_t input_size_threshold, const size_t blocks_no_threshold, command_queue &queue) { const size_t blocks_no = static_cast<size_t>( std::ceil(float(count) / block_size) ); // merge with merge path 
should used only for the large arrays and at the // end of merging part when there are only a few big blocks left to be merged if(blocks_no <= blocks_no_threshold && count >= input_size_threshold){ Iterator last = first + count; for(size_t i = 0; i < count; i+= 2*block_size) { Iterator first1 = (std::min)(first + i, last); Iterator last1 = (std::min)(first1 + block_size, last); Iterator first2 = last1; Iterator last2 = (std::min)(first2 + block_size, last); Iterator block_result = (std::min)(result + i, result + count); merge_with_merge_path(first1, last1, first2, last2, block_result, compare, queue); } } else { merge_blocks(first, result, compare, count, block_size, false, queue); } } template<class KeyIterator, class ValueIterator, class Compare> inline void block_insertion_sort(KeyIterator keys_first, ValueIterator values_first, Compare compare, const size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { (void) values_first; typedef typename std::iterator_traits<KeyIterator>::value_type K; typedef typename std::iterator_traits<ValueIterator>::value_type T; meta_kernel k("merge_sort_on_cpu_block_insertion_sort"); size_t count_arg = k.add_arg<uint_>("count"); size_t block_size_arg = k.add_arg<uint_>("block_size"); k << k.decl<uint_>("start") << " = get_global_id(0) * block_size;\n" << k.decl<uint_>("end") << " = min(count, start + block_size);\n" << // block insertion sort (stable) "for(uint i = start+1; i < end; i++){\n" << " " << k.decl<const K>("key") << " = " << keys_first[k.var<uint_>("i")] << ";\n"; if(sort_by_key){ k << " " << k.decl<const T>("value") << " = " << values_first[k.var<uint_>("i")] << ";\n"; } k << " uint pos = i;\n" << " while(pos > start && " << compare(k.var<const K>("key"), keys_first[k.var<uint_>("pos-1")]) << "){\n" << " " << keys_first[k.var<uint_>("pos")] << " = " << keys_first[k.var<uint_>("pos-1")] << ";\n"; if(sort_by_key){ k << " " << values_first[k.var<uint_>("pos")] << " = " << 
values_first[k.var<uint_>("pos-1")] << ";\n"; } k << " pos--;\n" << " }\n" << " " << keys_first[k.var<uint_>("pos")] << " = key;\n"; if(sort_by_key) { k << " " << values_first[k.var<uint_>("pos")] << " = value;\n"; } k << "}\n"; // block insertion sort const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(block_size_arg, static_cast<uint_>(block_size)); const size_t global_size = static_cast<size_t>(std::ceil(float(count) / block_size)); queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0); } template<class Iterator, class Compare> inline void block_insertion_sort(Iterator first, Compare compare, const size_t count, const size_t block_size, command_queue &queue) { // dummy iterator as it's not sort by key Iterator dummy; block_insertion_sort(first, dummy, compare, count, block_size, false, queue); } // This sort is stable. template<class Iterator, class Compare> inline void merge_sort_on_cpu(Iterator first, Iterator last, Compare compare, command_queue &queue) { typedef typename std::iterator_traits<Iterator>::value_type value_type; size_t count = iterator_range_size(first, last); if(count < 2){ return; } // for small input size only insertion sort is performed else if(count <= 512){ block_insertion_sort(first, compare, count, count, queue); return; } const context &context = queue.get_context(); const device &device = queue.get_device(); // loading parameters std::string cache_key = std::string("__boost_merge_sort_on_cpu_") + type_name<value_type>(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); // When there is merge_with_path_blocks_no_threshold or less blocks left to // merge AND input size is merge_with_merge_path_input_size_threshold or more // merge_with_merge_path() algorithm is used to merge sorted blocks; // otherwise merge_blocks() is used. 
const size_t merge_with_path_blocks_no_threshold = parameters->get(cache_key, "merge_with_merge_path_blocks_no_threshold", 8); const size_t merge_with_path_input_size_threshold = parameters->get(cache_key, "merge_with_merge_path_input_size_threshold", 2097152); const size_t block_size = parameters->get(cache_key, "insertion_sort_block_size", 64); block_insertion_sort(first, compare, count, block_size, queue); // temporary buffer for merge result vector<value_type> temp(count, context); bool result_in_temporary_buffer = false; for(size_t i = block_size; i < count; i *= 2){ result_in_temporary_buffer = !result_in_temporary_buffer; if(result_in_temporary_buffer) { dispatch_merge_blocks(first, temp.begin(), compare, count, i, merge_with_path_input_size_threshold, merge_with_path_blocks_no_threshold, queue); } else { dispatch_merge_blocks(temp.begin(), first, compare, count, i, merge_with_path_input_size_threshold, merge_with_path_blocks_no_threshold, queue); } } if(result_in_temporary_buffer) { copy(temp.begin(), temp.end(), first, queue); } } // This sort is stable. 
template<class KeyIterator, class ValueIterator, class Compare> inline void merge_sort_by_key_on_cpu(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; typedef typename std::iterator_traits<ValueIterator>::value_type value_type; size_t count = iterator_range_size(keys_first, keys_last); if(count < 2){ return; } // for small input size only insertion sort is performed else if(count <= 512){ block_insertion_sort(keys_first, values_first, compare, count, count, true, queue); return; } const context &context = queue.get_context(); const device &device = queue.get_device(); // loading parameters std::string cache_key = std::string("__boost_merge_sort_by_key_on_cpu_") + type_name<value_type>() + "_with_" + type_name<key_type>(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); const size_t block_size = parameters->get(cache_key, "insertion_sort_by_key_block_size", 64); block_insertion_sort(keys_first, values_first, compare, count, block_size, true, queue); // temporary buffer for merge results vector<value_type> values_temp(count, context); vector<key_type> keys_temp(count, context); bool result_in_temporary_buffer = false; for(size_t i = block_size; i < count; i *= 2){ result_in_temporary_buffer = !result_in_temporary_buffer; if(result_in_temporary_buffer) { merge_blocks(keys_first, values_first, keys_temp.begin(), values_temp.begin(), compare, count, i, true, queue); } else { merge_blocks(keys_temp.begin(), values_temp.begin(), keys_first, values_first, compare, count, i, true, queue); } } if(result_in_temporary_buffer) { copy(keys_temp.begin(), keys_temp.end(), keys_first, queue); copy(values_temp.begin(), values_temp.end(), values_first, queue); } } } // end detail namespace } // end compute namespace } // end boost namespace #endif // 
BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP algorithm/detail/serial_merge.hpp 0000644 00000006660 15125510617 0013156 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP #define BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP #include <iterator> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare> inline OutputIterator serial_merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, command_queue &queue) { typedef typename std::iterator_traits<InputIterator1>::value_type input_type1; typedef typename std::iterator_traits<InputIterator2>::value_type input_type2; typedef typename std::iterator_traits<OutputIterator>::difference_type result_difference_type; std::ptrdiff_t size1 = std::distance(first1, last1); std::ptrdiff_t size2 = std::distance(first2, last2); meta_kernel k("serial_merge"); k.add_set_arg<uint_>("size1", static_cast<uint_>(size1)); k.add_set_arg<uint_>("size2", static_cast<uint_>(size2)); k << "uint i = 0;\n" << // index in result range "uint j = 0;\n" << // index in first input range "uint k = 0;\n" << // index in second input range // fetch initial values from each range k.decl<input_type1>("j_value") << " = " << first1[0] << ";\n" << k.decl<input_type2>("k_value") << " = " << first2[0] << ";\n" << 
// merge values from both input ranges to the result range "while(j < size1 && k < size2){\n" << " if(" << comp(k.var<input_type1>("j_value"), k.var<input_type2>("k_value")) << "){\n" << " " << result[k.var<uint_>("i++")] << " = j_value;\n" << " j_value = " << first1[k.var<uint_>("++j")] << ";\n" << " }\n" << " else{\n" " " << result[k.var<uint_>("i++")] << " = k_value;\n" " k_value = " << first2[k.var<uint_>("++k")] << ";\n" << " }\n" "}\n" // copy any remaining values from first range "while(j < size1){\n" << result[k.var<uint_>("i++")] << " = " << first1[k.var<uint_>("j++")] << ";\n" << "}\n" // copy any remaining values from second range "while(k < size2){\n" << result[k.var<uint_>("i++")] << " = " << first2[k.var<uint_>("k++")] << ";\n" << "}\n"; // run kernel k.exec(queue); return result + static_cast<result_difference_type>(size1 + size2); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP algorithm/detail/binary_find.hpp 0000644 00000011022 15125510617 0012770 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP #include <boost/compute/functional.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/algorithm/transform.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/parameter_cache.hpp> namespace boost { namespace compute { namespace detail{ /// /// \brief Binary find kernel class /// /// Subclass of meta_kernel to perform single step in binary find. /// template<class InputIterator, class UnaryPredicate> class binary_find_kernel : public meta_kernel { public: binary_find_kernel(InputIterator first, InputIterator last, UnaryPredicate predicate) : meta_kernel("binary_find") { typedef typename std::iterator_traits<InputIterator>::value_type value_type; m_index_arg = add_arg<uint_ *>(memory_object::global_memory, "index"); m_block_arg = add_arg<uint_>("block"); atomic_min<uint_> atomic_min_uint; *this << "uint i = get_global_id(0) * block;\n" << decl<value_type>("value") << "=" << first[var<uint_>("i")] << ";\n" << "if(" << predicate(var<value_type>("value")) << ") {\n" << atomic_min_uint(var<uint_ *>("index"), var<uint_>("i")) << ";\n" << "}\n"; } size_t m_index_arg; size_t m_block_arg; }; /// /// \brief Binary find algorithm /// /// Finds the end of true values in the partitioned range [first, last). 
/// \return Iterator pointing to end of true values /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param predicate Predicate according to which the range is partitioned /// \param queue Queue on which to execute /// template<class InputIterator, class UnaryPredicate> inline InputIterator binary_find(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { const device &device = queue.get_device(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); const std::string cache_key = "__boost_binary_find"; size_t find_if_limit = 128; size_t threads = parameters->get(cache_key, "tpb", 128); size_t count = iterator_range_size(first, last); InputIterator search_first = first; InputIterator search_last = last; scalar<uint_> index(queue.get_context()); // construct and compile binary_find kernel binary_find_kernel<InputIterator, UnaryPredicate> binary_find_kernel(search_first, search_last, predicate); ::boost::compute::kernel kernel = binary_find_kernel.compile(queue.get_context()); // set buffer for index kernel.set_arg(binary_find_kernel.m_index_arg, index.get_buffer()); while(count > find_if_limit) { index.write(static_cast<uint_>(count), queue); // set block and run binary_find kernel uint_ block = static_cast<uint_>((count - 1)/(threads - 1)); kernel.set_arg(binary_find_kernel.m_block_arg, block); queue.enqueue_1d_range_kernel(kernel, 0, threads, 0); size_t i = index.read(queue); if(i == count) { search_first = search_last - ((count - 1)%(threads - 1)); break; } else { search_last = search_first + i; search_first = search_last - ((count - 1)/(threads - 1)); } // Make sure that first and last stay within the input range search_last = (std::min)(search_last, last); search_last = (std::max)(search_last, first); search_first = (std::max)(search_first, first); search_first = (std::min)(search_first, last); 
count = iterator_range_size(search_first, search_last); } return find_if(search_first, search_last, predicate, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP algorithm/detail/scan_on_gpu.hpp 0000644 00000026070 15125510617 0013010 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP #include <boost/compute/kernel.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/memory/local_buffer.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class BinaryOperator> class local_scan_kernel : public meta_kernel { public: local_scan_kernel(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, BinaryOperator op) : meta_kernel("local_scan") { typedef typename std::iterator_traits<InputIterator>::value_type T; (void) last; bool checked = true; m_block_sums_arg = add_arg<T *>(memory_object::global_memory, "block_sums"); m_scratch_arg = add_arg<T *>(memory_object::local_memory, "scratch"); m_block_size_arg = add_arg<const cl_uint>("block_size"); m_count_arg = add_arg<const cl_uint>("count"); m_init_value_arg = add_arg<const T>("init"); // work-item parameters *this << "const uint 
gid = get_global_id(0);\n" << "const uint lid = get_local_id(0);\n"; // check against data size if(checked){ *this << "if(gid < count){\n"; } // copy values from input to local memory if(exclusive){ *this << decl<const T>("local_init") << "= (gid == 0) ? init : 0;\n" << "if(lid == 0){ scratch[lid] = local_init; }\n" << "else { scratch[lid] = " << first[expr<cl_uint>("gid-1")] << "; }\n"; } else{ *this << "scratch[lid] = " << first[expr<cl_uint>("gid")] << ";\n"; } if(checked){ *this << "}\n" "else {\n" << " scratch[lid] = 0;\n" << "}\n"; } // wait for all threads to read from input *this << "barrier(CLK_LOCAL_MEM_FENCE);\n"; // perform scan *this << "for(uint i = 1; i < block_size; i <<= 1){\n" << " " << decl<const T>("x") << " = lid >= i ? scratch[lid-i] : 0;\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " if(lid >= i){\n" << " scratch[lid] = " << op(var<T>("scratch[lid]"), var<T>("x")) << ";\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << "}\n"; // copy results to output if(checked){ *this << "if(gid < count){\n"; } *this << result[expr<cl_uint>("gid")] << " = scratch[lid];\n"; if(checked){ *this << "}\n"; } // store sum for the block if(exclusive){ *this << "if(lid == block_size - 1 && gid < count) {\n" << " block_sums[get_group_id(0)] = " << op(first[expr<cl_uint>("gid")], var<T>("scratch[lid]")) << ";\n" << "}\n"; } else { *this << "if(lid == block_size - 1){\n" << " block_sums[get_group_id(0)] = scratch[lid];\n" << "}\n"; } } size_t m_block_sums_arg; size_t m_scratch_arg; size_t m_block_size_arg; size_t m_count_arg; size_t m_init_value_arg; }; template<class T, class BinaryOperator> class write_scanned_output_kernel : public meta_kernel { public: write_scanned_output_kernel(BinaryOperator op) : meta_kernel("write_scanned_output") { bool checked = true; m_output_arg = add_arg<T *>(memory_object::global_memory, "output"); m_block_sums_arg = add_arg<const T *>(memory_object::global_memory, "block_sums"); m_count_arg = add_arg<const cl_uint>("count"); // 
work-item parameters *this << "const uint gid = get_global_id(0);\n" << "const uint block_id = get_group_id(0);\n"; // check against data size if(checked){ *this << "if(gid < count){\n"; } // write output *this << "output[gid] = " << op(var<T>("block_sums[block_id]"), var<T>("output[gid] ")) << ";\n"; if(checked){ *this << "}\n"; } } size_t m_output_arg; size_t m_block_sums_arg; size_t m_count_arg; }; template<class InputIterator> inline size_t pick_scan_block_size(InputIterator first, InputIterator last) { size_t count = iterator_range_size(first, last); if(count == 0) { return 0; } else if(count <= 1) { return 1; } else if(count <= 2) { return 2; } else if(count <= 4) { return 4; } else if(count <= 8) { return 8; } else if(count <= 16) { return 16; } else if(count <= 32) { return 32; } else if(count <= 64) { return 64; } else if(count <= 128) { return 128; } else { return 256; } } template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator scan_impl(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; typedef typename std::iterator_traits<OutputIterator>::value_type output_type; const context &context = queue.get_context(); const size_t count = detail::iterator_range_size(first, last); size_t block_size = pick_scan_block_size(first, last); size_t block_count = count / block_size; if(block_count * block_size < count){ block_count++; } ::boost::compute::vector<input_type> block_sums(block_count, context); // zero block sums input_type zero; std::memset(&zero, 0, sizeof(input_type)); ::boost::compute::fill(block_sums.begin(), block_sums.end(), zero, queue); // local scan local_scan_kernel<InputIterator, OutputIterator, BinaryOperator> local_scan_kernel(first, last, result, exclusive, 
op); ::boost::compute::kernel kernel = local_scan_kernel.compile(context); kernel.set_arg(local_scan_kernel.m_scratch_arg, local_buffer<input_type>(block_size)); kernel.set_arg(local_scan_kernel.m_block_sums_arg, block_sums); kernel.set_arg(local_scan_kernel.m_block_size_arg, static_cast<cl_uint>(block_size)); kernel.set_arg(local_scan_kernel.m_count_arg, static_cast<cl_uint>(count)); kernel.set_arg(local_scan_kernel.m_init_value_arg, static_cast<output_type>(init)); queue.enqueue_1d_range_kernel(kernel, 0, block_count * block_size, block_size); // inclusive scan block sums if(block_count > 1){ scan_impl(block_sums.begin(), block_sums.end(), block_sums.begin(), false, init, op, queue ); } // add block sums to each block if(block_count > 1){ write_scanned_output_kernel<input_type, BinaryOperator> write_output_kernel(op); kernel = write_output_kernel.compile(context); kernel.set_arg(write_output_kernel.m_output_arg, result.get_buffer()); kernel.set_arg(write_output_kernel.m_block_sums_arg, block_sums); kernel.set_arg(write_output_kernel.m_count_arg, static_cast<cl_uint>(count)); queue.enqueue_1d_range_kernel(kernel, block_size, block_count * block_size, block_size); } return result + static_cast<difference_type>(count); } template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator dispatch_scan(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { return scan_impl(first, last, result, exclusive, init, op, queue); } template<class InputIterator, class T, class BinaryOperator> inline InputIterator dispatch_scan(InputIterator first, InputIterator last, InputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; if(first == result){ // scan input in-place const context &context = queue.get_context(); // make a temporary copy the input size_t count 
= iterator_range_size(first, last); vector<value_type> tmp(count, context); copy(first, last, tmp.begin(), queue); // scan from temporary values return scan_impl(tmp.begin(), tmp.end(), first, exclusive, init, op, queue); } else { // scan input to output return scan_impl(first, last, result, exclusive, init, op, queue); } } template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator scan_on_gpu(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { if(first == last){ return result; } return dispatch_scan(first, last, result, exclusive, init, op, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP algorithm/detail/scan_on_cpu.hpp 0000644 00000015620 15125510617 0013003 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP #include <iterator> #include <boost/compute/device.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/serial_scan.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/parameter_cache.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator scan_on_cpu(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename std::iterator_traits<OutputIterator>::value_type output_type; const context &context = queue.get_context(); const device &device = queue.get_device(); const size_t compute_units = queue.get_device().compute_units(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); std::string cache_key = "__boost_scan_cpu_" + boost::lexical_cast<std::string>(sizeof(T)); // for inputs smaller than serial_scan_threshold // serial_scan algorithm is used uint_ serial_scan_threshold = parameters->get(cache_key, "serial_scan_threshold", 16384 * sizeof(T)); serial_scan_threshold = (std::max)(serial_scan_threshold, uint_(compute_units)); size_t count = detail::iterator_range_size(first, last); if(count == 0){ return result; } else if(count < serial_scan_threshold) { return serial_scan(first, last, result, exclusive, init, op, queue); } buffer block_partial_sums(context, sizeof(output_type) * compute_units ); // create scan kernel meta_kernel k("scan_on_cpu_block_scan"); // Arguments size_t count_arg = 
k.add_arg<uint_>("count"); size_t init_arg = k.add_arg<output_type>("initial_value"); size_t block_partial_sums_arg = k.add_arg<output_type *>(memory_object::global_memory, "block_partial_sums"); k << "uint block = (count + get_global_size(0))/(get_global_size(0) + 1);\n" << "uint index = get_global_id(0) * block;\n" << "uint end = min(count, index + block);\n" << "if(index >= end) return;\n"; if(!exclusive){ k << k.decl<output_type>("sum") << " = " << first[k.var<uint_>("index")] << ";\n" << result[k.var<uint_>("index")] << " = sum;\n" << "index++;\n"; } else { k << k.decl<output_type>("sum") << ";\n" << "if(index == 0){\n" << "sum = initial_value;\n" << "}\n" << "else {\n" << "sum = " << first[k.var<uint_>("index")] << ";\n" << "index++;\n" << "}\n"; } k << "while(index < end){\n" << // load next value k.decl<const input_type>("value") << " = " << first[k.var<uint_>("index")] << ";\n"; if(exclusive){ k << "if(get_global_id(0) == 0){\n" << result[k.var<uint_>("index")] << " = sum;\n" << "}\n"; } k << "sum = " << op(k.var<output_type>("sum"), k.var<output_type>("value")) << ";\n"; if(!exclusive){ k << "if(get_global_id(0) == 0){\n" << result[k.var<uint_>("index")] << " = sum;\n" << "}\n"; } k << "index++;\n" << "}\n" << // end while "block_partial_sums[get_global_id(0)] = sum;\n"; // compile scan kernel kernel block_scan_kernel = k.compile(context); // setup kernel arguments block_scan_kernel.set_arg(count_arg, static_cast<uint_>(count)); block_scan_kernel.set_arg(init_arg, static_cast<output_type>(init)); block_scan_kernel.set_arg(block_partial_sums_arg, block_partial_sums); // execute the kernel size_t global_work_size = compute_units; queue.enqueue_1d_range_kernel(block_scan_kernel, 0, global_work_size, 0); // scan is done if(compute_units < 2) { return result + count; } // final scan kernel meta_kernel l("scan_on_cpu_final_scan"); // Arguments count_arg = l.add_arg<uint_>("count"); block_partial_sums_arg = l.add_arg<output_type *>(memory_object::global_memory, 
"block_partial_sums"); l << "uint block = (count + get_global_size(0))/(get_global_size(0) + 1);\n" << "uint index = block + get_global_id(0) * block;\n" << "uint end = min(count, index + block);\n" << k.decl<output_type>("sum") << " = block_partial_sums[0];\n" << "for(uint i = 0; i < get_global_id(0); i++) {\n" << "sum = " << op(k.var<output_type>("sum"), k.var<output_type>("block_partial_sums[i + 1]")) << ";\n" << "}\n" << "while(index < end){\n"; if(exclusive){ l << l.decl<output_type>("value") << " = " << first[k.var<uint_>("index")] << ";\n" << result[k.var<uint_>("index")] << " = sum;\n" << "sum = " << op(k.var<output_type>("sum"), k.var<output_type>("value")) << ";\n"; } else { l << "sum = " << op(k.var<output_type>("sum"), first[k.var<uint_>("index")]) << ";\n" << result[k.var<uint_>("index")] << " = sum;\n"; } l << "index++;\n" << "}\n"; // compile scan kernel kernel final_scan_kernel = l.compile(context); // setup kernel arguments final_scan_kernel.set_arg(count_arg, static_cast<uint_>(count)); final_scan_kernel.set_arg(block_partial_sums_arg, block_partial_sums); // execute the kernel global_work_size = compute_units; queue.enqueue_1d_range_kernel(final_scan_kernel, 0, global_work_size, 0); // return iterator pointing to the end of the result range return result + count; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP algorithm/detail/insertion_sort.hpp 0000644 00000013616 15125510617 0013600 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP #include <boost/compute/kernel.hpp> #include <boost/compute/program.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/memory/local_buffer.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator, class Compare> inline void serial_insertion_sort(Iterator first, Iterator last, Compare compare, command_queue &queue) { typedef typename std::iterator_traits<Iterator>::value_type T; size_t count = iterator_range_size(first, last); if(count < 2){ return; } meta_kernel k("serial_insertion_sort"); size_t local_data_arg = k.add_arg<T *>(memory_object::local_memory, "data"); size_t count_arg = k.add_arg<uint_>("n"); k << // copy data to local memory "for(uint i = 0; i < n; i++){\n" << " data[i] = " << first[k.var<uint_>("i")] << ";\n" "}\n" // sort data in local memory "for(uint i = 1; i < n; i++){\n" << " " << k.decl<const T>("value") << " = data[i];\n" << " uint pos = i;\n" << " while(pos > 0 && " << compare(k.var<const T>("value"), k.var<const T>("data[pos-1]")) << "){\n" << " data[pos] = data[pos-1];\n" << " pos--;\n" << " }\n" << " data[pos] = value;\n" << "}\n" << // copy sorted data to output "for(uint i = 0; i < n; i++){\n" << " " << first[k.var<uint_>("i")] << " = data[i];\n" "}\n"; const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(local_data_arg, local_buffer<T>(count)); kernel.set_arg(count_arg, static_cast<uint_>(count)); queue.enqueue_task(kernel); } template<class Iterator> inline void serial_insertion_sort(Iterator first, Iterator last, command_queue &queue) { typedef typename std::iterator_traits<Iterator>::value_type T; ::boost::compute::less<T> less; 
return serial_insertion_sort(first, last, less, queue); } template<class KeyIterator, class ValueIterator, class Compare> inline void serial_insertion_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; typedef typename std::iterator_traits<ValueIterator>::value_type value_type; size_t count = iterator_range_size(keys_first, keys_last); if(count < 2){ return; } meta_kernel k("serial_insertion_sort_by_key"); size_t local_keys_arg = k.add_arg<key_type *>(memory_object::local_memory, "keys"); size_t local_data_arg = k.add_arg<value_type *>(memory_object::local_memory, "data"); size_t count_arg = k.add_arg<uint_>("n"); k << // copy data to local memory "for(uint i = 0; i < n; i++){\n" << " keys[i] = " << keys_first[k.var<uint_>("i")] << ";\n" " data[i] = " << values_first[k.var<uint_>("i")] << ";\n" "}\n" // sort data in local memory "for(uint i = 1; i < n; i++){\n" << " " << k.decl<const key_type>("key") << " = keys[i];\n" << " " << k.decl<const value_type>("value") << " = data[i];\n" << " uint pos = i;\n" << " while(pos > 0 && " << compare(k.var<const key_type>("key"), k.var<const key_type>("keys[pos-1]")) << "){\n" << " keys[pos] = keys[pos-1];\n" << " data[pos] = data[pos-1];\n" << " pos--;\n" << " }\n" << " keys[pos] = key;\n" << " data[pos] = value;\n" << "}\n" << // copy sorted data to output "for(uint i = 0; i < n; i++){\n" << " " << keys_first[k.var<uint_>("i")] << " = keys[i];\n" " " << values_first[k.var<uint_>("i")] << " = data[i];\n" "}\n"; const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, static_cast<uint_>(count * sizeof(key_type)), 0); kernel.set_arg(local_data_arg, static_cast<uint_>(count * sizeof(value_type)), 0); kernel.set_arg(count_arg, static_cast<uint_>(count)); queue.enqueue_task(kernel); } template<class KeyIterator, 
class ValueIterator> inline void serial_insertion_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; serial_insertion_sort_by_key( keys_first, keys_last, values_first, boost::compute::less<key_type>(), queue ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP algorithm/detail/merge_with_merge_path.hpp 0000644 00000016253 15125510617 0015044 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP #include <iterator> #include <boost/compute/algorithm/detail/merge_path.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Serial merge kernel class /// /// Subclass of meta_kernel to perform serial merge after tiling /// class serial_merge_kernel : meta_kernel { public: unsigned int tile_size; serial_merge_kernel() : meta_kernel("merge") { tile_size = 4; } template<class InputIterator1, class InputIterator2, class InputIterator3, class InputIterator4, class OutputIterator, class Compare> void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, 
InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator result, Compare comp) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << "uint index = i*" << tile_size << ";\n" << "while(start1<end1 && start2<end2)\n" << "{\n" << " if(!(" << comp(first2[expr<uint_>("start2")], first1[expr<uint_>("start1")]) << "))\n" << " {\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++;\n" << " start1++;\n" << " }\n" << " else\n" << " {\n" << result[expr<uint_>("index")] << " = " << first2[expr<uint_>("start2")] << ";\n" << " index++;\n" << " start2++;\n" << " }\n" << "}\n" << "while(start1<end1)\n" << "{\n" << result[expr<uint_>("index")] << " = " << first1[expr<uint_>("start1")] << ";\n" << " index++;\n" << " start1++;\n" << "}\n" << "while(start2<end2)\n" << "{\n" << result[expr<uint_>("index")] << " = " << first2[expr<uint_>("start2")] << ";\n" << " index++;\n" << " start2++;\n" << "}\n"; } template<class InputIterator1, class InputIterator2, class InputIterator3, class InputIterator4, class OutputIterator> void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator result) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; ::boost::compute::less<value_type> less_than; set_range(first1, first2, tile_first1, tile_last1, tile_first2, result, less_than); } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; /// /// \brief Merge algorithm with merge path /// /// Merges the sorted values in the range [\p first1, \p 
last1) with /// the sorted values in the range [\p first2, last2) and stores the /// result in the range beginning at \p result /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param result Iterator pointing to start of range in which the result /// will be stored /// \param comp Comparator which performs less than function /// \param queue Queue on which to execute /// template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare> inline OutputIterator merge_with_merge_path(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<OutputIterator>::difference_type result_difference_type; size_t tile_size = 1024; size_t count1 = iterator_range_size(first1, last1); size_t count2 = iterator_range_size(first2, last2); vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets merge_path_kernel tiling_kernel; tiling_kernel.tile_size = static_cast<unsigned int>(tile_size); tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1, comp); fill_n(tile_a.begin(), 1, uint_(0), queue); fill_n(tile_b.begin(), 1, uint_(0), queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), queue); fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue); // Merge serial_merge_kernel merge_kernel; merge_kernel.tile_size = static_cast<unsigned int>(tile_size); merge_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), result, comp); merge_kernel.exec(queue); return result + 
static_cast<result_difference_type>(count1 + count2); } /// \overload template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator merge_with_merge_path(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; ::boost::compute::less<value_type> less_than; return merge_with_merge_path(first1, last1, first2, last2, result, less_than, queue); } } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP algorithm/detail/scan.hpp 0000644 00000003011 15125510617 0011427 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP #include <boost/compute/device.hpp> #include <boost/compute/algorithm/detail/scan_on_cpu.hpp> #include <boost/compute/algorithm/detail/scan_on_gpu.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator scan(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { const device &device = queue.get_device(); if(device.type() & device::cpu){ return scan_on_cpu(first, last, result, exclusive, init, op, queue); } else { return scan_on_gpu(first, last, result, exclusive, init, op, queue); } } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP algorithm/detail/find_extrema_with_reduce.hpp 0000644 00000044360 15125510617 0015546 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP #include <algorithm> #include <boost/compute/types.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/allocator/pinned_allocator.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/parameter_cache.hpp> #include <boost/compute/memory/local_buffer.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/utility/program_cache.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator> bool find_extrema_with_reduce_requirements_met(InputIterator first, InputIterator last, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; const device &device = queue.get_device(); // device must have dedicated local memory storage // otherwise reduction would be highly inefficient if(device.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() != CL_LOCAL) { return false; } const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); // local memory size in bytes (per compute unit) const size_t local_mem_size = device.get_info<CL_DEVICE_LOCAL_MEM_SIZE>(); std::string cache_key = std::string("__boost_find_extrema_reduce_") + type_name<input_type>(); // load parameters boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); // Get preferred work group size size_t work_group_size = parameters->get(cache_key, "wgsize", 256); work_group_size = (std::min)(max_work_group_size, work_group_size); // local memory size needed to perform parallel reduction size_t required_local_mem_size = 0; // indices size required_local_mem_size += 
sizeof(uint_) * work_group_size; // values size required_local_mem_size += sizeof(input_type) * work_group_size; // at least 4 work groups per compute unit otherwise reduction // would be highly inefficient return ((required_local_mem_size * 4) <= local_mem_size); } /// \internal_ /// Algorithm finds the first extremum in given range, i.e., with the lowest /// index. /// /// If \p use_input_idx is false, it's assumed that input data is ordered by /// increasing index and \p input_idx is not used in the algorithm. template<class InputIterator, class ResultIterator, class Compare> inline void find_extrema_with_reduce(InputIterator input, vector<uint_>::iterator input_idx, size_t count, ResultIterator result, vector<uint_>::iterator result_idx, size_t work_groups_no, size_t work_group_size, Compare compare, const bool find_minimum, const bool use_input_idx, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; const context &context = queue.get_context(); meta_kernel k("find_extrema_reduce"); size_t count_arg = k.add_arg<uint_>("count"); size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block"); size_t block_idx_arg = k.add_arg<uint_ *>(memory_object::local_memory, "block_idx"); k << // Work item global id k.decl<const uint_>("gid") << " = get_global_id(0);\n" << // Index of element that will be read from input buffer k.decl<uint_>("idx") << " = gid;\n" << k.decl<input_type>("acc") << ";\n" << k.decl<uint_>("acc_idx") << ";\n" << "if(gid < count) {\n" << // Real index of currently best element "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << k.var<uint_>("acc_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" << "#else\n" << k.var<uint_>("acc_idx") << " = idx;\n" << "#endif\n" << // Init accumulator with first[get_global_id(0)] "acc = " << input[k.var<uint_>("idx")] << ";\n" << "idx += get_global_size(0);\n" << "}\n" << k.decl<bool>("compare_result") << ";\n" << k.decl<bool>("equal") << ";\n\n" << 
"while( idx < count ){\n" << // Next element k.decl<input_type>("next") << " = " << input[k.var<uint_>("idx")] << ";\n" << "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << k.decl<uint_>("next_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" << "#endif\n" << // Comparison between currently best element (acc) and next element "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << "compare_result = " << compare(k.var<input_type>("next"), k.var<input_type>("acc")) << ";\n" << "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << "equal = !compare_result && !" << compare(k.var<input_type>("acc"), k.var<input_type>("next")) << ";\n" << "# endif\n" << "#else\n" << "compare_result = " << compare(k.var<input_type>("acc"), k.var<input_type>("next")) << ";\n" << "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << "equal = !compare_result && !" << compare(k.var<input_type>("next"), k.var<input_type>("acc")) << ";\n" << "# endif\n" << "#endif\n" << // save the winner "acc = compare_result ? acc : next;\n" << "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << "acc_idx = compare_result ? " << "acc_idx : " << "(equal ? min(acc_idx, next_idx) : next_idx);\n" << "#else\n" << "acc_idx = compare_result ? 
acc_idx : idx;\n" << "#endif\n" << "idx += get_global_size(0);\n" << "}\n\n" << // Work item local id k.decl<const uint_>("lid") << " = get_local_id(0);\n" << "block[lid] = acc;\n" << "block_idx[lid] = acc_idx;\n" << "barrier(CLK_LOCAL_MEM_FENCE);\n" << k.decl<uint_>("group_offset") << " = count - (get_local_size(0) * get_group_id(0));\n\n"; k << "#pragma unroll\n" "for(" << k.decl<uint_>("offset") << " = " << uint_(work_group_size) << " / 2; offset > 0; " << "offset = offset / 2) {\n" << "if((lid < offset) && ((lid + offset) < group_offset)) { \n" << k.decl<input_type>("mine") << " = block[lid];\n" << k.decl<input_type>("other") << " = block[lid+offset];\n" << "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << "compare_result = " << compare(k.var<input_type>("other"), k.var<input_type>("mine")) << ";\n" << "equal = !compare_result && !" << compare(k.var<input_type>("mine"), k.var<input_type>("other")) << ";\n" << "#else\n" << "compare_result = " << compare(k.var<input_type>("mine"), k.var<input_type>("other")) << ";\n" << "equal = !compare_result && !" << compare(k.var<input_type>("other"), k.var<input_type>("mine")) << ";\n" << "#endif\n" << "block[lid] = compare_result ? mine : other;\n" << k.decl<uint_>("mine_idx") << " = block_idx[lid];\n" << k.decl<uint_>("other_idx") << " = block_idx[lid+offset];\n" << "block_idx[lid] = compare_result ? " << "mine_idx : " << "(equal ? 
min(mine_idx, other_idx) : other_idx);\n" << "}\n" "barrier(CLK_LOCAL_MEM_FENCE);\n" << "}\n\n" << // write block result to global output "if(lid == 0){\n" << result[k.var<uint_>("get_group_id(0)")] << " = block[0];\n" << result_idx[k.var<uint_>("get_group_id(0)")] << " = block_idx[0];\n" << "}"; std::string options; if(!find_minimum){ options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; } if(use_input_idx){ options += " -DBOOST_COMPUTE_USE_INPUT_IDX"; } kernel kernel = k.compile(context, options); kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(block_arg, local_buffer<input_type>(work_group_size)); kernel.set_arg(block_idx_arg, local_buffer<uint_>(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); } template<class InputIterator, class ResultIterator, class Compare> inline void find_extrema_with_reduce(InputIterator input, size_t count, ResultIterator result, vector<uint_>::iterator result_idx, size_t work_groups_no, size_t work_group_size, Compare compare, const bool find_minimum, command_queue &queue) { // dummy will not be used buffer_iterator<uint_> dummy = result_idx; return find_extrema_with_reduce( input, dummy, count, result, result_idx, work_groups_no, work_group_size, compare, find_minimum, false, queue ); } // Space complexity: \Omega(2 * work-group-size * work-groups-per-compute-unit) template<class InputIterator, class Compare> InputIterator find_extrema_with_reduce(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; typedef typename std::iterator_traits<InputIterator>::value_type input_type; const context &context = queue.get_context(); const device &device = queue.get_device(); // Getting information about used queue and device const size_t compute_units_no = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); const size_t max_work_group_size = 
device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); const size_t count = detail::iterator_range_size(first, last); std::string cache_key = std::string("__boost_find_extrema_with_reduce_") + type_name<input_type>(); // load parameters boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); // get preferred work group size and preferred number // of work groups per compute unit size_t work_group_size = parameters->get(cache_key, "wgsize", 256); size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 100); // calculate work group size and number of work groups work_group_size = (std::min)(max_work_group_size, work_group_size); size_t work_groups_no = compute_units_no * work_groups_per_cu; work_groups_no = (std::min)( work_groups_no, static_cast<size_t>(std::ceil(float(count) / work_group_size)) ); // phase I: finding candidates for extremum // device buffors for extremum candidates and their indices // each work-group computes its candidate vector<input_type> candidates(work_groups_no, context); vector<uint_> candidates_idx(work_groups_no, context); // finding candidates for first extremum and their indices find_extrema_with_reduce( first, count, candidates.begin(), candidates_idx.begin(), work_groups_no, work_group_size, compare, find_minimum, queue ); // phase II: finding extremum from among the candidates // zero-copy buffers for final result (value and index) vector<input_type, ::boost::compute::pinned_allocator<input_type> > result(1, context); vector<uint_, ::boost::compute::pinned_allocator<uint_> > result_idx(1, context); // get extremum from among the candidates find_extrema_with_reduce( candidates.begin(), candidates_idx.begin(), work_groups_no, result.begin(), result_idx.begin(), 1, work_group_size, compare, find_minimum, true, queue ); // mapping extremum index to host uint_* result_idx_host_ptr = static_cast<uint_*>( queue.enqueue_map_buffer( result_idx.get_buffer(), command_queue::map_read, 0, 
sizeof(uint_) ) ); return first + static_cast<difference_type>(*result_idx_host_ptr); } template<class InputIterator> InputIterator find_extrema_with_reduce(InputIterator first, InputIterator last, ::boost::compute::less< typename std::iterator_traits< InputIterator >::value_type > compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; typedef typename std::iterator_traits<InputIterator>::value_type input_type; const context &context = queue.get_context(); const device &device = queue.get_device(); // Getting information about used queue and device const size_t compute_units_no = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); const size_t count = detail::iterator_range_size(first, last); std::string cache_key = std::string("__boost_find_extrema_with_reduce_") + type_name<input_type>(); // load parameters boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); // get preferred work group size and preferred number // of work groups per compute unit size_t work_group_size = parameters->get(cache_key, "wgsize", 256); size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 64); // calculate work group size and number of work groups work_group_size = (std::min)(max_work_group_size, work_group_size); size_t work_groups_no = compute_units_no * work_groups_per_cu; work_groups_no = (std::min)( work_groups_no, static_cast<size_t>(std::ceil(float(count) / work_group_size)) ); // phase I: finding candidates for extremum // device buffors for extremum candidates and their indices // each work-group computes its candidate // zero-copy buffers are used to eliminate copying data back to host vector<input_type, ::boost::compute::pinned_allocator<input_type> > candidates(work_groups_no, context); vector<uint_, ::boost::compute::pinned_allocator <uint_> > 
candidates_idx(work_groups_no, context); // finding candidates for first extremum and their indices find_extrema_with_reduce( first, count, candidates.begin(), candidates_idx.begin(), work_groups_no, work_group_size, compare, find_minimum, queue ); // phase II: finding extremum from among the candidates // mapping candidates and their indices to host input_type* candidates_host_ptr = static_cast<input_type*>( queue.enqueue_map_buffer( candidates.get_buffer(), command_queue::map_read, 0, work_groups_no * sizeof(input_type) ) ); uint_* candidates_idx_host_ptr = static_cast<uint_*>( queue.enqueue_map_buffer( candidates_idx.get_buffer(), command_queue::map_read, 0, work_groups_no * sizeof(uint_) ) ); input_type* i = candidates_host_ptr; uint_* idx = candidates_idx_host_ptr; uint_* extremum_idx = idx; input_type extremum = *candidates_host_ptr; i++; idx++; // find extremum (serial) from among the candidates on host if(!find_minimum) { while(idx != (candidates_idx_host_ptr + work_groups_no)) { input_type next = *i; bool compare_result = next > extremum; bool equal = next == extremum; extremum = compare_result ? next : extremum; extremum_idx = compare_result ? idx : extremum_idx; extremum_idx = equal ? ((*extremum_idx < *idx) ? extremum_idx : idx) : extremum_idx; idx++, i++; } } else { while(idx != (candidates_idx_host_ptr + work_groups_no)) { input_type next = *i; bool compare_result = next < extremum; bool equal = next == extremum; extremum = compare_result ? next : extremum; extremum_idx = compare_result ? idx : extremum_idx; extremum_idx = equal ? ((*extremum_idx < *idx) ? 
extremum_idx : idx) : extremum_idx; idx++, i++; } } return first + static_cast<difference_type>(*extremum_idx); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP algorithm/detail/serial_scan.hpp 0000644 00000006160 15125510617 0012776 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_SCAN_HPP #include <iterator> #include <boost/compute/device.hpp> #include <boost/compute/kernel.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator serial_scan(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { if(first == last){ return result; } typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename std::iterator_traits<OutputIterator>::value_type output_type; const context &context = queue.get_context(); // create scan kernel meta_kernel k("serial_scan"); // Arguments size_t n_arg = k.add_arg<ulong_>("n"); size_t init_arg = k.add_arg<output_type>("initial_value"); if(!exclusive){ k << k.decl<const ulong_>("start_idx") << " = 1;\n" << k.decl<output_type>("sum") << " = " << first[0] << ";\n" << result[0] << " = sum;\n"; } 
else { k << k.decl<const ulong_>("start_idx") << " = 0;\n" << k.decl<output_type>("sum") << " = initial_value;\n"; } k << "for(ulong i = start_idx; i < n; i++){\n" << k.decl<const input_type>("x") << " = " << first[k.var<ulong_>("i")] << ";\n"; if(exclusive){ k << result[k.var<ulong_>("i")] << " = sum;\n"; } k << " sum = " << op(k.var<output_type>("sum"), k.var<output_type>("x")) << ";\n"; if(!exclusive){ k << result[k.var<ulong_>("i")] << " = sum;\n"; } k << "}\n"; // compile scan kernel kernel scan_kernel = k.compile(context); // setup kernel arguments size_t n = detail::iterator_range_size(first, last); scan_kernel.set_arg<ulong_>(n_arg, n); scan_kernel.set_arg<output_type>(init_arg, static_cast<output_type>(init)); // execute the kernel queue.enqueue_1d_range_kernel(scan_kernel, 0, 1, 1); // return iterator pointing to the end of the result range return result + n; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_SCAN_HPP algorithm/detail/reduce_on_cpu.hpp 0000644 00000007555 15125510617 0013336 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_CPU_HPP #include <algorithm> #include <boost/compute/buffer.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/parameter_cache.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/result_of.hpp> #include <boost/compute/algorithm/detail/serial_reduce.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class BinaryFunction> inline void reduce_on_cpu(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type T; typedef typename ::boost::compute::result_of<BinaryFunction(T, T)>::type result_type; const device &device = queue.get_device(); const uint_ compute_units = queue.get_device().compute_units(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); std::string cache_key = "__boost_reduce_cpu_" + boost::lexical_cast<std::string>(sizeof(T)); // for inputs smaller than serial_reduce_threshold // serial_reduce algorithm is used uint_ serial_reduce_threshold = parameters->get(cache_key, "serial_reduce_threshold", 16384 * sizeof(T)); serial_reduce_threshold = (std::max)(serial_reduce_threshold, uint_(compute_units)); const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } else if(count < serial_reduce_threshold) { return serial_reduce(first, last, result, function, queue); } meta_kernel k("reduce_on_cpu"); buffer output(context, sizeof(result_type) * compute_units); size_t count_arg = k.add_arg<uint_>("count"); size_t 
output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output"); k << "uint block = " << "(uint)ceil(((float)count)/get_global_size(0));\n" << "uint index = get_global_id(0) * block;\n" << "uint end = min(count, index + block);\n" << k.decl<result_type>("result") << " = " << first[k.var<uint_>("index")] << ";\n" << "index++;\n" << "while(index < end){\n" << "result = " << function(k.var<T>("result"), first[k.var<uint_>("index")]) << ";\n" << "index++;\n" << "}\n" << "output[get_global_id(0)] = result;\n"; size_t global_work_size = compute_units; kernel kernel = k.compile(context); // reduction to global_work_size elements kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(output_arg, output); queue.enqueue_1d_range_kernel(kernel, 0, global_work_size, 0); // final reduction reduce_on_cpu( make_buffer_iterator<result_type>(output), make_buffer_iterator<result_type>(output, global_work_size), result, function, queue ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_CPU_HPP algorithm/detail/search_all.hpp 0000644 00000005023 15125510617 0012605 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Search kernel class /// /// Subclass of meta_kernel which is capable of performing pattern matching /// template<class PatternIterator, class TextIterator, class OutputIterator> class search_kernel : public meta_kernel { public: search_kernel() : meta_kernel("search") {} void set_range(PatternIterator p_first, PatternIterator p_last, TextIterator t_first, TextIterator t_last, OutputIterator result) { m_p_count = iterator_range_size(p_first, p_last); m_p_count_arg = add_arg<uint_>("p_count"); m_count = iterator_range_size(t_first, t_last); m_count = m_count + 1 - m_p_count; *this << "uint i = get_global_id(0);\n" << "const uint i1 = i;\n" << "uint j;\n" << "for(j = 0; j<p_count; j++,i++)\n" << "{\n" << " if(" << p_first[expr<uint_>("j")] << " != " << t_first[expr<uint_>("i")] << ")\n" << " j = p_count + 1;\n" << "}\n" << "if(j == p_count)\n" << result[expr<uint_>("i1")] << " = 1;\n" << "else\n" << result[expr<uint_>("i1")] << " = 0;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } set_arg(m_p_count_arg, uint_(m_p_count)); return exec_1d(queue, 0, m_count); } private: size_t m_p_count; size_t m_p_count_arg; size_t m_count; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP algorithm/detail/copy_to_device.hpp 0000644 00000015113 15125510617 0013504 0 ustar 00 //---------------------------------------------------------------------------// // 
Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP #include <iterator> #include <boost/utility/addressof.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/async/future.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/memory/svm_ptr.hpp> namespace boost { namespace compute { namespace detail { template<class HostIterator, class DeviceIterator> inline DeviceIterator copy_to_device(HostIterator first, HostIterator last, DeviceIterator result, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<DeviceIterator>::value_type value_type; typedef typename std::iterator_traits<DeviceIterator>::difference_type difference_type; size_t count = iterator_range_size(first, last); if(count == 0){ return result; } size_t offset = result.get_index(); queue.enqueue_write_buffer(result.get_buffer(), offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*first), events); return result + static_cast<difference_type>(count); } template<class HostIterator, class DeviceIterator> inline DeviceIterator copy_to_device_map(HostIterator first, HostIterator last, DeviceIterator result, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<DeviceIterator>::value_type value_type; typedef typename std::iterator_traits<DeviceIterator>::difference_type difference_type; size_t count = iterator_range_size(first, last); if(count == 0){ return result; } size_t offset = result.get_index(); // map result buffer to host value_type *pointer = 
static_cast<value_type*>( queue.enqueue_map_buffer( result.get_buffer(), CL_MAP_WRITE, offset * sizeof(value_type), count * sizeof(value_type), events ) ); // copy [first; last) to result buffer std::copy(first, last, pointer); // unmap result buffer boost::compute::event unmap_event = queue.enqueue_unmap_buffer( result.get_buffer(), static_cast<void*>(pointer) ); unmap_event.wait(); return result + static_cast<difference_type>(count); } template<class HostIterator, class DeviceIterator> inline future<DeviceIterator> copy_to_device_async(HostIterator first, HostIterator last, DeviceIterator result, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<DeviceIterator>::value_type value_type; typedef typename std::iterator_traits<DeviceIterator>::difference_type difference_type; size_t count = iterator_range_size(first, last); if(count == 0){ return future<DeviceIterator>(); } size_t offset = result.get_index(); event event_ = queue.enqueue_write_buffer_async(result.get_buffer(), offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*first), events); return make_future(result + static_cast<difference_type>(count), event_); } #ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_to_device() specialization for svm_ptr template<class HostIterator, class T> inline svm_ptr<T> copy_to_device(HostIterator first, HostIterator last, svm_ptr<T> result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } queue.enqueue_svm_memcpy( result.get(), ::boost::addressof(*first), count * sizeof(T), events ); return result + count; } template<class HostIterator, class T> inline future<svm_ptr<T> > copy_to_device_async(HostIterator first, HostIterator last, svm_ptr<T> result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return future<svm_ptr<T> >(); } event event_ = queue.enqueue_svm_memcpy_async( 
result.get(), ::boost::addressof(*first), count * sizeof(T), events ); return make_future(result + count, event_); } template<class HostIterator, class T> inline svm_ptr<T> copy_to_device_map(HostIterator first, HostIterator last, svm_ptr<T> result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } // map queue.enqueue_svm_map( result.get(), count * sizeof(T), CL_MAP_WRITE, events ); // copy [first; last) to result buffer std::copy(first, last, static_cast<T*>(result.get())); // unmap result queue.enqueue_svm_unmap(result.get()).wait(); return result + count; } #endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP algorithm/detail/reduce_by_key_with_scan.hpp 0000644 00000056267 15125510617 0015400 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP #include <algorithm> #include <iterator> #include <boost/compute/command_queue.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/algorithm/inclusive_scan.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/read_write_single_value.hpp> #include <boost/compute/type_traits.hpp> #include <boost/compute/utility/program_cache.hpp> namespace boost { namespace compute { namespace detail { /// \internal_ /// /// Fills \p new_keys_first with unsigned integer keys generated from vector /// of original keys \p keys_first. New keys can be distinguish by simple equality /// predicate. /// /// \param keys_first iterator pointing to the first key /// \param number_of_keys number of keys /// \param predicate binary predicate for key comparison /// \param new_keys_first iterator pointing to the new keys vector /// \param preferred_work_group_size preferred work group size /// \param queue command queue to perform the operation /// /// Binary function \p predicate must take two keys as arguments and /// return true only if they are considered the same. /// /// The first new key equals zero and the last equals number of unique keys /// minus one. /// /// No local memory usage. 
template<class InputKeyIterator, class BinaryPredicate> inline void generate_uint_keys(InputKeyIterator keys_first, size_t number_of_keys, BinaryPredicate predicate, vector<uint_>::iterator new_keys_first, size_t preferred_work_group_size, command_queue &queue) { typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; detail::meta_kernel k("reduce_by_key_new_key_flags"); k.add_set_arg<const uint_>("count", uint_(number_of_keys)); k << k.decl<const uint_>("gid") << " = get_global_id(0);\n" << k.decl<uint_>("value") << " = 0;\n" << "if(gid >= count){\n return;\n}\n" << "if(gid > 0){ \n" << k.decl<key_type>("key") << " = " << keys_first[k.var<const uint_>("gid")] << ";\n" << k.decl<key_type>("previous_key") << " = " << keys_first[k.var<const uint_>("gid - 1")] << ";\n" << " value = " << predicate(k.var<key_type>("previous_key"), k.var<key_type>("key")) << " ? 0 : 1;\n" << "}\n else {\n" << " value = 0;\n" << "}\n" << new_keys_first[k.var<const uint_>("gid")] << " = value;\n"; const context &context = queue.get_context(); kernel kernel = k.compile(context); size_t work_group_size = preferred_work_group_size; size_t work_groups_no = static_cast<size_t>( std::ceil(float(number_of_keys) / work_group_size) ); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); inclusive_scan(new_keys_first, new_keys_first + number_of_keys, new_keys_first, queue); } /// \internal_ /// Calculate carry-out for each work group. /// Carry-out is a pair of the last key processed by a work group and sum of all /// values under this key in this work group. 
template<class InputValueIterator, class OutputValueIterator, class BinaryFunction> inline void carry_outs(vector<uint_>::iterator keys_first, InputValueIterator values_first, size_t count, vector<uint_>::iterator carry_out_keys_first, OutputValueIterator carry_out_values_first, BinaryFunction function, size_t work_group_size, command_queue &queue) { typedef typename std::iterator_traits<OutputValueIterator>::value_type value_out_type; detail::meta_kernel k("reduce_by_key_with_scan_carry_outs"); k.add_set_arg<const uint_>("count", uint_(count)); size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys"); size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals"); k << k.decl<const uint_>("gid") << " = get_global_id(0);\n" << k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" << k.decl<const uint_>("lid") << " = get_local_id(0);\n" << k.decl<const uint_>("group_id") << " = get_group_id(0);\n" << k.decl<uint_>("key") << ";\n" << k.decl<value_out_type>("value") << ";\n" << "if(gid < count){\n" << k.var<uint_>("key") << " = " << keys_first[k.var<const uint_>("gid")] << ";\n" << k.var<value_out_type>("value") << " = " << values_first[k.var<const uint_>("gid")] << ";\n" << "lkeys[lid] = key;\n" << "lvals[lid] = value;\n" << "}\n" << // Calculate carry out for each work group by performing Hillis/Steele scan // where only last element (key-value pair) is saved k.decl<value_out_type>("result") << " = value;\n" << k.decl<uint_>("other_key") << ";\n" << k.decl<value_out_type>("other_value") << ";\n" << "for(" << k.decl<uint_>("offset") << " = 1; " << "offset < wg_size; offset *= 2){\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" << " if(lid >= offset){\n" " other_key = lkeys[lid - offset];\n" << " if(other_key == key){\n" << " other_value = lvals[lid - offset];\n" << " result = " << function(k.var<value_out_type>("result"), k.var<value_out_type>("other_value")) << ";\n" << " }\n" << " }\n" << " 
barrier(CLK_LOCAL_MEM_FENCE);\n" << " lvals[lid] = result;\n" << "}\n" << // save carry out "if(lid == (wg_size - 1)){\n" << carry_out_keys_first[k.var<const uint_>("group_id")] << " = key;\n" << carry_out_values_first[k.var<const uint_>("group_id")] << " = result;\n" << "}\n"; size_t work_groups_no = static_cast<size_t>( std::ceil(float(count) / work_group_size) ); const context &context = queue.get_context(); kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size)); kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); } /// \internal_ /// Calculate carry-in by performing inclusive scan by key on carry-outs vector. template<class OutputValueIterator, class BinaryFunction> inline void carry_ins(vector<uint_>::iterator carry_out_keys_first, OutputValueIterator carry_out_values_first, OutputValueIterator carry_in_values_first, size_t carry_out_size, BinaryFunction function, size_t work_group_size, command_queue &queue) { typedef typename std::iterator_traits<OutputValueIterator>::value_type value_out_type; uint_ values_pre_work_item = static_cast<uint_>( std::ceil(float(carry_out_size) / work_group_size) ); detail::meta_kernel k("reduce_by_key_with_scan_carry_ins"); k.add_set_arg<const uint_>("carry_out_size", uint_(carry_out_size)); k.add_set_arg<const uint_>("values_per_work_item", values_pre_work_item); size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys"); size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals"); k << k.decl<uint_>("id") << " = get_global_id(0) * values_per_work_item;\n" << k.decl<uint_>("idx") << " = id;\n" << k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" << k.decl<const uint_>("lid") << " = get_local_id(0);\n" << k.decl<const uint_>("group_id") << " = get_group_id(0);\n" << k.decl<uint_>("key") << ";\n" << 
k.decl<value_out_type>("value") << ";\n" << k.decl<uint_>("previous_key") << ";\n" << k.decl<value_out_type>("result") << ";\n" << "if(id < carry_out_size){\n" << k.var<uint_>("previous_key") << " = " << carry_out_keys_first[k.var<const uint_>("id")] << ";\n" << k.var<value_out_type>("result") << " = " << carry_out_values_first[k.var<const uint_>("id")] << ";\n" << carry_in_values_first[k.var<const uint_>("id")] << " = result;\n" << "}\n" << k.decl<const uint_>("end") << " = (id + values_per_work_item) <= carry_out_size" << " ? (values_per_work_item + id) : carry_out_size;\n" << "for(idx = idx + 1; idx < end; idx += 1){\n" << " key = " << carry_out_keys_first[k.var<const uint_>("idx")] << ";\n" << " value = " << carry_out_values_first[k.var<const uint_>("idx")] << ";\n" << " if(previous_key == key){\n" << " result = " << function(k.var<value_out_type>("result"), k.var<value_out_type>("value")) << ";\n" << " }\n else { \n" << " result = value;\n" " }\n" << " " << carry_in_values_first[k.var<const uint_>("idx")] << " = result;\n" << " previous_key = key;\n" "}\n" << // save the last key and result to local memory "lkeys[lid] = previous_key;\n" << "lvals[lid] = result;\n" << // Hillis/Steele scan "for(" << k.decl<uint_>("offset") << " = 1; " << "offset < wg_size; offset *= 2){\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" << " if(lid >= offset){\n" " key = lkeys[lid - offset];\n" << " if(previous_key == key){\n" << " value = lvals[lid - offset];\n" << " result = " << function(k.var<value_out_type>("result"), k.var<value_out_type>("value")) << ";\n" << " }\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " lvals[lid] = result;\n" << "}\n" << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "if(lid > 0){\n" << // load key-value reduced by previous work item " previous_key = lkeys[lid - 1];\n" << " result = lvals[lid - 1];\n" << "}\n" << // add key-value reduced by previous work item "for(idx = id; idx < id + values_per_work_item; idx += 1){\n" << // make sure all carry-ins are saved 
in global memory " barrier( CLK_GLOBAL_MEM_FENCE );\n" << " if(lid > 0 && idx < carry_out_size) {\n" " key = " << carry_out_keys_first[k.var<const uint_>("idx")] << ";\n" << " value = " << carry_in_values_first[k.var<const uint_>("idx")] << ";\n" << " if(previous_key == key){\n" << " value = " << function(k.var<value_out_type>("result"), k.var<value_out_type>("value")) << ";\n" << " }\n" << " " << carry_in_values_first[k.var<const uint_>("idx")] << " = value;\n" << " }\n" << "}\n"; const context &context = queue.get_context(); kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size)); kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_group_size, work_group_size); } /// \internal_ /// /// Perform final reduction by key. Each work item: /// 1. Perform local work-group reduction (Hillis/Steele scan) /// 2. Add carry-in (if keys are right) /// 3. Save reduced value if next key is different than processed one template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction> inline void final_reduction(InputKeyIterator keys_first, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, size_t count, BinaryFunction function, vector<uint_>::iterator new_keys_first, vector<uint_>::iterator carry_in_keys_first, OutputValueIterator carry_in_values_first, size_t carry_in_size, size_t work_group_size, command_queue &queue) { typedef typename std::iterator_traits<OutputValueIterator>::value_type value_out_type; detail::meta_kernel k("reduce_by_key_with_scan_final_reduction"); k.add_set_arg<const uint_>("count", uint_(count)); size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys"); size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals"); k << k.decl<const uint_>("gid") << " = 
get_global_id(0);\n" << k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" << k.decl<const uint_>("lid") << " = get_local_id(0);\n" << k.decl<const uint_>("group_id") << " = get_group_id(0);\n" << k.decl<uint_>("key") << ";\n" << k.decl<value_out_type>("value") << ";\n" "if(gid < count){\n" << k.var<uint_>("key") << " = " << new_keys_first[k.var<const uint_>("gid")] << ";\n" << k.var<value_out_type>("value") << " = " << values_first[k.var<const uint_>("gid")] << ";\n" << "lkeys[lid] = key;\n" << "lvals[lid] = value;\n" << "}\n" << // Hillis/Steele scan k.decl<value_out_type>("result") << " = value;\n" << k.decl<uint_>("other_key") << ";\n" << k.decl<value_out_type>("other_value") << ";\n" << "for(" << k.decl<uint_>("offset") << " = 1; " << "offset < wg_size ; offset *= 2){\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" << " if(lid >= offset) {\n" << " other_key = lkeys[lid - offset];\n" << " if(other_key == key){\n" << " other_value = lvals[lid - offset];\n" << " result = " << function(k.var<value_out_type>("result"), k.var<value_out_type>("other_value")) << ";\n" << " }\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " lvals[lid] = result;\n" << "}\n" << "if(gid >= count) {\n return;\n};\n" << k.decl<const bool>("save") << " = (gid < (count - 1)) ?" << new_keys_first[k.var<const uint_>("gid + 1")] << " != key" << ": true;\n" << // Add carry in k.decl<uint_>("carry_in_key") << ";\n" << "if(group_id > 0 && save) {\n" << " carry_in_key = " << carry_in_keys_first[k.var<const uint_>("group_id - 1")] << ";\n" << " if(key == carry_in_key){\n" << " other_value = " << carry_in_values_first[k.var<const uint_>("group_id - 1")] << ";\n" << " result = " << function(k.var<value_out_type>("result"), k.var<value_out_type>("other_value")) << ";\n" << " }\n" << "}\n" << // Save result only if the next key is different or it's the last element. 
"if(save){\n" << keys_result[k.var<uint_>("key")] << " = " << keys_first[k.var<const uint_>("gid")] << ";\n" << values_result[k.var<uint_>("key")] << " = result;\n" << "}\n" ; size_t work_groups_no = static_cast<size_t>( std::ceil(float(count) / work_group_size) ); const context &context = queue.get_context(); kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size)); kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); } /// \internal_ /// Returns preferred work group size for reduce by key with scan algorithm. template<class KeyType, class ValueType> inline size_t get_work_group_size(const device& device) { std::string cache_key = std::string("__boost_reduce_by_key_with_scan") + "k_" + type_name<KeyType>() + "_v_" + type_name<ValueType>(); // load parameters boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); return (std::max)( static_cast<size_t>(parameters->get(cache_key, "wgsize", 256)), static_cast<size_t>(device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>()) ); } /// \internal_ /// /// 1. For each work group carry-out value is calculated (it's done by key-oriented /// Hillis/Steele scan). Carry-out is a pair of the last key processed by work /// group and sum of all values under this key in work group. /// 2. From every carry-out carry-in is calculated by performing inclusive scan /// by key. /// 3. Final reduction by key is performed (key-oriented Hillis/Steele scan), /// carry-in values are added where needed. 
template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction, class BinaryPredicate> inline size_t reduce_by_key_with_scan(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits<InputValueIterator>::value_type value_type; typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; typedef typename std::iterator_traits<OutputValueIterator>::value_type value_out_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(keys_first, keys_last); if(count == 0){ return size_t(0); } const device &device = queue.get_device(); size_t work_group_size = get_work_group_size<value_type, key_type>(device); // Replace original key with unsigned integer keys generated based on given // predicate. New key is also an index for keys_result and values_result vectors, // which points to place where reduced value should be saved. 
vector<uint_> new_keys(count, context); vector<uint_>::iterator new_keys_first = new_keys.begin(); generate_uint_keys(keys_first, count, predicate, new_keys_first, work_group_size, queue); // Calculate carry-out and carry-in vectors size const size_t carry_out_size = static_cast<size_t>( std::ceil(float(count) / work_group_size) ); vector<uint_> carry_out_keys(carry_out_size, context); vector<value_out_type> carry_out_values(carry_out_size, context); carry_outs(new_keys_first, values_first, count, carry_out_keys.begin(), carry_out_values.begin(), function, work_group_size, queue); vector<value_out_type> carry_in_values(carry_out_size, context); carry_ins(carry_out_keys.begin(), carry_out_values.begin(), carry_in_values.begin(), carry_out_size, function, work_group_size, queue); final_reduction(keys_first, values_first, keys_result, values_result, count, function, new_keys_first, carry_out_keys.begin(), carry_in_values.begin(), carry_out_size, work_group_size, queue); const size_t result = read_single_value<uint_>(new_keys.get_buffer(), count - 1, queue); return result + 1; } /// \internal_ /// Return true if requirements for running reduce by key with scan on given /// device are met (at least one work group of preferred size can be run). 
template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator> bool reduce_by_key_with_scan_requirements_met(InputKeyIterator keys_first, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, const size_t count, command_queue &queue) { typedef typename std::iterator_traits<InputValueIterator>::value_type value_type; typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; typedef typename std::iterator_traits<OutputValueIterator>::value_type value_out_type; (void) keys_first; (void) values_first; (void) keys_result; (void) values_result; const device &device = queue.get_device(); // device must have dedicated local memory storage if(device.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() != CL_LOCAL) { return false; } // local memory size in bytes (per compute unit) const size_t local_mem_size = device.get_info<CL_DEVICE_LOCAL_MEM_SIZE>(); // preferred work group size size_t work_group_size = get_work_group_size<key_type, value_type>(device); // local memory size needed to perform parallel reduction size_t required_local_mem_size = 0; // keys size required_local_mem_size += sizeof(uint_) * work_group_size; // reduced values size required_local_mem_size += sizeof(value_out_type) * work_group_size; return (required_local_mem_size <= local_mem_size); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP algorithm/detail/reduce_on_gpu.hpp 0000644 00000024227 15125510617 0013335 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
/// \internal
/// Generates the OpenCL C text of the work-group (local memory) reduction
/// step used by the reduce kernels. This primary template is the generic
/// variant; the \c <T,true> specialization below is the NVIDIA variant.
///
/// The emitted code refers to \c lid, \c scratch and the \c TPB macro, so it
/// can only be spliced into a kernel that defines them.
template<typename T,bool isNvidiaDevice>
struct ReduceBody
{
    /// Returns the kernel source fragment as a string.
    static std::string body()
    {
        std::stringstream k;
        // local reduction: the stride i doubles every iteration and the
        // work-items whose lid is a multiple of 2*i add in scratch[lid+i];
        // a barrier precedes every step.
        // NOTE(review): scratch[lid+i] is not bounds-checked in the emitted
        // code - presumably TPB is expected to be a power of two; confirm.
        k << "for(int i = 1; i < TPB; i <<= 1){\n" <<
             " barrier(CLK_LOCAL_MEM_FENCE);\n" <<
             " uint mask = (i << 1) - 1;\n" <<
             " if((lid & mask) == 0){\n" <<
             " scratch[lid] += scratch[lid+i];\n" <<
             " }\n" <<
             "}\n";
        return k.str();
    }
};

/// \internal
/// Work-group reduction fragment for NVIDIA devices: a fully unrolled
/// reduction that uses barriers down to 64 elements and then finishes the
/// last 32 work-items through a volatile local pointer without barriers
/// (the "unsafe" memory optimisation).
///
/// In addition to \c lid, \c scratch and \c TPB, the emitted code reads and
/// writes a variable named \c sum.
template<typename T>
struct ReduceBody<T,true>
{
    /// Returns the kernel source fragment as a string.
    static std::string body()
    {
        std::stringstream k;
        // local reduction
        // we use TPB to compile only useful instruction
        // local reduction when size is greater than warp size
        k << "barrier(CLK_LOCAL_MEM_FENCE);\n" <<
        "if(TPB >= 1024){\n" <<
            "if(lid < 512) { sum += scratch[lid + 512]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" <<
         "if(TPB >= 512){\n" <<
            "if(lid < 256) { sum += scratch[lid + 256]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" <<
         "if(TPB >= 256){\n" <<
            "if(lid < 128) { sum += scratch[lid + 128]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" <<
         "if(TPB >= 128){\n" <<
            "if(lid < 64) { sum += scratch[lid + 64]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);} \n" <<

        // warp reduction
        "if(lid < 32){\n" <<
            // volatile this way we don't need any barrier
            // NOTE(review): every work-item with lid < 32 executes each step
            // below, so e.g. with TPB == 32 the step reading lmem[lid+16]
            // indexes up to 47 - this looks like it assumes TPB >= 64 and
            // lock-step execution of those 32 work-items; confirm.
            "volatile __local " << type_name<T>() << " *lmem = scratch;\n" <<
            "if(TPB >= 64) { lmem[lid] = sum = sum + lmem[lid+32];} \n" <<
            "if(TPB >= 32) { lmem[lid] = sum = sum + lmem[lid+16];} \n" <<
            "if(TPB >= 16) { lmem[lid] = sum = sum + lmem[lid+ 8];} \n" <<
            "if(TPB >= 8) { lmem[lid] = sum = sum + lmem[lid+ 4];} \n" <<
            "if(TPB >= 4) { lmem[lid] = sum = sum + lmem[lid+ 2];} \n" <<
            "if(TPB >= 2) { lmem[lid] = sum = sum + lmem[lid+ 1];} \n" <<
        "}\n";
        return k.str();
    }
};
calculate_work_size(count, vpt, tpb); queue.enqueue_1d_range_kernel(generic_reduce_kernel, 0, work_size, tpb); } template<class T> inline void initial_reduce(const buffer_iterator<T> &first, const buffer_iterator<T> &last, const buffer &result, const plus<T> &function, kernel &reduce_kernel, const uint_ vpt, const uint_ tpb, command_queue &queue) { (void) function; size_t count = std::distance(first, last); reduce_kernel.set_arg(0, first.get_buffer()); reduce_kernel.set_arg(1, uint_(first.get_index())); reduce_kernel.set_arg(2, uint_(count)); reduce_kernel.set_arg(3, result); reduce_kernel.set_arg(4, uint_(0)); size_t work_size = calculate_work_size(count, vpt, tpb); queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb); } template<class InputIterator, class T, class Function> inline void reduce_on_gpu(InputIterator first, InputIterator last, buffer_iterator<T> result, Function function, command_queue &queue) { const device &device = queue.get_device(); const context &context = queue.get_context(); detail::meta_kernel k("reduce"); k.add_arg<const T*>(memory_object::global_memory, "input"); k.add_arg<const uint_>("offset"); k.add_arg<const uint_>("count"); k.add_arg<T*>(memory_object::global_memory, "output"); k.add_arg<const uint_>("output_offset"); k << k.decl<const uint_>("block_offset") << " = get_group_id(0) * VPT * TPB;\n" << "__global const " << type_name<T>() << " *block = input + offset + block_offset;\n" << k.decl<const uint_>("lid") << " = get_local_id(0);\n" << "__local " << type_name<T>() << " scratch[TPB];\n" << // private reduction k.decl<T>("sum") << " = 0;\n" << "for(uint i = 0; i < VPT; i++){\n" << " if(block_offset + lid + i*TPB < count){\n" << " sum = sum + block[lid+i*TPB]; \n" << " }\n" << "}\n" << "scratch[lid] = sum;\n"; // discrimination on vendor name if(is_nvidia_device(device)) k << ReduceBody<T,true>::body(); else k << ReduceBody<T,false>::body(); k << // write sum to output "if(lid == 0){\n" << " output[output_offset + 
get_group_id(0)] = scratch[0];\n" << "}\n"; std::string cache_key = std::string("__boost_reduce_on_gpu_") + type_name<T>(); // load parameters boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); uint_ vpt = parameters->get(cache_key, "vpt", 8); uint_ tpb = parameters->get(cache_key, "tpb", 128); // reduce program compiler flags std::stringstream options; options << "-DT=" << type_name<T>() << " -DVPT=" << vpt << " -DTPB=" << tpb; // load program boost::shared_ptr<program_cache> cache = program_cache::get_global_cache(context); program reduce_program = cache->get_or_build( cache_key, options.str(), k.source(), context ); // create reduce kernel kernel reduce_kernel(reduce_program, "reduce"); size_t count = std::distance(first, last); // first pass, reduce from input to ping buffer ping(context, std::ceil(float(count) / vpt / tpb) * sizeof(T)); initial_reduce(first, last, ping, function, reduce_kernel, vpt, tpb, queue); // update count after initial reduce count = static_cast<size_t>(std::ceil(float(count) / vpt / tpb)); // middle pass(es), reduce between ping and pong const buffer *input_buffer = &ping; buffer pong(context, static_cast<size_t>(count / vpt / tpb * sizeof(T))); const buffer *output_buffer = &pong; if(count > vpt * tpb){ while(count > vpt * tpb){ reduce_kernel.set_arg(0, *input_buffer); reduce_kernel.set_arg(1, uint_(0)); reduce_kernel.set_arg(2, uint_(count)); reduce_kernel.set_arg(3, *output_buffer); reduce_kernel.set_arg(4, uint_(0)); size_t work_size = static_cast<size_t>(std::ceil(float(count) / vpt)); if(work_size % tpb != 0){ work_size += tpb - work_size % tpb; } queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb); std::swap(input_buffer, output_buffer); count = static_cast<size_t>(std::ceil(float(count) / vpt / tpb)); } } // final pass, reduce from ping/pong to result reduce_kernel.set_arg(0, *input_buffer); reduce_kernel.set_arg(1, uint_(0)); reduce_kernel.set_arg(2, uint_(count)); 
reduce_kernel.set_arg(3, result.get_buffer()); reduce_kernel.set_arg(4, uint_(result.get_index())); queue.enqueue_1d_range_kernel(reduce_kernel, 0, tpb, tpb); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP algorithm/detail/copy_on_device.hpp 0000644 00000016326 15125510617 0013505 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP #include <iterator> #include <boost/compute/command_queue.hpp> #include <boost/compute/async/future.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/iterator/discard_iterator.hpp> #include <boost/compute/memory/svm_ptr.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/parameter_cache.hpp> #include <boost/compute/detail/work_size.hpp> #include <boost/compute/detail/vendor.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator> inline event copy_on_device_cpu(InputIterator first, OutputIterator result, size_t count, command_queue &queue, const wait_list &events) { meta_kernel k("copy"); const device& device = queue.get_device(); k << "uint block = " << "(uint)ceil(((float)count)/get_global_size(0));\n" << "uint index = get_global_id(0) * block;\n" << "uint end = min(count, index + block);\n" << "while(index < end){\n" << result[k.var<uint_>("index")] << '=' << 
first[k.var<uint_>("index")] << ";\n" << "index++;\n" << "}\n"; k.add_set_arg<const uint_>("count", static_cast<uint_>(count)); size_t global_work_size = device.compute_units(); if(count <= 1024) global_work_size = 1; return k.exec_1d(queue, 0, global_work_size, events); } template<class InputIterator, class OutputIterator> inline event copy_on_device_gpu(InputIterator first, OutputIterator result, size_t count, command_queue &queue, const wait_list &events) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; const device& device = queue.get_device(); boost::shared_ptr<parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); std::string cache_key = "__boost_copy_kernel_" + boost::lexical_cast<std::string>(sizeof(input_type)); uint_ vpt = parameters->get(cache_key, "vpt", 4); uint_ tpb = parameters->get(cache_key, "tpb", 128); meta_kernel k("copy"); k << "uint index = get_local_id(0) + " << "(" << vpt * tpb << " * get_group_id(0));\n" << "for(uint i = 0; i < " << vpt << "; i++){\n" << " if(index < count){\n" << result[k.var<uint_>("index")] << '=' << first[k.var<uint_>("index")] << ";\n" << " index += " << tpb << ";\n" " }\n" "}\n"; k.add_set_arg<const uint_>("count", static_cast<uint_>(count)); size_t global_work_size = calculate_work_size(count, vpt, tpb); return k.exec_1d(queue, 0, global_work_size, tpb, events); } template<class InputIterator, class OutputIterator> inline event dispatch_copy_on_device(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events) { const size_t count = detail::iterator_range_size(first, last); if(count == 0){ // nothing to do return event(); } const device& device = queue.get_device(); // copy_on_device_cpu() does not work for CPU on Apple platform // due to bug in its compiler. 
// See https://github.com/boostorg/compute/pull/626 if((device.type() & device::cpu) && !is_apple_platform_device(device)) { return copy_on_device_cpu(first, result, count, queue, events); } return copy_on_device_gpu(first, result, count, queue, events); } template<class InputIterator, class OutputIterator> inline OutputIterator copy_on_device(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events) { dispatch_copy_on_device(first, last, result, queue, events); return result + std::distance(first, last); } template<class InputIterator> inline discard_iterator copy_on_device(InputIterator first, InputIterator last, discard_iterator result, command_queue &queue, const wait_list &events) { (void) queue; (void) events; return result + std::distance(first, last); } template<class InputIterator, class OutputIterator> inline future<OutputIterator> copy_on_device_async(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue, const wait_list &events) { event event_ = dispatch_copy_on_device(first, last, result, queue, events); return make_future(result + std::distance(first, last), event_); } #ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_on_device() specialization for svm_ptr template<class T> inline svm_ptr<T> copy_on_device(svm_ptr<T> first, svm_ptr<T> last, svm_ptr<T> result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } queue.enqueue_svm_memcpy( result.get(), first.get(), count * sizeof(T), events ); return result + count; } template<class T> inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first, svm_ptr<T> last, svm_ptr<T> result, command_queue &queue, const wait_list &events) { size_t count = iterator_range_size(first, last); if(count == 0){ return future<svm_ptr<T> >(); } event event_ = queue.enqueue_svm_memcpy_async( result.get(), first.get(), count * sizeof(T), events ); return 
/// Finds the position of the minimum (or maximum) element of
/// [\p first, \p last) with a kernel in which every work-item competes to
/// publish its own index through an atomic compare-and-exchange.
///
/// \param first,last   input range
/// \param compare      comparison functor expanded into the kernel source
/// \param find_minimum \c true selects the minimum; \c false compiles the
///                     kernel with \c BOOST_COMPUTE_FIND_MAXIMUM instead
/// \param queue        command queue used to run the kernel and read back
/// \return iterator to the selected element (first + index read back)
template<class InputIterator, class Compare>
inline InputIterator find_extrema_with_atomics(InputIterator first,
                                               InputIterator last,
                                               Compare compare,
                                               const bool find_minimum,
                                               command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    const context &context = queue.get_context();

    meta_kernel k("find_extrema");
    atomic_cmpxchg<uint_> atomic_cmpxchg_uint;

    // Kernel outline: "old" is the element at the currently published index,
    // "new" is this work-item's element. While "new" should replace "old",
    // try to swap gid into *index; if another work-item changed *index in
    // the meantime, reload it and re-evaluate.
    k <<
        "const uint gid = get_global_id(0);\n" <<
        "uint old_index = *index;\n" <<

        k.decl<value_type>("old") <<
            " = " << first[k.var<uint_>("old_index")] << ";\n" <<
        k.decl<value_type>("new") <<
            " = " << first[k.var<uint_>("gid")] << ";\n" <<

        k.decl<bool>("compare_result") << ";\n" <<
        "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" <<
        "while(" <<
            "(compare_result = " << compare(k.var<value_type>("old"),
                                            k.var<value_type>("new")) << ")" <<
            " || (!(compare_result" <<
                    " || " << compare(k.var<value_type>("new"),
                                      k.var<value_type>("old")) << ") "
                  "&& gid < old_index)){\n" <<
        "#else\n" <<
        // while condition explained for minimum case with less (<)
        // as comparison function:
        // while(new_value < old_value
        //       OR (new_value == old_value AND new_index < old_index))
        "while(" <<
            "(compare_result = " << compare(k.var<value_type>("new"),
                                            k.var<value_type>("old"))  << ")" <<
            " || (!(compare_result" <<
                    " || " << compare(k.var<value_type>("old"),
                                      k.var<value_type>("new")) << ") "
                  "&& gid < old_index)){\n" <<
        "#endif\n" <<

        // the swap succeeded if the value it returns is the index we expected
        " if(" << atomic_cmpxchg_uint(k.var<uint_ *>("index"),
                                      k.var<uint_>("old_index"),
                                      k.var<uint_>("gid")) << " == old_index)\n" <<
        " break;\n" <<
        " else\n" <<
        " old_index = *index;\n" <<
        "old = " << first[k.var<uint_>("old_index")] << ";\n" <<
        "}\n";

    size_t index_arg_index = k.add_arg<uint_ *>(memory_object::global_memory, "index");

    // the maximum variant is selected at kernel compile time
    std::string options;
    if(!find_minimum){
        options = "-DBOOST_COMPUTE_FIND_MAXIMUM";
    }
    kernel kernel = k.compile(context, options);

    // setup index buffer
    scalar<uint_> index(context);
    kernel.set_arg(index_arg_index, index.get_buffer());

    // initialize index
    index.write(0, queue);

    // run kernel
    // one work-item per element; the local size argument is passed as 0
    // NOTE(review): an empty range gives a global size of 0 here -
    // presumably callers never pass one; confirm.
    size_t count = iterator_range_size(first, last);
    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);

    // read index and return iterator
    return first + static_cast<difference_type>(index.read(queue));
}
/// Finds the position of the minimum (or maximum) element of
/// [\p first, \p last) with a single-task kernel that scans the range
/// sequentially and keeps the first best candidate.
///
/// \param first,last   input range
/// \param compare      comparison functor expanded into the kernel source
/// \param find_minimum \c true selects the minimum; \c false compiles the
///                     kernel with \c BOOST_COMPUTE_FIND_MAXIMUM instead
/// \param queue        command queue used to run the kernel and read back
/// \return iterator to the selected element (first + index read back)
template<class InputIterator, class Compare>
inline InputIterator serial_find_extrema(InputIterator first,
                                         InputIterator last,
                                         Compare compare,
                                         const bool find_minimum,
                                         command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;
    typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;

    const context &context = queue.get_context();

    meta_kernel k("serial_find_extrema");

    // The kernel starts from element 0 and replaces the current best only
    // when the comparison is strictly satisfied, so on ties the earlier
    // index is kept.
    // NOTE(review): element 0 is read unconditionally, before "size" is
    // consulted - presumably callers never pass an empty range; confirm.
    k <<
        k.decl<value_type>("value") << " = " << first[k.expr<uint_>("0")] << ";\n" <<
        k.decl<uint_>("value_index") << " = 0;\n" <<
        "for(uint i = 1; i < size; i++){\n" <<
        " " << k.decl<value_type>("candidate") << "="
             << first[k.expr<uint_>("i")] << ";\n" <<

        "#ifndef BOOST_COMPUTE_FIND_MAXIMUM\n" <<
        " if(" << compare(k.var<value_type>("candidate"),
                          k.var<value_type>("value")) << "){\n" <<
        "#else\n" <<
        " if(" << compare(k.var<value_type>("value"),
                          k.var<value_type>("candidate")) << "){\n" <<
        "#endif\n" <<

        " value = candidate;\n" <<
        " value_index = i;\n" <<
        " }\n" <<
        "}\n" <<
        "*index = value_index;\n";

    size_t index_arg_index = k.add_arg<uint_ *>(memory_object::global_memory, "index");
    size_t size_arg_index = k.add_arg<uint_>("size");

    // the maximum variant is selected at kernel compile time
    std::string options;
    if(!find_minimum){
        options = "-DBOOST_COMPUTE_FIND_MAXIMUM";
    }
    kernel kernel = k.compile(context, options);

    // setup index buffer
    scalar<uint_> index(context);
    kernel.set_arg(index_arg_index, index.get_buffer());

    // setup count
    size_t count = iterator_range_size(first, last);
    kernel.set_arg(size_arg_index, static_cast<uint_>(count));

    // run kernel
    queue.enqueue_task(kernel);

    // read index and return iterator
    return first + static_cast<difference_type>(index.read(queue));
}
// meta-function returning true if type T is radix-sortable
// (T must be a fundamental type and must not be a vector type)
template<class T>
struct is_radix_sortable :
    boost::mpl::and_<
        typename ::boost::compute::is_fundamental<T>::type,
        typename boost::mpl::not_<typename is_vector_type<T>::type>::type
    >
{
};

// Maps a size in bytes (N) to the unsigned integer type of that size.
// The primary template has no nested "type"; only the sizes 1, 2, 4 and 8
// are specialized below.
template<size_t N>
struct radix_sort_value_type
{
};

template<>
struct radix_sort_value_type<1>
{
    typedef uchar_ type;
};

template<>
struct radix_sort_value_type<2>
{
    typedef ushort_ type;
};

template<>
struct radix_sort_value_type<4>
{
    typedef uint_ type;
};

template<>
struct radix_sort_value_type<8>
{
    typedef ulong_ type;
};

// Returns the compiler option that defines the T2_double macro:
// 0 for every type except double ...
template<typename T>
inline const char* enable_double()
{
    return " -DT2_double=0";
}

// ... and 1 for double.
template<>
inline const char* enable_double<double>()
{
    return " -DT2_double=1";
}
"inline uint radix(const T x, const uint low_bit)\n" "{\n" "#if defined(IS_FLOATING_POINT)\n" " const T mask = -(x >> SIGN_BIT) | (((T)(1)) << SIGN_BIT);\n" " return (((-x) ^ mask) >> low_bit) & RADIX_MASK;\n" "#elif defined(IS_SIGNED)\n" " return (((-x) ^ (((T)(1)) << SIGN_BIT)) >> low_bit) & RADIX_MASK;\n" "#else\n" " return (((T)(-1) - x) >> low_bit) & RADIX_MASK;\n" "#endif\n" "}\n" "#endif\n" // #if defined(ASC) "__kernel void count(__global const T *input,\n" " const uint input_offset,\n" " const uint input_size,\n" " __global uint *global_counts,\n" " __global uint *global_offsets,\n" " __local uint *local_counts,\n" " const uint low_bit)\n" "{\n" // work-item parameters " const uint gid = get_global_id(0);\n" " const uint lid = get_local_id(0);\n" // zero local counts " if(lid < K2_BITS){\n" " local_counts[lid] = 0;\n" " }\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" // reduce local counts " if(gid < input_size){\n" " T value = input[input_offset+gid];\n" " uint bucket = radix(value, low_bit);\n" " atomic_inc(local_counts + bucket);\n" " }\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" // write block-relative offsets " if(lid < K2_BITS){\n" " global_counts[K2_BITS*get_group_id(0) + lid] = local_counts[lid];\n" // write global offsets " if(get_group_id(0) == (get_num_groups(0) - 1)){\n" " global_offsets[lid] = local_counts[lid];\n" " }\n" " }\n" "}\n" "__kernel void scan(__global const uint *block_offsets,\n" " __global uint *global_offsets,\n" " const uint block_count)\n" "{\n" " __global const uint *last_block_offsets =\n" " block_offsets + K2_BITS * (block_count - 1);\n" // calculate and scan global_offsets " uint sum = 0;\n" " for(uint i = 0; i < K2_BITS; i++){\n" " uint x = global_offsets[i] + last_block_offsets[i];\n" " mem_fence(CLK_GLOBAL_MEM_FENCE);\n" // work around the RX 500/Vega bug, see #811 " global_offsets[i] = sum;\n" " sum += x;\n" " mem_fence(CLK_GLOBAL_MEM_FENCE);\n" // work around the RX Vega bug, see #811 " }\n" "}\n" "__kernel void scatter(__global 
/// Radix sort of the keys in [\p first, \p last), optionally moving the
/// values starting at \p values_first along with their keys.
///
/// Each pass handles \c k bits of the key with three kernels from the radix
/// sort program: "count" (per-block bucket counts), "scan" (global bucket
/// offsets) and "scatter" (writes keys, and values when sorting by key, to
/// their new positions). After every pass the input and output buffers are
/// swapped.
///
/// \param first,last   keys to sort (buffer-backed)
/// \param values_first start of the values; a default-constructed iterator
///                     (null buffer) selects a plain key sort
/// \param ascending    \c true adds \c -DASC to the program options
/// \param queue        command queue used for all kernels
template<class T, class T2>
inline void radix_sort_impl(const buffer_iterator<T> first,
                            const buffer_iterator<T> last,
                            const buffer_iterator<T2> values_first,
                            const bool ascending,
                            command_queue &queue)
{

    typedef T value_type;
    // unsigned integer type with the same size as T; the kernels are
    // compiled with T defined as this type
    typedef typename radix_sort_value_type<sizeof(T)>::type sort_type;

    const device &device = queue.get_device();
    const context &context = queue.get_context();


    // if we have a valid values iterator then we are doing a
    // sort by key and have to set up the values buffer
    bool sort_by_key = (values_first.get_buffer().get() != 0);

    // load (or create) radix sort program
    std::string cache_key =
        std::string("__boost_radix_sort_") + type_name<value_type>();

    if(sort_by_key){
        cache_key += std::string("_with_") + type_name<T2>();
    }

    boost::shared_ptr<program_cache> cache =
        program_cache::get_global_cache(context);
    boost::shared_ptr<parameter_cache> parameters =
        detail::parameter_cache::get_global_cache(device);

    // sort parameters
    // k = bits per pass, k2 = number of buckets, block_size = work-group size
    const uint_ k = parameters->get(cache_key, "k", 4);
    const uint_ k2 = 1 << k;
    const uint_ block_size = parameters->get(cache_key, "tpb", 128);

    // sort program compiler options
    std::stringstream options;
    options << "-DK_BITS=" << k;
    options << " -DT=" << type_name<sort_type>();
    options << " -DBLOCK_SIZE=" << block_size;

    if(boost::is_floating_point<value_type>::value){
        options << " -DIS_FLOATING_POINT";
    }

    if(boost::is_signed<value_type>::value){
        options << " -DIS_SIGNED";
    }

    if(sort_by_key){
        options << " -DSORT_BY_KEY";
        options << " -DT2=" << type_name<T2>();
        options << enable_double<T2>();
    }

    if(ascending){
        options << " -DASC";
    }

    // get type definition if it is a custom struct
    std::string custom_type_def = boost::compute::type_definition<T2>() + "\n";

    // load radix sort program
    program radix_sort_program = cache->get_or_build(
       cache_key, options.str(), custom_type_def + radix_sort_source, context
    );

    kernel count_kernel(radix_sort_program, "count");
    kernel scan_kernel(radix_sort_program, "scan");
    kernel scatter_kernel(radix_sort_program, "scatter");

    size_t count = detail::iterator_range_size(first, last);

    // number of work-groups, rounded up so a partial last block is covered
    uint_ block_count = static_cast<uint_>(count / block_size);
    if(block_count * block_size != count){
        block_count++;
    }

    // setup temporary buffers
    vector<value_type> output(count, context);
    vector<T2> values_output(sort_by_key ? count : 0, context);
    vector<uint_> offsets(k2, context);
    vector<uint_> counts(block_count * k2, context);

    const buffer *input_buffer = &first.get_buffer();
    uint_ input_offset = static_cast<uint_>(first.get_index());
    const buffer *output_buffer = &output.get_buffer();
    uint_ output_offset = 0;
    const buffer *values_input_buffer = &values_first.get_buffer();
    uint_ values_input_offset = static_cast<uint_>(values_first.get_index());
    const buffer *values_output_buffer = &values_output.get_buffer();
    uint_ values_output_offset = 0;

    // one pass per group of k key bits
    // NOTE(review): nothing is copied back after the loop, so the sorted
    // data is in the caller's buffer only when the number of passes is
    // even - presumably guaranteed by the supported k values; confirm.
    for(uint_ i = 0; i < sizeof(sort_type) * CHAR_BIT / k; i++){
        // write counts
        count_kernel.set_arg(0, *input_buffer);
        count_kernel.set_arg(1, input_offset);
        count_kernel.set_arg(2, static_cast<uint_>(count));
        count_kernel.set_arg(3, counts);
        count_kernel.set_arg(4, offsets);
        // local memory argument: block_size uints
        // NOTE(review): the count kernel indexes local_counts by bucket
        // (< 2^k), so this presumably relies on block_size >= 2^k; confirm.
        count_kernel.set_arg(5, block_size * sizeof(uint_), 0);
        count_kernel.set_arg(6, i * k);
        queue.enqueue_1d_range_kernel(count_kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);

        // scan counts
        // the counts buffer is reinterpreted as vectors of 2^k uints so one
        // exclusive scan over the blocks handles all buckets at once
        if(k == 1){
            typedef uint2_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 2),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else if(k == 2){
            typedef uint4_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 4),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else if(k == 4){
            typedef uint16_ counter_type;
            ::boost::compute::exclusive_scan(
                make_buffer_iterator<counter_type>(counts.get_buffer(), 0),
                make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 16),
                make_buffer_iterator<counter_type>(counts.get_buffer()),
                queue
            );
        }
        else {
            // only k = 1, 2 or 4 is supported
            BOOST_ASSERT(false && "unknown k");
            break;
        }

        // scan global offsets
        scan_kernel.set_arg(0, counts);
        scan_kernel.set_arg(1, offsets);
        scan_kernel.set_arg(2, block_count);
        queue.enqueue_task(scan_kernel);

        // scatter values
        scatter_kernel.set_arg(0, *input_buffer);
        scatter_kernel.set_arg(1, input_offset);
        scatter_kernel.set_arg(2, static_cast<uint_>(count));
        scatter_kernel.set_arg(3, i * k);
        scatter_kernel.set_arg(4, counts);
        scatter_kernel.set_arg(5, offsets);
        scatter_kernel.set_arg(6, *output_buffer);
        scatter_kernel.set_arg(7, output_offset);
        if(sort_by_key){
            scatter_kernel.set_arg(8, *values_input_buffer);
            scatter_kernel.set_arg(9, values_input_offset);
            scatter_kernel.set_arg(10, *values_output_buffer);
            scatter_kernel.set_arg(11, values_output_offset);
        }
        queue.enqueue_1d_range_kernel(scatter_kernel,
                                      0,
                                      block_count * block_size,
                                      block_size);

        // swap buffers
        std::swap(input_buffer, output_buffer);
        std::swap(values_input_buffer, values_output_buffer);
        std::swap(input_offset, output_offset);
        std::swap(values_input_offset, values_output_offset);
    }
}

/// Sorts [\p first, \p last) in ascending order (no values attached).
template<class Iterator>
inline void radix_sort(Iterator first,
                       Iterator last,
                       command_queue &queue)
{
    radix_sort_impl(first, last, buffer_iterator<int>(), true, queue);
}

/// Sorts the keys in ascending order and reorders the values starting at
/// \p values_first accordingly.
template<class KeyIterator, class ValueIterator>
inline void radix_sort_by_key(KeyIterator keys_first,
                              KeyIterator keys_last,
                              ValueIterator values_first,
                              command_queue &queue)
{
    radix_sort_impl(keys_first, keys_last, values_first, true, queue);
}

/// Sorts [\p first, \p last); \p ascending selects the sort direction.
template<class Iterator>
inline void radix_sort(Iterator first,
                       Iterator last,
                       const bool ascending,
                       command_queue &queue)
{
    radix_sort_impl(first, last, buffer_iterator<int>(), ascending, queue);
}

/// Sorts the keys and reorders the values starting at \p values_first
/// accordingly; \p ascending selects the sort direction.
template<class KeyIterator, class ValueIterator>
inline void radix_sort_by_key(KeyIterator keys_first,
                              KeyIterator keys_last,
                              ValueIterator values_first,
                              const bool ascending,
                              command_queue &queue)
{
    radix_sort_impl(keys_first, keys_last, values_first, ascending, queue);
}
BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP algorithm/detail/merge_sort_on_gpu.hpp 0000644 00000053115 15125510617 0014232 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_GPU_HPP_ #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_GPU_HPP_ #include <algorithm> #include <boost/compute/kernel.hpp> #include <boost/compute/program.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/memory/local_buffer.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> namespace boost { namespace compute { namespace detail { template<class KeyType, class ValueType> inline size_t pick_bitonic_block_sort_block_size(size_t proposed_wg, size_t lmem_size, bool sort_by_key) { size_t n = proposed_wg; size_t lmem_required = n * sizeof(KeyType); if(sort_by_key) { lmem_required += n * sizeof(ValueType); } // try to force at least 4 work-groups of >64 elements // for better occupancy while(lmem_size < (lmem_required * 4) && (n > 64)) { n /= 2; lmem_required = n * sizeof(KeyType); } while(lmem_size < lmem_required && (n != 1)) { n /= 2; if(n < 1) n = 1; lmem_required = n * sizeof(KeyType); } if(n < 2) { return 1; } else if(n < 4) { return 2; } else if(n < 8) { return 4; } else if(n < 16) { return 8; } else if(n < 32) { return 16; } else if(n < 64) { return 32; } else if(n < 128) { return 64; } else if(n < 256) { return 128; } else { return 256; } } /// Performs bitonic block sort according to \p compare. 
/// /// Since bitonic sort can be only performed when input size is equal to 2^n, /// in this case input size is block size (\p work_group_size), we would have /// to require \p count be a exact multiple of block size. That would not be /// great. /// Instead, bitonic sort kernel is merged with odd-even merge sort so if the /// last block is not equal to 2^n (where n is some natural number) the odd-even /// sort is performed for that block. That way bitonic_block_sort() works for /// input of any size. Block size (\p work_group_size) still have to be equal /// to 2^n. /// /// This is NOT stable. /// /// \param keys_first first key element in the range to sort /// \param values_first first value element in the range to sort /// \param compare comparison function for keys /// \param count number of elements in the range; count > 0 /// \param work_group_size size of the work group, also the block size; must be /// equal to n^2 where n is natural number /// \param queue command queue to perform the operation template<class KeyIterator, class ValueIterator, class Compare> inline size_t bitonic_block_sort(KeyIterator keys_first, ValueIterator values_first, Compare compare, const size_t count, const bool sort_by_key, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; typedef typename std::iterator_traits<ValueIterator>::value_type value_type; meta_kernel k("bitonic_block_sort"); size_t count_arg = k.add_arg<const uint_>("count"); size_t local_keys_arg = k.add_arg<key_type *>(memory_object::local_memory, "lkeys"); size_t local_vals_arg = 0; if(sort_by_key) { local_vals_arg = k.add_arg<uchar_ *>(memory_object::local_memory, "lidx"); } k << // Work item global and local ids k.decl<const uint_>("gid") << " = get_global_id(0);\n" << k.decl<const uint_>("lid") << " = get_local_id(0);\n"; // declare my_key and my_value k << k.decl<key_type>("my_key") << ";\n"; // Instead of copying values (my_value) in local memory with keys // we 
save local index (uchar) and copy my_value at the end at // final index. This saves local memory. if(sort_by_key) { k << k.decl<uchar_>("my_index") << " = (uchar)(lid);\n"; } // load key k << "if(gid < count) {\n" << k.var<key_type>("my_key") << " = " << keys_first[k.var<const uint_>("gid")] << ";\n" << "}\n"; // load key and index to local memory k << "lkeys[lid] = my_key;\n"; if(sort_by_key) { k << "lidx[lid] = my_index;\n"; } k << k.decl<const uint_>("offset") << " = get_group_id(0) * get_local_size(0);\n" << k.decl<const uint_>("n") << " = min((uint)(get_local_size(0)),(count - offset));\n"; // When work group size is a power of 2 bitonic sorter can be used; // otherwise, slower odd-even sort is used. k << // check if n is power of 2 "if(((n != 0) && ((n & (~n + 1)) == n))) {\n"; // bitonic sort, not stable k << // wait for keys and vals to be stored in local memory "barrier(CLK_LOCAL_MEM_FENCE);\n" << "#pragma unroll\n" << "for(" << k.decl<uint_>("length") << " = 1; " << "length < n; " << "length <<= 1" << ") {\n" << // direction of sort: false -> asc, true -> desc k.decl<bool>("direction") << "= ((lid & (length<<1)) != 0);\n" << "for(" << k.decl<uint_>("k") << " = length; " << "k > 0; " << "k >>= 1" << ") {\n" << // sibling to compare with my key k.decl<uint_>("sibling_idx") << " = lid ^ k;\n" << k.decl<key_type>("sibling_key") << " = lkeys[sibling_idx];\n" << k.decl<bool>("compare") << " = " << compare(k.var<key_type>("sibling_key"), k.var<key_type>("my_key")) << ";\n" << k.decl<bool>("equal") << " = !(compare || " << compare(k.var<key_type>("my_key"), k.var<key_type>("sibling_key")) << ");\n" << k.decl<bool>("swap") << " = compare ^ (sibling_idx < lid) ^ direction;\n" << "swap = equal ? false : swap;\n" << "my_key = swap ? sibling_key : my_key;\n"; if(sort_by_key) { k << "my_index = swap ? 
lidx[sibling_idx] : my_index;\n"; } k << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "lkeys[lid] = my_key;\n"; if(sort_by_key) { k << "lidx[lid] = my_index;\n"; } k << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "}\n" << "}\n"; // end of bitonic sort // odd-even sort, not stable k << "}\n" << "else { \n"; k << k.decl<bool>("lid_is_even") << " = (lid%2) == 0;\n" << k.decl<uint_>("oddsibling_idx") << " = " << "(lid_is_even) ? max(lid,(uint)(1)) - 1 : min(lid+1,n-1);\n" << k.decl<uint_>("evensibling_idx") << " = " << "(lid_is_even) ? min(lid+1,n-1) : max(lid,(uint)(1)) - 1;\n" << // wait for keys and vals to be stored in local memory "barrier(CLK_LOCAL_MEM_FENCE);\n" << "#pragma unroll\n" << "for(" << k.decl<uint_>("i") << " = 0; " << "i < n; " << "i++" << ") {\n" << k.decl<uint_>("sibling_idx") << " = i%2 == 0 ? evensibling_idx : oddsibling_idx;\n" << k.decl<key_type>("sibling_key") << " = lkeys[sibling_idx];\n" << k.decl<bool>("compare") << " = " << compare(k.var<key_type>("sibling_key"), k.var<key_type>("my_key")) << ";\n" << k.decl<bool>("equal") << " = !(compare || " << compare(k.var<key_type>("my_key"), k.var<key_type>("sibling_key")) << ");\n" << k.decl<bool>("swap") << " = compare ^ (sibling_idx < lid);\n" << "swap = equal ? false : swap;\n" << "my_key = swap ? sibling_key : my_key;\n"; if(sort_by_key) { k << "my_index = swap ? 
lidx[sibling_idx] : my_index;\n"; } k << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "lkeys[lid] = my_key;\n"; if(sort_by_key) { k << "lidx[lid] = my_index;\n"; } k << "barrier(CLK_LOCAL_MEM_FENCE);\n" "}\n" << // for "}\n"; // else // end of odd-even sort // save key and value k << "if(gid < count) {\n" << keys_first[k.var<const uint_>("gid")] << " = " << k.var<key_type>("my_key") << ";\n"; if(sort_by_key) { k << k.decl<value_type>("my_value") << " = " << values_first[k.var<const uint_>("offset + my_index")] << ";\n" << "barrier(CLK_GLOBAL_MEM_FENCE);\n" << values_first[k.var<const uint_>("gid")] << " = my_value;\n"; } k << // end if "}\n"; const context &context = queue.get_context(); const device &device = queue.get_device(); ::boost::compute::kernel kernel = k.compile(context); const size_t work_group_size = pick_bitonic_block_sort_block_size<key_type, uchar_>( kernel.get_work_group_info<size_t>( device, CL_KERNEL_WORK_GROUP_SIZE ), device.get_info<size_t>(CL_DEVICE_LOCAL_MEM_SIZE), sort_by_key ); const size_t global_size = work_group_size * static_cast<size_t>( std::ceil(float(count) / work_group_size) ); kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(local_keys_arg, local_buffer<key_type>(work_group_size)); if(sort_by_key) { kernel.set_arg(local_vals_arg, local_buffer<uchar_>(work_group_size)); } queue.enqueue_1d_range_kernel(kernel, 0, global_size, work_group_size); // return size of the block return work_group_size; } template<class KeyIterator, class ValueIterator, class Compare> inline size_t block_sort(KeyIterator keys_first, ValueIterator values_first, Compare compare, const size_t count, const bool sort_by_key, const bool stable, command_queue &queue) { if(stable) { // TODO: Implement stable block sort (stable odd-even merge sort) return size_t(1); } return bitonic_block_sort( keys_first, values_first, compare, count, sort_by_key, queue ); } /// space: O(n + m); n - number of keys, m - number of values template<class KeyIterator, class 
ValueIterator, class Compare> inline void merge_blocks_on_gpu(KeyIterator keys_first, ValueIterator values_first, KeyIterator out_keys_first, ValueIterator out_values_first, Compare compare, const size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; typedef typename std::iterator_traits<ValueIterator>::value_type value_type; meta_kernel k("merge_blocks"); size_t count_arg = k.add_arg<const uint_>("count"); size_t block_size_arg = k.add_arg<const uint_>("block_size"); k << // get global id k.decl<const uint_>("gid") << " = get_global_id(0);\n" << "if(gid >= count) {\n" << "return;\n" << "}\n" << k.decl<const key_type>("my_key") << " = " << keys_first[k.var<const uint_>("gid")] << ";\n"; if(sort_by_key) { k << k.decl<const value_type>("my_value") << " = " << values_first[k.var<const uint_>("gid")] << ";\n"; } k << // get my block idx k.decl<const uint_>("my_block_idx") << " = gid / block_size;\n" << k.decl<const bool>("my_block_idx_is_odd") << " = " << "my_block_idx & 0x1;\n" << k.decl<const uint_>("other_block_idx") << " = " << // if(my_block_idx is odd) {} else {} "my_block_idx_is_odd ? 
my_block_idx - 1 : my_block_idx + 1;\n" << // get ranges of my block and the other block // [my_block_start; my_block_end) // [other_block_start; other_block_end) k.decl<const uint_>("my_block_start") << " = " << "min(my_block_idx * block_size, count);\n" << // including k.decl<const uint_>("my_block_end") << " = " << "min((my_block_idx + 1) * block_size, count);\n" << // excluding k.decl<const uint_>("other_block_start") << " = " << "min(other_block_idx * block_size, count);\n" << // including k.decl<const uint_>("other_block_end") << " = " << "min((other_block_idx + 1) * block_size, count);\n" << // excluding // other block is empty, nothing to merge here "if(other_block_start == count){\n" << out_keys_first[k.var<uint_>("gid")] << " = my_key;\n"; if(sort_by_key) { k << out_values_first[k.var<uint_>("gid")] << " = my_value;\n"; } k << "return;\n" << "}\n" << // lower bound // left_idx - lower bound k.decl<uint_>("left_idx") << " = other_block_start;\n" << k.decl<uint_>("right_idx") << " = other_block_end;\n" << "while(left_idx < right_idx) {\n" << k.decl<uint_>("mid_idx") << " = (left_idx + right_idx) / 2;\n" << k.decl<key_type>("mid_key") << " = " << keys_first[k.var<const uint_>("mid_idx")] << ";\n" << k.decl<bool>("smaller") << " = " << compare(k.var<key_type>("mid_key"), k.var<key_type>("my_key")) << ";\n" << "left_idx = smaller ? mid_idx + 1 : left_idx;\n" << "right_idx = smaller ? 
right_idx : mid_idx;\n" << "}\n" << // left_idx is found position in other block // if my_block is odd we need to get the upper bound "right_idx = other_block_end;\n" << "if(my_block_idx_is_odd && left_idx != right_idx) {\n" << k.decl<key_type>("upper_key") << " = " << keys_first[k.var<const uint_>("left_idx")] << ";\n" << "while(" << "!(" << compare(k.var<key_type>("upper_key"), k.var<key_type>("my_key")) << ") && " << "!(" << compare(k.var<key_type>("my_key"), k.var<key_type>("upper_key")) << ") && " << "left_idx < right_idx" << ")" << "{\n" << k.decl<uint_>("mid_idx") << " = (left_idx + right_idx) / 2;\n" << k.decl<key_type>("mid_key") << " = " << keys_first[k.var<const uint_>("mid_idx")] << ";\n" << k.decl<bool>("equal") << " = " << "!(" << compare(k.var<key_type>("mid_key"), k.var<key_type>("my_key")) << ") && " << "!(" << compare(k.var<key_type>("my_key"), k.var<key_type>("mid_key")) << ");\n" << "left_idx = equal ? mid_idx + 1 : left_idx + 1;\n" << "right_idx = equal ? right_idx : mid_idx;\n" << "upper_key = " << keys_first[k.var<const uint_>("left_idx")] << ";\n" << "}\n" << "}\n" << k.decl<uint_>("offset") << " = 0;\n" << "offset += gid - my_block_start;\n" << "offset += left_idx - other_block_start;\n" << "offset += min(my_block_start, other_block_start);\n" << out_keys_first[k.var<uint_>("offset")] << " = my_key;\n"; if(sort_by_key) { k << out_values_first[k.var<uint_>("offset")] << " = my_value;\n"; } const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); const size_t work_group_size = (std::min)( size_t(256), kernel.get_work_group_info<size_t>( queue.get_device(), CL_KERNEL_WORK_GROUP_SIZE ) ); const size_t global_size = work_group_size * static_cast<size_t>( std::ceil(float(count) / work_group_size) ); kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(block_size_arg, static_cast<uint_>(block_size)); queue.enqueue_1d_range_kernel(kernel, 0, global_size, work_group_size); } template<class 
KeyIterator, class ValueIterator, class Compare> inline void merge_sort_by_key_on_gpu(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, bool stable, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; typedef typename std::iterator_traits<ValueIterator>::value_type value_type; size_t count = iterator_range_size(keys_first, keys_last); if(count < 2){ return; } size_t block_size = block_sort( keys_first, values_first, compare, count, true /* sort_by_key */, stable /* stable */, queue ); // for small input size only block sort is performed if(count <= block_size) { return; } const context &context = queue.get_context(); bool result_in_temporary_buffer = false; ::boost::compute::vector<key_type> temp_keys(count, context); ::boost::compute::vector<value_type> temp_values(count, context); for(; block_size < count; block_size *= 2) { result_in_temporary_buffer = !result_in_temporary_buffer; if(result_in_temporary_buffer) { merge_blocks_on_gpu(keys_first, values_first, temp_keys.begin(), temp_values.begin(), compare, count, block_size, true /* sort_by_key */, queue); } else { merge_blocks_on_gpu(temp_keys.begin(), temp_values.begin(), keys_first, values_first, compare, count, block_size, true /* sort_by_key */, queue); } } if(result_in_temporary_buffer) { copy_async(temp_keys.begin(), temp_keys.end(), keys_first, queue); copy_async(temp_values.begin(), temp_values.end(), values_first, queue); } } template<class Iterator, class Compare> inline void merge_sort_on_gpu(Iterator first, Iterator last, Compare compare, bool stable, command_queue &queue) { typedef typename std::iterator_traits<Iterator>::value_type key_type; size_t count = iterator_range_size(first, last); if(count < 2){ return; } Iterator dummy; size_t block_size = block_sort( first, dummy, compare, count, false /* sort_by_key */, stable /* stable */, queue ); // for small input size only block sort is performed if(count <= 
block_size) { return; } const context &context = queue.get_context(); bool result_in_temporary_buffer = false; ::boost::compute::vector<key_type> temp_keys(count, context); for(; block_size < count; block_size *= 2) { result_in_temporary_buffer = !result_in_temporary_buffer; if(result_in_temporary_buffer) { merge_blocks_on_gpu(first, dummy, temp_keys.begin(), dummy, compare, count, block_size, false /* sort_by_key */, queue); } else { merge_blocks_on_gpu(temp_keys.begin(), dummy, first, dummy, compare, count, block_size, false /* sort_by_key */, queue); } } if(result_in_temporary_buffer) { copy_async(temp_keys.begin(), temp_keys.end(), first, queue); } } template<class KeyIterator, class ValueIterator, class Compare> inline void merge_sort_by_key_on_gpu(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { merge_sort_by_key_on_gpu( keys_first, keys_last, values_first, compare, false /* not stable */, queue ); } template<class Iterator, class Compare> inline void merge_sort_on_gpu(Iterator first, Iterator last, Compare compare, command_queue &queue) { merge_sort_on_gpu( first, last, compare, false /* not stable */, queue ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif /* BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_GPU_HPP_ */ algorithm/detail/serial_reduce.hpp 0000644 00000004141 15125510617 0013316 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/result_of.hpp> namespace boost { namespace compute { namespace detail { // Space complexity: O(1) template<class InputIterator, class OutputIterator, class BinaryFunction> inline void serial_reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type T; typedef typename ::boost::compute::result_of<BinaryFunction(T, T)>::type result_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } meta_kernel k("serial_reduce"); size_t count_arg = k.add_arg<cl_uint>("count"); k << k.decl<result_type>("result") << " = " << first[0] << ";\n" << "for(uint i = 1; i < count; i++)\n" << " result = " << function(k.var<T>("result"), first[k.var<uint_>("i")]) << ";\n" << result[0] << " = result;\n"; kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast<uint_>(count)); queue.enqueue_task(kernel); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP algorithm/detail/inplace_reduce.hpp 0000644 00000011347 15125510617 0013460 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP #include <iterator> #include <boost/utility/result_of.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/memory/local_buffer.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator, class BinaryFunction> inline void inplace_reduce(Iterator first, Iterator last, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits<Iterator>::value_type value_type; size_t input_size = iterator_range_size(first, last); if(input_size < 2){ return; } const context &context = queue.get_context(); size_t block_size = 64; size_t values_per_thread = 8; size_t block_count = input_size / (block_size * values_per_thread); if(block_count * block_size * values_per_thread != input_size) block_count++; vector<value_type> output(block_count, context); meta_kernel k("inplace_reduce"); size_t input_arg = k.add_arg<value_type *>(memory_object::global_memory, "input"); size_t input_size_arg = k.add_arg<const uint_>("input_size"); size_t output_arg = k.add_arg<value_type *>(memory_object::global_memory, "output"); size_t scratch_arg = k.add_arg<value_type *>(memory_object::local_memory, "scratch"); k << "const uint gid = get_global_id(0);\n" << "const uint lid = get_local_id(0);\n" << "const uint values_per_thread =\n" << uint_(values_per_thread) << ";\n" << // thread reduce "const uint index = gid * values_per_thread;\n" << "if(index < input_size){\n" << k.decl<value_type>("sum") << " = input[index];\n" << "for(uint i = 1;\n" << "i < values_per_thread && (index + i) < input_size;\n" << "i++){\n" << " sum = " << function(k.var<value_type>("sum"), k.var<value_type>("input[index+i]")) << ";\n" << "}\n" << "scratch[lid] = sum;\n" 
<< "}\n" << // local reduce "for(uint i = 1; i < get_local_size(0); i <<= 1){\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " uint mask = (i << 1) - 1;\n" << " uint next_index = (gid + i) * values_per_thread;\n" " if((lid & mask) == 0 && next_index < input_size){\n" << " scratch[lid] = " << function(k.var<value_type>("scratch[lid]"), k.var<value_type>("scratch[lid+i]")) << ";\n" << " }\n" << "}\n" << // write output for block "if(lid == 0){\n" << " output[get_group_id(0)] = scratch[0];\n" << "}\n" ; const buffer *input_buffer = &first.get_buffer(); const buffer *output_buffer = &output.get_buffer(); kernel kernel = k.compile(context); while(input_size > 1){ kernel.set_arg(input_arg, *input_buffer); kernel.set_arg(input_size_arg, static_cast<uint_>(input_size)); kernel.set_arg(output_arg, *output_buffer); kernel.set_arg(scratch_arg, local_buffer<value_type>(block_size)); queue.enqueue_1d_range_kernel(kernel, 0, block_count * block_size, block_size); input_size = static_cast<size_t>( std::ceil(float(input_size) / (block_size * values_per_thread) ) ); block_count = input_size / (block_size * values_per_thread); if(block_count * block_size * values_per_thread != input_size) block_count++; std::swap(input_buffer, output_buffer); } if(input_buffer != &first.get_buffer()){ ::boost::compute::copy(output.begin(), output.begin() + 1, first, queue); } } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP algorithm/detail/reduce_by_key.hpp 0000644 00000012173 15125510617 0013325 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP #include <algorithm> #include <iterator> #include <boost/compute/command_queue.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/algorithm/detail/serial_reduce_by_key.hpp> #include <boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp> #include <boost/compute/type_traits.hpp> namespace boost { namespace compute { namespace detail { template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction, class BinaryPredicate> size_t reduce_by_key_on_gpu(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { return detail::reduce_by_key_with_scan(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator> bool reduce_by_key_on_gpu_requirements_met(InputKeyIterator keys_first, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, const size_t count, command_queue &queue) { const device &device = queue.get_device(); return (count > 256) && !(device.type() & device::cpu) && reduce_by_key_with_scan_requirements_met(keys_first, values_first, keys_result,values_result, count, queue); return true; } template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction, class BinaryPredicate> inline std::pair<OutputKeyIterator, OutputValueIterator> 
dispatch_reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits<OutputKeyIterator>::difference_type key_difference_type; typedef typename std::iterator_traits<OutputValueIterator>::difference_type value_difference_type; const size_t count = detail::iterator_range_size(keys_first, keys_last); if (count < 2) { boost::compute::copy_n(keys_first, count, keys_result, queue); boost::compute::copy_n(values_first, count, values_result, queue); return std::make_pair<OutputKeyIterator, OutputValueIterator>( keys_result + static_cast<key_difference_type>(count), values_result + static_cast<value_difference_type>(count) ); } size_t result_size = 0; if(reduce_by_key_on_gpu_requirements_met(keys_first, values_first, keys_result, values_result, count, queue)){ result_size = detail::reduce_by_key_on_gpu(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } else { result_size = detail::serial_reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } return std::make_pair<OutputKeyIterator, OutputValueIterator>( keys_result + static_cast<key_difference_type>(result_size), values_result + static_cast<value_difference_type>(result_size) ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP algorithm/gather.hpp 0000644 00000005354 15125510617 0010527 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_GATHER_HPP #define BOOST_COMPUTE_ALGORITHM_GATHER_HPP #include <boost/static_assert.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class MapIterator, class OutputIterator> class gather_kernel : public meta_kernel { public: gather_kernel() : meta_kernel("gather") {} void set_range(MapIterator first, MapIterator last, InputIterator input, OutputIterator result) { m_count = iterator_range_size(first, last); *this << "const uint i = get_global_id(0);\n" << result[expr<uint_>("i")] << "=" << input[first[expr<uint_>("i")]] << ";\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } // end detail namespace /// Copies the elements using the indices from the range [\p first, \p last) /// to the range beginning at \p result using the input values from the range /// beginning at \p input. 
/// /// Space complexity: \Omega(1) /// /// \see scatter() template<class InputIterator, class MapIterator, class OutputIterator> inline void gather(MapIterator first, MapIterator last, InputIterator input, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<MapIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); detail::gather_kernel<InputIterator, MapIterator, OutputIterator> kernel; kernel.set_range(first, last, input, result); kernel.exec(queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_GATHER_HPP algorithm/reverse.hpp 0000644 00000004606 15125510617 0010727 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_HPP #define BOOST_COMPUTE_ALGORITHM_REVERSE_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class Iterator> struct reverse_kernel : public meta_kernel { reverse_kernel(Iterator first, Iterator last) : meta_kernel("reverse") { typedef typename std::iterator_traits<Iterator>::value_type value_type; // store size of the range m_size = detail::iterator_range_size(first, last); add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size)); *this << decl<cl_uint>("i") << " = get_global_id(0);\n" << decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" << decl<value_type>("tmp") << "=" << first[var<cl_uint>("i")] << ";\n" << first[var<cl_uint>("i")] << "=" << first[var<cl_uint>("j")] << ";\n" << first[var<cl_uint>("j")] << "= tmp;\n"; } void exec(command_queue &queue) { exec_1d(queue, 0, m_size / 2); } size_t m_size; }; } // end detail namespace /// Reverses the elements in the range [\p first, \p last). 
/// /// Space complexity: \Omega(1) /// /// \see reverse_copy() template<class Iterator> inline void reverse(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); size_t count = detail::iterator_range_size(first, last); if(count < 2){ return; } detail::reverse_kernel<Iterator> kernel(first, last); kernel.exec(queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REVERSE_HPP algorithm/random_shuffle.hpp 0000644 00000005702 15125510617 0012246 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP #define BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP #include <vector> #include <algorithm> #ifdef BOOST_COMPUTE_USE_CPP11 #include <random> #endif #include <boost/static_assert.hpp> #include <boost/range/algorithm_ext/iota.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/algorithm/scatter.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Randomly shuffles the elements in the range [\p first, \p last). 
/// /// Space complexity: \Omega(2n) /// /// \see scatter() template<class Iterator> inline void random_shuffle(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); typedef typename std::iterator_traits<Iterator>::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } // generate shuffled indices on the host std::vector<cl_uint> random_indices(count); boost::iota(random_indices, 0); #ifdef BOOST_COMPUTE_USE_CPP11 std::random_device nondeterministic_randomness; std::default_random_engine random_engine(nondeterministic_randomness()); std::shuffle(random_indices.begin(), random_indices.end(), random_engine); #else std::random_shuffle(random_indices.begin(), random_indices.end()); #endif // copy random indices to the device const context &context = queue.get_context(); vector<cl_uint> indices(count, context); ::boost::compute::copy(random_indices.begin(), random_indices.end(), indices.begin(), queue); // make a copy of the values on the device vector<value_type> tmp(count, context); ::boost::compute::copy(first, last, tmp.begin(), queue); // write values to their new locations ::boost::compute::scatter(tmp.begin(), tmp.end(), indices.begin(), first, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP algorithm/max_element.hpp 0000644 00000005335 15125510617 0011552 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/algorithm/detail/find_extrema.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns an iterator pointing to the element in the range /// [\p first, \p last) with the maximum value. /// /// \param first first element in the input range /// \param last last element in the input range /// \param compare comparison function object which returns true if the first /// argument is less than (i.e. is ordered before) the second. /// \param queue command queue to perform the operation /// /// For example, to find the \c uint2_ value with the maximum first component in a given vector: /// \code /// // comparison function object /// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const uint2_ &a, const uint2_ &b), /// { /// return a.x < b.x; /// }); /// /// // create vector /// boost::compute::vector<uint2_> data = ...
/// /// boost::compute::vector<uint2_>::iterator max = /// boost::compute::max_element(data.begin(), data.end(), compare_first, queue); /// \endcode /// /// Space complexity on CPUs: \Omega(1)<br> /// Space complexity on GPUs: \Omega(N) /// /// \see min_element() template<class InputIterator, class Compare> inline InputIterator max_element(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return detail::find_extrema(first, last, compare, false, queue); } ///\overload template<class InputIterator> inline InputIterator max_element(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; return ::boost::compute::max_element( first, last, ::boost::compute::less<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP algorithm/min_element.hpp 0000644 00000005334 15125510617 0011547 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/algorithm/detail/find_extrema.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns an iterator pointing to the element in the range /// [\p first, \p last) with the minimum value. /// /// \param first first element in the input range /// \param last last element in the input range /// \param compare comparison function object which returns true if the first /// argument is less than (i.e. is ordered before) the second. /// \param queue command queue to perform the operation /// /// For example, to find the \c uint2_ value with the minimum first component in a given vector: /// \code /// // comparison function object /// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const uint2_ &a, const uint2_ &b), /// { /// return a.x < b.x; /// }); /// /// // create vector /// boost::compute::vector<uint2_> data = ...
/// /// boost::compute::vector<uint2_>::iterator min = /// boost::compute::min_element(data.begin(), data.end(), compare_first, queue); /// \endcode /// /// Space complexity on CPUs: \Omega(1)<br> /// Space complexity on GPUs: \Omega(N) /// /// \see max_element() template<class InputIterator, class Compare> inline InputIterator min_element(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return detail::find_extrema(first, last, compare, true, queue); } ///\overload template<class InputIterator> inline InputIterator min_element(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; return ::boost::compute::min_element( first, last, ::boost::compute::less<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP algorithm/nth_element.hpp 0000644 00000005464 15125510617 0011561 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP #include <boost/static_assert.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/fill_n.hpp> #include <boost/compute/algorithm/find.hpp> #include <boost/compute/algorithm/partition.hpp> #include <boost/compute/algorithm/sort.hpp> #include <boost/compute/functional/bind.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Rearranges the elements in the range [\p first, \p last) such that /// the \p nth element would be in that position in a sorted sequence. /// /// Space complexity: \Omega(3n) template<class Iterator, class Compare> inline void nth_element(Iterator first, Iterator nth, Iterator last, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); if(nth == last) return; typedef typename std::iterator_traits<Iterator>::value_type value_type; while(1) { value_type value = nth.read(queue); using boost::compute::placeholders::_1; Iterator new_nth = partition( first, last, ::boost::compute::bind(compare, _1, value), queue ); Iterator old_nth = find(new_nth, last, value, queue); value_type new_value = new_nth.read(queue); fill_n(new_nth, 1, value, queue); fill_n(old_nth, 1, new_value, queue); new_value = nth.read(queue); if(value == new_value) break; if(std::distance(first, nth) < std::distance(first, new_nth)) { last = new_nth; } else { first = new_nth; } } } /// \overload template<class Iterator> inline void nth_element(Iterator first, Iterator nth, Iterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<Iterator>::value); if(nth == last) return; typedef typename std::iterator_traits<Iterator>::value_type value_type; less<value_type> less_than; return nth_element(first, nth, last, 
less_than, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP algorithm/copy_n.hpp 0000644 00000003624 15125510617 0010542 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COPY_N_HPP #define BOOST_COMPUTE_ALGORITHM_COPY_N_HPP #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> namespace boost { namespace compute { /// Copies \p count elements from \p first to \p result. /// /// For example, to copy four values from the host to the device: /// \code /// // values on the host and vector on the device /// float values[4] = { 1.f, 2.f, 3.f, 4.f }; /// boost::compute::vector<float> vec(4, context); /// /// // copy from the host to the device /// boost::compute::copy_n(values, 4, vec.begin(), queue); /// \endcode /// /// Space complexity: \Omega(1) /// /// \see copy() template<class InputIterator, class Size, class OutputIterator> inline OutputIterator copy_n(InputIterator first, Size count, OutputIterator result, command_queue &queue = system::default_queue(), const wait_list &events = wait_list()) { typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; return ::boost::compute::copy(first, first + static_cast<difference_type>(count), result, queue, events); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COPY_N_HPP algorithm/next_permutation.hpp 0000644 00000013107 15125510617 0012655 0 ustar 00 
//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP #define BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Helper function for next_permutation /// /// To find rightmost element which is smaller /// than its next element /// template<class InputIterator> inline InputIterator next_permutation_helper(InputIterator first, InputIterator last, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0 || count == 1){ return last; } count = count - 1; const context &context = queue.get_context(); detail::meta_kernel k("next_permutation"); size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); atomic_max<int_> atomic_max_int; k << k.decl<const int_>("i") << " = get_global_id(0);\n" << k.decl<const value_type>("cur_value") << "=" << first[k.var<const int_>("i")] << ";\n" << k.decl<const value_type>("next_value") << "=" << first[k.expr<const int_>("i+1")] << ";\n" << "if(cur_value < next_value){\n" << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar<int_> index(context); 
kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast<int_>(-1), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast<int>(index.read(queue)); if(result == -1) return last; else return first + result; } /// /// \brief Helper function for next_permutation /// /// To find the smallest element to the right of the element found above /// that is greater than it /// template<class InputIterator, class ValueType> inline InputIterator np_ceiling(InputIterator first, InputIterator last, ValueType value, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const context &context = queue.get_context(); detail::meta_kernel k("np_ceiling"); size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); size_t value_arg = k.add_arg<value_type>(memory_object::private_memory, "value"); atomic_max<int_> atomic_max_int; k << k.decl<const int_>("i") << " = get_global_id(0);\n" << k.decl<const value_type>("cur_value") << "=" << first[k.var<const int_>("i")] << ";\n" << "if(cur_value <= " << first[k.expr<int_>("*index")] << " && cur_value > value){\n" << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar<int_> index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast<int_>(0), queue); kernel.set_arg(value_arg, value); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast<int>(index.read(queue)); return first + result; } } // end detail namespace /// /// \brief Permutation generating algorithm /// /// Transforms the range [first, last) into the next permutation from the /// set of all permutations arranged in lexicographic order /// \return Boolean value signifying if the last permutation was crossed /// and the range was reset /// /// \param first Iterator pointing 
to start of range /// \param last Iterator pointing to end of range /// \param queue Queue on which to execute /// /// Space complexity: \Omega(1) template<class InputIterator> inline bool next_permutation(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; if(first == last) return false; InputIterator first_element = detail::next_permutation_helper(first, last, queue); if(first_element == last) { reverse(first, last, queue); return false; } value_type first_value = first_element.read(queue); InputIterator ceiling_element = detail::np_ceiling(first_element + 1, last, first_value, queue); value_type ceiling_value = ceiling_element.read(queue); first_element.write(ceiling_value, queue); ceiling_element.write(first_value, queue); reverse(first_element + 1, last, queue); return true; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP algorithm/for_each.hpp 0000644 00000004123 15125510617 0011014 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP #define BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class Function> struct for_each_kernel : public meta_kernel { for_each_kernel(InputIterator first, InputIterator last, Function function) : meta_kernel("for_each") { // store range size m_count = detail::iterator_range_size(first, last); // setup kernel source *this << function(first[get_global_id(0)]) << ";\n"; } void exec(command_queue &queue) { exec_1d(queue, 0, m_count); } size_t m_count; }; } // end detail namespace /// Calls \p function on each element in the range [\p first, \p last). 
/// /// Space complexity: \Omega(1) /// /// \see transform() template<class InputIterator, class UnaryFunction> inline UnaryFunction for_each(InputIterator first, InputIterator last, UnaryFunction function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); detail::for_each_kernel<InputIterator, UnaryFunction> kernel(first, last, function); kernel.exec(queue); return function; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP algorithm/is_partitioned.hpp 0000644 00000003371 15125510617 0012267 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP #define BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/algorithm/find_if_not.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if the values in the range [\p first, \p last) /// are partitioned according to \p predicate. 
/// /// Space complexity: \Omega(1) template<class InputIterator, class UnaryPredicate> inline bool is_partitioned(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return ::boost::compute::find_if( ::boost::compute::find_if_not(first, last, predicate, queue), last, predicate, queue) == last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP algorithm/count.hpp 0000644 00000004067 15125510617 0010405 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COUNT_HPP #define BOOST_COMPUTE_ALGORITHM_COUNT_HPP #include <boost/static_assert.hpp> #include <boost/compute/lambda.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/count_if.hpp> #include <boost/compute/type_traits/vector_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns the number of occurrences of \p value in the range /// [\p first, \p last). 
/// /// Space complexity on CPUs: \Omega(1)<br> /// Space complexity on GPUs: \Omega(n) /// /// \see count_if() template<class InputIterator, class T> inline size_t count(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; using ::boost::compute::_1; using ::boost::compute::lambda::all; if(vector_size<value_type>::value == 1){ return ::boost::compute::count_if(first, last, _1 == value, queue); } else { return ::boost::compute::count_if(first, last, all(_1 == value), queue); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COUNT_HPP algorithm/equal.hpp 0000644 00000004241 15125510617 0010356 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_HPP #define BOOST_COMPUTE_ALGORITHM_EQUAL_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/mismatch.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if the range [\p first1, \p last1) and the range /// beginning at \p first2 are equal. 
/// /// Space complexity: \Omega(1) template<class InputIterator1, class InputIterator2> inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); return ::boost::compute::mismatch(first1, last1, first2, queue).first == last1; } /// \overload template<class InputIterator1, class InputIterator2> inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); if(std::distance(first1, last1) != std::distance(first2, last2)){ return false; } return ::boost::compute::equal(first1, last1, first2, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_EQUAL_HPP algorithm/transform_if.hpp 0000644 00000011201 15125510617 0011732 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP #define BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP #include <boost/static_assert.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/count.hpp> #include <boost/compute/algorithm/count_if.hpp> #include <boost/compute/algorithm/exclusive_scan.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/iterator/discard_iterator.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { // Space complexity: O(2n) template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate> inline OutputIterator transform_if_impl(InputIterator first, InputIterator last, OutputIterator result, UnaryFunction function, Predicate predicate, bool copyIndex, command_queue &queue) { typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return result; } const context &context = queue.get_context(); // storage for destination indices ::boost::compute::vector<cl_uint> indices(count, context); // write counts ::boost::compute::detail::meta_kernel k1("transform_if_write_counts"); k1 << indices.begin()[k1.get_global_id(0)] << " = " << predicate(first[k1.get_global_id(0)]) << " ? 
1 : 0;\n"; k1.exec_1d(queue, 0, count); // scan indices size_t copied_element_count = (indices.cend() - 1).read(queue); ::boost::compute::exclusive_scan( indices.begin(), indices.end(), indices.begin(), queue ); copied_element_count += (indices.cend() - 1).read(queue); // last scan element plus last mask element // copy values ::boost::compute::detail::meta_kernel k2("transform_if_do_copy"); k2 << "if(" << predicate(first[k2.get_global_id(0)]) << ")" << " " << result[indices.begin()[k2.get_global_id(0)]] << "="; if(copyIndex){ k2 << k2.get_global_id(0) << ";\n"; } else { k2 << function(first[k2.get_global_id(0)]) << ";\n"; } k2.exec_1d(queue, 0, count); return result + static_cast<difference_type>(copied_element_count); } template<class InputIterator, class UnaryFunction, class Predicate> inline discard_iterator transform_if_impl(InputIterator first, InputIterator last, discard_iterator result, UnaryFunction function, Predicate predicate, bool copyIndex, command_queue &queue) { (void) function; (void) copyIndex; return result + count_if(first, last, predicate, queue); } } // end detail namespace /// Copies each element in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p result. 
/// /// Space complexity: O(2n) template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate> inline OutputIterator transform_if(InputIterator first, InputIterator last, OutputIterator result, UnaryFunction function, Predicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); return detail::transform_if_impl( first, last, result, function, predicate, false, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP algorithm/none_of.hpp 0000644 00000002622 15125510617 0010673 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP #define BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/find_if.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Returns \c true if \p predicate returns \c true for none of the elements in /// the range [\p first, \p last). 
/// /// Space complexity: \Omega(1) /// /// \see all_of(), any_of() template<class InputIterator, class UnaryPredicate> inline bool none_of(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); return ::boost::compute::find_if(first, last, predicate, queue) == last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP algorithm/unique.hpp 0000644 00000005005 15125510617 0010554 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP #define BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/unique_copy.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/functional/operator.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Removes all consecutive duplicate elements (determined by \p op) from the /// range [first, last). If \p op is not provided, the equality operator is /// used. 
/// /// \param first first element in the input range /// \param last last element in the input range /// \param op binary operator used to check for uniqueness /// \param queue command queue to perform the operation /// /// \return \c InputIterator to the new logical end of the range /// /// Space complexity: \Omega(4n) /// /// \see unique_copy() template<class InputIterator, class BinaryPredicate> inline InputIterator unique(InputIterator first, InputIterator last, BinaryPredicate op, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; vector<value_type> temp(first, last, queue); return ::boost::compute::unique_copy( temp.begin(), temp.end(), first, op, queue ); } /// \overload template<class InputIterator> inline InputIterator unique(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type value_type; return ::boost::compute::unique( first, last, ::boost::compute::equal_to<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP algorithm/merge.hpp 0000644 00000011655 15125510617 0010355 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MERGE_HPP #define BOOST_COMPUTE_ALGORITHM_MERGE_HPP #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/detail/merge_with_merge_path.hpp> #include <boost/compute/algorithm/detail/serial_merge.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/parameter_cache.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// Merges the sorted values in the range [\p first1, \p last1) with the sorted /// values in the range [\p first2, last2) and stores the result in the range /// beginning at \p result. Values are compared using the \p comp function. If /// no comparision function is given, \c less is used. /// /// \param first1 first element in the first range to merge /// \param last1 last element in the first range to merge /// \param first2 first element in the second range to merge /// \param last2 last element in the second range to merge /// \param result first element in the result range /// \param comp comparison function (by default \c less) /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2)) /// /// \see inplace_merge() template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare> inline OutputIterator merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); 
BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::value_type input1_type; typedef typename std::iterator_traits<InputIterator2>::value_type input2_type; typedef typename std::iterator_traits<OutputIterator>::value_type output_type; const device &device = queue.get_device(); std::string cache_key = std::string("__boost_merge_") + type_name<input1_type>() + "_" + type_name<input2_type>() + "_" + type_name<output_type>(); boost::shared_ptr<detail::parameter_cache> parameters = detail::parameter_cache::get_global_cache(device); // default serial merge threshold depends on device type size_t default_serial_merge_threshold = 32768; if(device.type() & device::gpu) { default_serial_merge_threshold = 2048; } // loading serial merge threshold parameter const size_t serial_merge_threshold = parameters->get(cache_key, "serial_merge_threshold", static_cast<uint_>(default_serial_merge_threshold)); // choosing merge algorithm const size_t total_count = detail::iterator_range_size(first1, last1) + detail::iterator_range_size(first2, last2); // for small inputs serial merge turns out to outperform // merge with merge path algorithm if(total_count <= serial_merge_threshold){ return detail::serial_merge(first1, last1, first2, last2, result, comp, queue); } return detail::merge_with_merge_path(first1, last1, first2, last2, result, comp, queue); } /// \overload template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator1>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputIterator2>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<InputIterator1>::value_type value_type; less<value_type> less_than; 
return merge(first1, last1, first2, last2, result, less_than, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MERGE_HPP algorithm/reduce.hpp 0000644 00000026276 15125510617 0010532 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_REDUCE_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/array.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/algorithm/copy_n.hpp> #include <boost/compute/algorithm/detail/inplace_reduce.hpp> #include <boost/compute/algorithm/detail/reduce_on_gpu.hpp> #include <boost/compute/algorithm/detail/reduce_on_cpu.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/memory/local_buffer.hpp> #include <boost/compute/type_traits/result_of.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class OutputIterator, class BinaryFunction> size_t reduce(InputIterator first, size_t count, OutputIterator result, size_t block_size, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename boost::compute::result_of<BinaryFunction(input_type, input_type)>::type result_type; const context &context = 
queue.get_context(); size_t block_count = count / 2 / block_size; size_t total_block_count = static_cast<size_t>(std::ceil(float(count) / 2.f / float(block_size))); if(block_count != 0){ meta_kernel k("block_reduce"); size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output"); size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block"); k << "const uint gid = get_global_id(0);\n" << "const uint lid = get_local_id(0);\n" << // copy values to local memory "block[lid] = " << function(first[k.make_var<uint_>("gid*2+0")], first[k.make_var<uint_>("gid*2+1")]) << ";\n" << // perform reduction "for(uint i = 1; i < " << uint_(block_size) << "; i <<= 1){\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " uint mask = (i << 1) - 1;\n" << " if((lid & mask) == 0){\n" << " block[lid] = " << function(k.expr<input_type>("block[lid]"), k.expr<input_type>("block[lid+i]")) << ";\n" << " }\n" << "}\n" << // write block result to global output "if(lid == 0)\n" << " output[get_group_id(0)] = block[0];\n"; kernel kernel = k.compile(context); kernel.set_arg(output_arg, result.get_buffer()); kernel.set_arg(block_arg, local_buffer<input_type>(block_size)); queue.enqueue_1d_range_kernel(kernel, 0, block_count * block_size, block_size); } // serially reduce any leftovers if(block_count * block_size * 2 < count){ size_t last_block_start = block_count * block_size * 2; meta_kernel k("extra_serial_reduce"); size_t count_arg = k.add_arg<uint_>("count"); size_t offset_arg = k.add_arg<uint_>("offset"); size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output"); size_t output_offset_arg = k.add_arg<uint_>("output_offset"); k << k.decl<result_type>("result") << " = \n" << first[k.expr<uint_>("offset")] << ";\n" << "for(uint i = offset + 1; i < count; i++)\n" << " result = " << function(k.var<result_type>("result"), first[k.var<uint_>("i")]) << ";\n" << "output[output_offset] = result;\n"; kernel kernel = k.compile(context); 
kernel.set_arg(count_arg, static_cast<uint_>(count)); kernel.set_arg(offset_arg, static_cast<uint_>(last_block_start)); kernel.set_arg(output_arg, result.get_buffer()); kernel.set_arg(output_offset_arg, static_cast<uint_>(block_count)); queue.enqueue_task(kernel); } return total_block_count; } template<class InputIterator, class BinaryFunction> inline vector< typename boost::compute::result_of< BinaryFunction( typename std::iterator_traits<InputIterator>::value_type, typename std::iterator_traits<InputIterator>::value_type ) >::type > block_reduce(InputIterator first, size_t count, size_t block_size, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename boost::compute::result_of<BinaryFunction(input_type, input_type)>::type result_type; const context &context = queue.get_context(); size_t total_block_count = static_cast<size_t>(std::ceil(float(count) / 2.f / float(block_size))); vector<result_type> result_vector(total_block_count, context); reduce(first, count, result_vector.begin(), block_size, function, queue); return result_vector; } // Space complexity: O( ceil(n / 2 / 256) ) template<class InputIterator, class OutputIterator, class BinaryFunction> inline void generic_reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type input_type; typedef typename boost::compute::result_of<BinaryFunction(input_type, input_type)>::type result_type; const device &device = queue.get_device(); const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); if(device.type() & device::cpu){ array<result_type, 1> value(context); detail::reduce_on_cpu(first, last, value.begin(), function, queue); boost::compute::copy_n(value.begin(), 1, result, queue); } else { size_t block_size = 256; // first pass vector<result_type> results = 
detail::block_reduce(first, count, block_size, function, queue); if(results.size() > 1){ detail::inplace_reduce(results.begin(), results.end(), function, queue); } boost::compute::copy_n(results.begin(), 1, result, queue); } } template<class InputIterator, class OutputIterator, class T> inline void dispatch_reduce(InputIterator first, InputIterator last, OutputIterator result, const plus<T> &function, command_queue &queue) { const context &context = queue.get_context(); const device &device = queue.get_device(); // reduce to temporary buffer on device array<T, 1> value(context); if(device.type() & device::cpu){ detail::reduce_on_cpu(first, last, value.begin(), function, queue); } else { reduce_on_gpu(first, last, value.begin(), function, queue); } // copy to result iterator copy_n(value.begin(), 1, result, queue); } template<class InputIterator, class OutputIterator, class BinaryFunction> inline void dispatch_reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue) { generic_reduce(first, last, result, function, queue); } } // end detail namespace /// Returns the result of applying \p function to the elements in the /// range [\p first, \p last). /// /// If no function is specified, \c plus will be used. /// /// \param first first element in the input range /// \param last last element in the input range /// \param result iterator pointing to the output /// \param function binary reduction function /// \param queue command queue to perform the operation /// /// The \c reduce() algorithm assumes that the binary reduction function is /// associative. When used with non-associative functions the result may /// be non-deterministic and vary in precision. Notably this affects the /// \c plus<float>() function as floating-point addition is not associative /// and may produce slightly different results than a serial algorithm. 
/// /// This algorithm supports both host and device iterators for the /// result argument. This allows for values to be reduced and copied /// to the host all with a single function call. /// /// For example, to calculate the sum of the values in a device vector and /// copy the result to a value on the host: /// /// \snippet test/test_reduce.cpp sum_int /// /// Note that while the the \c reduce() algorithm is conceptually identical to /// the \c accumulate() algorithm, its implementation is substantially more /// efficient on parallel hardware. For more information, see the documentation /// on the \c accumulate() algorithm. /// /// Space complexity on GPUs: \Omega(n)<br> /// Space complexity on CPUs: \Omega(1) /// /// \see accumulate() template<class InputIterator, class OutputIterator, class BinaryFunction> inline void reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); if(first == last){ return; } detail::dispatch_reduce(first, last, result, function, queue); } /// \overload template<class InputIterator, class OutputIterator> inline void reduce(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); typedef typename std::iterator_traits<InputIterator>::value_type T; if(first == last){ return; } detail::dispatch_reduce(first, last, result, plus<T>(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REDUCE_HPP algorithm/sort.hpp 0000644 00000014651 15125510617 0010244 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SORT_HPP #define BOOST_COMPUTE_ALGORITHM_SORT_HPP #include <iterator> #include <boost/utility/enable_if.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp> #include <boost/compute/algorithm/detail/merge_sort_on_gpu.hpp> #include <boost/compute/algorithm/detail/radix_sort.hpp> #include <boost/compute/algorithm/detail/insertion_sort.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/container/mapped_view.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class T> inline void dispatch_gpu_sort(buffer_iterator<T> first, buffer_iterator<T> last, less<T>, command_queue &queue, typename boost::enable_if_c< is_radix_sortable<T>::value >::type* = 0) { size_t count = detail::iterator_range_size(first, last); if(count < 2){ // nothing to do return; } else if(count <= 32){ ::boost::compute::detail::serial_insertion_sort(first, last, queue); } else { ::boost::compute::detail::radix_sort(first, last, queue); } } template<class T> inline void dispatch_gpu_sort(buffer_iterator<T> first, buffer_iterator<T> last, greater<T> compare, command_queue &queue, typename boost::enable_if_c< is_radix_sortable<T>::value >::type* = 0) { size_t count = detail::iterator_range_size(first, last); if(count < 2){ // nothing to do return; } else if(count <= 32){ ::boost::compute::detail::serial_insertion_sort( first, last, compare, queue ); } else { // radix sorts in descending order ::boost::compute::detail::radix_sort(first, last, false, queue); } } template<class Iterator, class 
Compare> inline void dispatch_gpu_sort(Iterator first, Iterator last, Compare compare, command_queue &queue) { size_t count = detail::iterator_range_size(first, last); if(count < 2){ // nothing to do return; } else if(count <= 32){ ::boost::compute::detail::serial_insertion_sort( first, last, compare, queue ); } else { ::boost::compute::detail::merge_sort_on_gpu( first, last, compare, queue ); } } // sort() for device iterators template<class Iterator, class Compare> inline void dispatch_sort(Iterator first, Iterator last, Compare compare, command_queue &queue, typename boost::enable_if< is_device_iterator<Iterator> >::type* = 0) { if(queue.get_device().type() & device::gpu) { dispatch_gpu_sort(first, last, compare, queue); return; } ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue); } // sort() for host iterators template<class Iterator, class Compare> inline void dispatch_sort(Iterator first, Iterator last, Compare compare, command_queue &queue, typename boost::disable_if< is_device_iterator<Iterator> >::type* = 0) { typedef typename std::iterator_traits<Iterator>::value_type T; size_t size = static_cast<size_t>(std::distance(first, last)); // create mapped buffer mapped_view<T> view( boost::addressof(*first), size, queue.get_context() ); // sort mapped buffer dispatch_sort(view.begin(), view.end(), compare, queue); // return results to host view.map(queue); } } // end detail namespace /// Sorts the values in the range [\p first, \p last) according to /// \p compare. 
/// /// \param first first element in the range to sort /// \param last last element in the range to sort /// \param compare comparison function (by default \c less) /// \param queue command queue to perform the operation /// /// For example, to sort a vector on the device: /// \code /// // create vector on the device with data /// float data[] = { 2.f, 4.f, 1.f, 3.f }; /// boost::compute::vector<float> vec(data, data + 4, queue); /// /// // sort the vector on the device /// boost::compute::sort(vec.begin(), vec.end(), queue); /// \endcode /// /// The sort() algorithm can also be directly used with host iterators. This /// example will automatically transfer the data to the device, sort it, and /// then transfer the data back to the host: /// \code /// std::vector<int> data = { 9, 3, 2, 5, 1, 4, 6, 7 }; /// /// boost::compute::sort(data.begin(), data.end(), queue); /// \endcode /// /// Space complexity: \Omega(n) /// /// \see is_sorted() template<class Iterator, class Compare> inline void sort(Iterator first, Iterator last, Compare compare, command_queue &queue = system::default_queue()) { ::boost::compute::detail::dispatch_sort(first, last, compare, queue); } /// \overload template<class Iterator> inline void sort(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<Iterator>::value_type value_type; ::boost::compute::sort( first, last, ::boost::compute::less<value_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SORT_HPP algorithm/scatter_if.hpp 0000644 00000011172 15125510617 0011373 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Pola <jakub.pola@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more 
information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP #define BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP #include <boost/static_assert.hpp> #include <boost/algorithm/string/replace.hpp> #include <boost/compute/system.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator, class Predicate> class scatter_if_kernel : meta_kernel { public: scatter_if_kernel() : meta_kernel("scatter_if") {} void set_range(InputIterator first, InputIterator last, MapIterator map, StencilIterator stencil, OutputIterator result, Predicate predicate) { m_count = iterator_range_size(first, last); m_input_offset = first.get_index(); m_output_offset = result.get_index(); m_input_offset_arg = add_arg<uint_>("input_offset"); m_output_offset_arg = add_arg<uint_>("output_offset"); *this << "const uint i = get_global_id(0);\n" << "uint i1 = " << map[expr<uint_>("i")] << " + output_offset;\n" << "uint i2 = i + input_offset;\n" << if_(predicate(stencil[expr<uint_>("i")])) << "\n" << result[expr<uint_>("i1")] << "=" << first[expr<uint_>("i2")] << ";\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } set_arg(m_input_offset_arg, uint_(m_input_offset)); set_arg(m_output_offset_arg, uint_(m_output_offset)); return exec_1d(queue, 0, m_count); } private: size_t m_count; size_t m_input_offset; size_t m_input_offset_arg; size_t m_output_offset; size_t m_output_offset_arg; }; } // end detail namespace /// Copies the elements from the range [\p first, \p last) 
to the range /// beginning at \p result using the output indices from the range beginning /// at \p map if stencil is resolved to true. By default the predicate is /// an identity /// /// Space complexity: \Omega(1) template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator, class Predicate> inline void scatter_if(InputIterator first, InputIterator last, MapIterator map, StencilIterator stencil, OutputIterator result, Predicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<MapIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<StencilIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); detail::scatter_if_kernel<InputIterator, MapIterator, StencilIterator, OutputIterator, Predicate> kernel; kernel.set_range(first, last, map, stencil, result, predicate); kernel.exec(queue); } template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator> inline void scatter_if(InputIterator first, InputIterator last, MapIterator map, StencilIterator stencil, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<MapIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<StencilIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); typedef typename std::iterator_traits<StencilIterator>::value_type T; scatter_if(first, last, map, stencil, result, identity<T>(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP algorithm/scatter.hpp 0000644 00000006553 15125510617 0010724 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software 
License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_HPP #define BOOST_COMPUTE_ALGORITHM_SCATTER_HPP #include <boost/static_assert.hpp> #include <boost/algorithm/string/replace.hpp> #include <boost/compute/system.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/iterator/buffer_iterator.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class InputIterator, class MapIterator, class OutputIterator> class scatter_kernel : meta_kernel { public: scatter_kernel() : meta_kernel("scatter") {} void set_range(InputIterator first, InputIterator last, MapIterator map, OutputIterator result) { m_count = iterator_range_size(first, last); m_input_offset = first.get_index(); m_output_offset = result.get_index(); m_input_offset_arg = add_arg<uint_>("input_offset"); m_output_offset_arg = add_arg<uint_>("output_offset"); *this << "const uint i = get_global_id(0);\n" << "uint i1 = " << map[expr<uint_>("i")] << " + output_offset;\n" << "uint i2 = i + input_offset;\n" << result[expr<uint_>("i1")] << "=" << first[expr<uint_>("i2")] << ";\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } set_arg(m_input_offset_arg, uint_(m_input_offset)); set_arg(m_output_offset_arg, uint_(m_output_offset)); return exec_1d(queue, 0, m_count); } private: size_t m_count; size_t m_input_offset; size_t m_input_offset_arg; size_t m_output_offset; size_t m_output_offset_arg; }; } // end detail namespace /// Copies the elements from the range [\p 
first, \p last) to the range /// beginning at \p result using the output indices from the range beginning /// at \p map. /// /// Space complexity: \Omega(1) /// /// \see gather() template<class InputIterator, class MapIterator, class OutputIterator> inline void scatter(InputIterator first, InputIterator last, MapIterator map, OutputIterator result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<MapIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputIterator>::value); detail::scatter_kernel<InputIterator, MapIterator, OutputIterator> kernel; kernel.set_range(first, last, map, result); kernel.exec(queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SCATTER_HPP algorithm/stable_sort_by_key.hpp 0000644 00000013667 15125510617 0013146 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/algorithm/sort_by_key.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { template<class KeyIterator, class ValueIterator> inline void dispatch_gpu_ssort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, less<typename std::iterator_traits<KeyIterator>::value_type> compare, command_queue &queue, typename boost::enable_if_c< is_radix_sortable< typename std::iterator_traits<KeyIterator>::value_type >::value >::type* = 0) { size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 32){ detail::serial_insertion_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } else { detail::radix_sort_by_key( keys_first, keys_last, values_first, queue ); } } template<class KeyIterator, class ValueIterator> inline void dispatch_gpu_ssort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, greater<typename std::iterator_traits<KeyIterator>::value_type> compare, command_queue &queue, typename boost::enable_if_c< is_radix_sortable< typename std::iterator_traits<KeyIterator>::value_type >::value >::type* = 0) { size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 32){ detail::serial_insertion_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } else { // radix sorts in descending order detail::radix_sort_by_key( keys_first, keys_last, values_first, false, queue ); } } template<class KeyIterator, class ValueIterator, class Compare> inline void dispatch_gpu_ssort_by_key(KeyIterator 
keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 32){ detail::serial_insertion_sort_by_key( keys_first, keys_last, values_first, compare, queue ); } else { detail::merge_sort_by_key_on_gpu( keys_first, keys_last, values_first, compare, true /* stable */, queue ); } } template<class KeyIterator, class ValueIterator, class Compare> inline void dispatch_ssort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { if(queue.get_device().type() & device::gpu) { dispatch_gpu_ssort_by_key( keys_first, keys_last, values_first, compare, queue ); return; } ::boost::compute::detail::merge_sort_by_key_on_cpu( keys_first, keys_last, values_first, compare, queue ); } } // end detail namespace /// Performs a key-value stable sort using the keys in the range [\p keys_first, /// \p keys_last) on the values in the range [\p values_first, /// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare. /// /// If no compare function is specified, \c less is used. 
/// /// Space complexity: \Omega(2n) /// /// \see sort() template<class KeyIterator, class ValueIterator, class Compare> inline void stable_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<KeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<ValueIterator>::value); ::boost::compute::detail::dispatch_ssort_by_key( keys_first, keys_last, values_first, compare, queue ); } /// \overload template<class KeyIterator, class ValueIterator> inline void stable_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<KeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<ValueIterator>::value); typedef typename std::iterator_traits<KeyIterator>::value_type key_type; ::boost::compute::stable_sort_by_key( keys_first, keys_last, values_first, less<key_type>(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP algorithm/reduce_by_key.hpp 0000644 00000013455 15125510617 0012067 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP #include <iterator> #include <utility> #include <boost/static_assert.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/device.hpp> #include <boost/compute/functional.hpp> #include <boost/compute/system.hpp> #include <boost/compute/algorithm/detail/reduce_by_key.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { /// The \c reduce_by_key() algorithm performs reduction for each contiguous /// subsequence of values determined by equivalent keys. /// /// Returns a pair of iterators at the end of the ranges [\p keys_result, keys_result_last) /// and [\p values_result, \p values_result_last). /// /// If no function is specified, \c plus will be used. /// If no predicate is specified, \c equal_to will be used. /// /// \param keys_first the first key /// \param keys_last the last key /// \param values_first the first input value /// \param keys_result iterator pointing to the key output /// \param values_result iterator pointing to the reduced value output /// \param function binary reduction function /// \param predicate binary predicate which returns true only if two keys are equal /// \param queue command queue to perform the operation /// /// The \c reduce_by_key() algorithm assumes that the binary reduction function /// is associative. When used with non-associative functions the result may /// be non-deterministic and vary in precision. Notably this affects the /// \c plus<float>() function as floating-point addition is not associative /// and may produce slightly different results than a serial algorithm. 
/// /// For example, to calculate the sum of the values for each key: /// /// \snippet test/test_reduce_by_key.cpp reduce_by_key_int /// /// Space complexity on GPUs: \Omega(2n)<br> /// Space complexity on CPUs: \Omega(1) /// /// \see reduce() template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction, class BinaryPredicate> inline std::pair<OutputKeyIterator, OutputValueIterator> reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputKeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputValueIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputKeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputValueIterator>::value); return detail::dispatch_reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } /// \overload template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, class BinaryFunction> inline std::pair<OutputKeyIterator, OutputValueIterator> reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputKeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputValueIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputKeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputValueIterator>::value); typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; return reduce_by_key(keys_first, keys_last, values_first, 
keys_result, values_result, function, equal_to<key_type>(), queue); } /// \overload template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator> inline std::pair<OutputKeyIterator, OutputValueIterator> reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<InputKeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<InputValueIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputKeyIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<OutputValueIterator>::value); typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; typedef typename std::iterator_traits<InputValueIterator>::value_type value_type; return reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, plus<value_type>(), equal_to<key_type>(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP algorithm/prev_permutation.hpp 0000644 00000013077 15125510617 0012661 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP #define BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP #include <iterator> #include <boost/static_assert.hpp> #include <boost/compute/system.hpp> #include <boost/compute/command_queue.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/algorithm/reverse.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Helper function for prev_permutation /// /// To find rightmost element which is greater /// than its next element /// template<class InputIterator> inline InputIterator prev_permutation_helper(InputIterator first, InputIterator last, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0 || count == 1){ return last; } count = count - 1; const context &context = queue.get_context(); detail::meta_kernel k("prev_permutation"); size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); atomic_max<int_> atomic_max_int; k << k.decl<const int_>("i") << " = get_global_id(0);\n" << k.decl<const value_type>("cur_value") << "=" << first[k.var<const int_>("i")] << ";\n" << k.decl<const value_type>("next_value") << "=" << first[k.expr<const int_>("i+1")] << ";\n" << "if(cur_value > next_value){\n" << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar<int_> index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast<int_>(-1), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast<int>(index.read(queue)); if(result == -1) return last; else return first + result; } /// /// \brief Helper function for prev_permutation /// /// To find the largest element to the right of the 
/// element found above
/// that is smaller than it
///
/// Returns \p last for an empty range; otherwise \p first plus the index
/// left in the device-side scalar, which starts at 0.
///
/// \param first Iterator pointing to start of the searched range
/// \param last Iterator pointing to end of the searched range
/// \param value Value the candidates are compared against
/// \param queue Queue on which to execute
///
template<class InputIterator, class ValueType>
inline InputIterator pp_floor(InputIterator first,
                              InputIterator last,
                              ValueType value,
                              command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return last;
    }
    const context &context = queue.get_context();

    detail::meta_kernel k("pp_floor");
    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
    size_t value_arg = k.add_arg<value_type>(memory_object::private_memory, "value");
    atomic_max<int_> atomic_max_int;

    // Generated kernel: work-item i proposes itself through atomic_max when
    // its element is below `value` and not below the element at *index.
    // NOTE(review): *index is read while other work-items may be updating it.
    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
      << k.decl<const value_type>("cur_value") << "="
      <<     first[k.var<const int_>("i")] << ";\n"
      << "if(cur_value >= " << first[k.expr<int_>("*index")]
      << " && cur_value < value){\n"
      << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
      << "}\n";

    kernel kernel = k.compile(context);

    // start at index 0, i.e. the first element of the searched range
    scalar<int_> index(context);
    kernel.set_arg(index_arg, index.get_buffer());
    index.write(static_cast<int_>(0), queue);

    kernel.set_arg(value_arg, value);

    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);

    int result = static_cast<int>(index.read(queue));
    return first + result;
}

} // end detail namespace

///
/// \brief Permutation generating algorithm
///
/// Transforms the range [first, last) into the previous permutation from
/// the set of all permutations arranged in lexicographic order
/// \return \c false if the range is empty or no element is greater than its
///         successor (the range is then reversed); \c true otherwise
///
/// \param first Iterator pointing to start of range
/// \param last Iterator pointing to end of range
/// \param queue Queue on which to execute
///
/// Space complexity: \Omega(1)
template<class InputIterator>
inline bool prev_permutation(InputIterator first,
                             InputIterator last,
                             command_queue &queue = system::default_queue())
{
    BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    if(first == last) return false;

    // rightmost element that is greater than its successor
    InputIterator first_element =
        detail::prev_permutation_helper(first, last, queue);

    // no such element: reverse the whole range and report the wrap-around
    if(first_element == last)
    {
        reverse(first, last, queue);
        return false;
    }

    value_type first_value = first_element.read(queue);

    // despite the "ceiling" name this holds the result of pp_floor()
    InputIterator ceiling_element =
        detail::pp_floor(first_element + 1, last, first_value, queue);

    value_type ceiling_value = ceiling_element.read(queue);

    // swap the two elements, then reverse everything after the pivot
    first_element.write(ceiling_value, queue);
    ceiling_element.write(first_value, queue);

    reverse(first_element + 1, last, queue);

    return true;
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP algorithm/find_end.hpp 0000644 00000011244 15125510617 0011016 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_END_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_END_HPP #include <boost/static_assert.hpp> #include <boost/compute/algorithm/copy.hpp> #include <boost/compute/algorithm/detail/search_all.hpp> #include <boost/compute/container/detail/scalar.hpp> #include <boost/compute/container/vector.hpp> #include <boost/compute/detail/iterator_range_size.hpp> #include <boost/compute/detail/meta_kernel.hpp> #include <boost/compute/system.hpp> #include <boost/compute/type_traits/is_device_iterator.hpp> namespace boost { namespace compute { namespace detail { /// /// \brief Helper function for find_end /// /// Basically a copy of find_if which returns last occurrence /// instead of first occurrence /// template<class InputIterator, class UnaryPredicate> inline InputIterator find_end_helper(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits<InputIterator>::value_type value_type; typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const context &context = queue.get_context(); detail::meta_kernel k("find_end"); size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); atomic_max<int_> atomic_max_int; k << k.decl<const int_>("i") << " = get_global_id(0);\n" << k.decl<const value_type>("value") << "=" << first[k.var<const int_>("i")] << ";\n" << "if(" << predicate(k.var<const value_type>("value")) << "){\n" << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar<int_> index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast<int_>(-1), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast<int>(index.read(queue)); if(result == -1){ 
return last; } else { return first + static_cast<difference_type>(result); } } } // end detail namespace /// /// \brief Substring matching algorithm /// /// Searches for the last match of the pattern [p_first, p_last) /// in text [t_first, t_last). /// \return Iterator pointing to beginning of last occurence /// /// \param t_first Iterator pointing to start of text /// \param t_last Iterator pointing to end of text /// \param p_first Iterator pointing to start of pattern /// \param p_last Iterator pointing to end of pattern /// \param queue Queue on which to execute /// /// Space complexity: \Omega(n) /// template<class TextIterator, class PatternIterator> inline TextIterator find_end(TextIterator t_first, TextIterator t_last, PatternIterator p_first, PatternIterator p_last, command_queue &queue = system::default_queue()) { BOOST_STATIC_ASSERT(is_device_iterator<TextIterator>::value); BOOST_STATIC_ASSERT(is_device_iterator<PatternIterator>::value); const context &context = queue.get_context(); // there is no need to check if pattern starts at last n - 1 indices vector<uint_> matching_indices( detail::iterator_range_size(t_first, t_last) + 1 - detail::iterator_range_size(p_first, p_last), context ); detail::search_kernel<PatternIterator, TextIterator, vector<uint_>::iterator> kernel; kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin()); kernel.exec(queue); using boost::compute::_1; vector<uint_>::iterator index = detail::find_end_helper( matching_indices.begin(), matching_indices.end(), _1 == 1, queue ); // pattern was not found if(index == matching_indices.end()) return t_last; return t_first + detail::iterator_range_size(matching_indices.begin(), index); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_END_HPP lambda/placeholders.hpp 0000644 00000004700 15125510617 0011146 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz 
<kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP #define BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP #include <boost/mpl/has_xxx.hpp> #include <boost/compute/lambda/context.hpp> #include <boost/compute/lambda/result_of.hpp> namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; // lambda placeholders expression<proto::terminal<placeholder<0> >::type> const _1; expression<proto::terminal<placeholder<1> >::type> const _2; expression<proto::terminal<placeholder<2> >::type> const _3; namespace detail { BOOST_MPL_HAS_XXX_TRAIT_DEF(result_type) template<class T, bool HasResultType> struct terminal_type_impl; template<class T> struct terminal_type_impl<T, true> { typedef typename T::result_type type; }; template<class T> struct terminal_type_impl<T, false> { typedef T type; }; template<class T> struct terminal_type { typedef typename terminal_type_impl<T, has_result_type<T>::value>::type type; }; } // end detail namespace // result_of placeholders template<class Args> struct result_of<expression<proto::terminal<placeholder<0> >::type>, Args, proto::tag::terminal> { typedef typename boost::tuples::element<0, Args>::type arg_type; typedef typename detail::terminal_type<arg_type>::type type; }; template<class Args> struct result_of<expression<proto::terminal<placeholder<1> >::type>, Args, proto::tag::terminal> { typedef typename boost::tuples::element<1, Args>::type arg_type; typedef typename detail::terminal_type<arg_type>::type type; }; template<class Args> struct result_of<expression<proto::terminal<placeholder<2> >::type>, Args, proto::tag::terminal> { typedef typename boost::tuples::element<2, 
Args>::type arg_type; typedef typename detail::terminal_type<arg_type>::type type; }; } // end lambda namespace // lift lambda placeholders up to the boost::compute namespace using lambda::_1; using lambda::_2; using lambda::_3; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP lambda/get.hpp 0000644 00000010201 15125510617 0007251 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_GET_HPP #define BOOST_COMPUTE_LAMBDA_GET_HPP #include <boost/preprocessor/repetition.hpp> #include <boost/compute/config.hpp> #include <boost/compute/functional/get.hpp> #include <boost/compute/lambda/placeholder.hpp> namespace boost { namespace compute { namespace lambda { namespace detail { // function wrapper for get<N>() in lambda expressions template<size_t N> struct get_func { template<class Expr, class Args> struct lambda_result { typedef typename proto::result_of::child_c<Expr, 1>::type Arg; typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type T; typedef typename ::boost::compute::detail::get_result_type<N, T>::type type; }; template<class Context, class Arg> struct make_get_result_type { typedef typename boost::remove_cv< typename boost::compute::lambda::result_of< Arg, typename Context::args_tuple >::type >::type type; }; // returns the suffix string for get<N>() in lambda expressions // (e.g. 
".x" for get<0>() with float4) template<class T> struct make_get_suffix { static std::string value() { BOOST_STATIC_ASSERT(N < 16); std::stringstream stream; if(N < 10){ stream << ".s" << uint_(N); } else if(N < 16){ stream << ".s" << char('a' + (N - 10)); } return stream.str(); } }; // get<N>() specialization for std::pair<T1, T2> template<class T1, class T2> struct make_get_suffix<std::pair<T1, T2> > { static std::string value() { BOOST_STATIC_ASSERT(N < 2); if(N == 0){ return ".first"; } else { return ".second"; } }; }; // get<N>() specialization for boost::tuple<T...> #define BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class T)> \ struct make_get_suffix<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ { \ static std::string value() \ { \ BOOST_STATIC_ASSERT(N < n); \ return ".v" + boost::lexical_cast<std::string>(N); \ } \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX, ~) #undef BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX template<class Context, class Arg> static void dispatch_apply_terminal(Context &ctx, const Arg &arg) { typedef typename make_get_result_type<Context, Arg>::type T; proto::eval(arg, ctx); ctx.stream << make_get_suffix<T>::value(); } template<class Context, int I> static void dispatch_apply_terminal(Context &ctx, placeholder<I>) { ctx.stream << ::boost::compute::get<N>()(::boost::get<I>(ctx.args)); } template<class Context, class Arg> static void dispatch_apply(Context &ctx, const Arg &arg, proto::tag::terminal) { dispatch_apply_terminal(ctx, proto::value(arg)); } template<class Context, class Arg> static void apply(Context &ctx, const Arg &arg) { dispatch_apply(ctx, arg, typename proto::tag_of<Arg>::type()); } }; } // end detail namespace // get<N>() template<size_t N, class Arg> inline typename proto::result_of::make_expr< proto::tag::function, detail::get_func<N>, const Arg& >::type const get(const Arg &arg) { return 
proto::make_expr<proto::tag::function>( detail::get_func<N>(), ::boost::ref(arg) ); } } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_GET_HPP lambda/placeholder.hpp 0000644 00000001424 15125510617 0010763 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP #define BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP namespace boost { namespace compute { namespace lambda { // lambda placeholder type template<int I> struct placeholder { }; } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP lambda/functional.hpp 0000644 00000060677 15125510617 0010662 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP #define BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP #include <boost/tuple/tuple.hpp> #include <boost/lexical_cast.hpp> #include <boost/proto/core.hpp> #include <boost/preprocessor/cat.hpp> #include <boost/preprocessor/stringize.hpp> #include <boost/compute/functional/get.hpp> #include <boost/compute/lambda/result_of.hpp> #include <boost/compute/lambda/placeholder.hpp> #include <boost/compute/types/fundamental.hpp> #include <boost/compute/type_traits/scalar_type.hpp> #include <boost/compute/type_traits/vector_size.hpp> #include <boost/compute/type_traits/make_vector_type.hpp> namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; // wraps a unary boolean function whose result type is an int_ when the argument // type is a scalar, and intN_ if the argument type is a vector of size N #define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg; \ typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type result_type; \ typedef typename ::boost::compute::make_vector_type< \ ::boost::compute::int_, \ ::boost::compute::vector_size<result_type>::value \ >::type type; \ }; \ \ template<class Context, class Arg> \ static void apply(Context &ctx, const Arg &arg) \ { \ ctx.stream << #name << "("; \ proto::eval(arg, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ >::type const \ name(const Arg &arg) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ ); \ } // wraps a unary function whose return type is the same as 
the argument type #define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ }; \ \ template<class Context, class Arg> \ static void apply(Context &ctx, const Arg &arg) \ { \ ctx.stream << #name << "("; \ proto::eval(arg, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ >::type const \ name(const Arg &arg) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ ); \ } // wraps a unary function whose result type is the scalar type of the first argument #define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg; \ typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type result_type; \ typedef typename ::boost::compute::scalar_type<result_type>::type type; \ }; \ \ template<class Context, class Arg> \ static void apply(Context &ctx, const Arg &arg) \ { \ ctx.stream << #name << "("; \ proto::eval(arg, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ >::type const \ name(const Arg &arg) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ ); \ } // wraps a binary boolean function whose result type is an int_ when the first // argument type is a scalar, and intN_ if the first argument type is a vector // of size N #define 
BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ typedef typename ::boost::compute::make_vector_type< \ ::boost::compute::int_, \ ::boost::compute::vector_size<Arg1>::value \ >::type type; \ }; \ \ template<class Context, class Arg1, class Arg2> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a binary function whose result type is the type of the first argument #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, 
_func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a binary function whose result type is the type of the second argument #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_2(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 2>::type Arg2; \ typedef typename ::boost::compute::lambda::result_of<Arg2, Args>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a binary function who's result type is the scalar type of the first argument #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type result_type; \ typedef typename ::boost::compute::scalar_type<result_type>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), 
const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a binary function whose result type is the type of the first argument // and the second argument is a pointer #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", &"; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a ternary function #define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2, class Arg3> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ", "; \ proto::eval(arg3, ctx); \ ctx.stream << ")"; \ } \ }; 
\ } \ template<class Arg1, class Arg2, class Arg3> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ ); \ } // wraps a ternary function whose result type is the type of the third argument #define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_3(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 3>::type Arg3; \ typedef typename ::boost::compute::lambda::result_of<Arg3, Args>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2, class Arg3> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ", "; \ proto::eval(arg3, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2, class Arg3> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ ); \ } // wraps a ternary function whose result type is the type of the first argument // and the third argument of the function is a pointer #define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_PTR(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template<class Expr, class Args> \ struct lambda_result \ { \ typedef typename proto::result_of::child_c<Expr, 3>::type Arg3; \ 
typedef typename ::boost::compute::lambda::result_of<Arg3, Args>::type type; \ }; \ \ template<class Context, class Arg1, class Arg2, class Arg3> \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ", &"; \ proto::eval(arg3, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template<class Arg1, class Arg2, class Arg3> \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ return proto::make_expr<proto::tag::function>( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ ); \ } // Common Built-In Functions BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(degrees) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(min) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(max) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mix) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(radians) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sign) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_2(step) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_3(smoothstep) // Geometric Built-In Functions BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(length) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(normalize) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(fast_distance) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(fast_length) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(fast_normalize) // Integer Built-In Functions BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(abs) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(abs_diff) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(add_sat) 
// Integer Built-In Functions (continued): each invocation below declares a
// lambda-expression wrapper with the same name as the function being wrapped.
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(hadd)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rhadd)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(clz)
// ctz is only wrapped when BOOST_COMPUTE_CL_VERSION_2_0 is defined
#ifdef BOOST_COMPUTE_CL_VERSION_2_0
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(ctz)
#endif
// clamp() (since 1.1) already defined in common
BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad_hi)
BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad24)
BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad_sat)
// max() and min() functions are defined in common
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(mul_hi)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(mul24)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rotate)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(sub_sat)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(upsample)
// popcount is only wrapped when BOOST_COMPUTE_CL_VERSION_1_2 is defined
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(popcount)
#endif

// Math Built-In Functions
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acos)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acosh)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acospi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asin)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asinh)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asinpi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atan)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(atan2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atanh)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atanpi)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(atan2pi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cbrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(ceil)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(copysign)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cosh)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cospi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(erfc)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(erf)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp10)
// Math Built-In Functions (continued)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(expm1)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(fabs)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fdim)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(floor)
BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmax)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmin)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmod)
// the _PTR wrappers emit the last operand with a leading '&'
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(fract)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(frexp)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(hypot)
// NOTE(review): OpenCL C declares ilogb(x) with a single argument, but it is
// wrapped here as a two-argument function — confirm the intended arity.
BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(ilogb) // ilogb returns intN_
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(ldexp)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(lgamma)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(lgamma_r)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log10)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log1p)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(logb)
BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad)
// maxmag/minmag are only wrapped when BOOST_COMPUTE_CL_VERSION_1_1 is defined
#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(maxmag)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(minmag)
#endif
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(modf)
// NOTE(review): OpenCL C nan() maps an unsigned integer code to a float, so a
// wrapper whose result type follows its argument may report the wrong type —
// confirm against the UNARY_FUNCTION_T definition.
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(nan)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(nextafter)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pow)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pown)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(powr)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(remainder)
BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_PTR(remquo)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rint)
// NOTE(review): OpenCL C rootn(x, n) takes two arguments; a unary wrapper
// presumably emits a one-argument call — confirm.
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rootn)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin)
// NOTE(review): OpenCL C sincos(x, &cosval) takes a value and a pointer; the
// BINARY_FUNCTION_PTR wrapper looks like the matching shape — confirm.
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sincos)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sinh)
// Math Built-In Functions (continued)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sinpi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tanh)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tanpi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tgamma)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(trunc)

// Native Math Built-In Functions
// (wrappers for the functions carrying the native_ prefix)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_cos)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(native_divide)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp10)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log10)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(native_powr)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_recip)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_rsqrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_sin)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_sqrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_tan)

// Half Math Built-In Functions
// (wrappers for the functions carrying the half_ prefix; same set as native_)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_cos)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(half_divide)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp10)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log10)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(half_powr)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_recip)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_rsqrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_sin)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_sqrt)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_tan)

// Relational Built-In Functions
BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isequal) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isnotequal) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isgreater) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isgreaterequal) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isless) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(islessequal) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(islessgreater) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnormal) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isordered) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isunordered) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(singbit) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(bitselect) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(select) } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP lambda/make_pair.hpp 0000644 00000004270 15125510617 0010433 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP #define BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP #include <boost/compute/types/pair.hpp> namespace boost { namespace compute { namespace lambda { namespace detail { // function wrapper for make_pair() in lambda expressions struct make_pair_func { template<class Expr, class Args> struct lambda_result { typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; typedef typename proto::result_of::child_c<Expr, 2>::type Arg2; typedef typename lambda::result_of<Arg1, Args>::type T1; typedef typename lambda::result_of<Arg2, Args>::type T2; typedef std::pair<T1, T2> type; }; template<class Context, class Arg1, class Arg2> static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) { typedef typename lambda::result_of<Arg1, typename Context::args_tuple>::type T1; typedef typename lambda::result_of<Arg2, typename Context::args_tuple>::type T2; ctx.stream << "boost_make_pair("; ctx.stream << type_name<T1>() << ", "; proto::eval(arg1, ctx); ctx.stream << ", "; ctx.stream << type_name<T2>() << ", "; proto::eval(arg2, ctx); ctx.stream << ")"; } }; } // end detail namespace // make_pair(first, second) template<class Arg1, class Arg2> inline typename proto::result_of::make_expr< proto::tag::function, detail::make_pair_func, const Arg1&, const Arg2& >::type const make_pair(const Arg1 &first, const Arg2 &second) { return proto::make_expr<proto::tag::function>( detail::make_pair_func(), ::boost::ref(first), ::boost::ref(second) ); } } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP lambda/make_tuple.hpp 0000644 00000011462 15125510617 0010632 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file 
LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP #include <boost/preprocessor/repetition.hpp> #include <boost/compute/config.hpp> #include <boost/compute/types/tuple.hpp> namespace boost { namespace compute { namespace lambda { namespace detail { // function wrapper for make_tuple() in lambda expressions struct make_tuple_func { template<class Expr, class Args, int N> struct make_tuple_result_type; #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG(z, n, unused) \ typedef typename proto::result_of::child_c<Expr, BOOST_PP_INC(n)>::type BOOST_PP_CAT(Arg, n); #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE(z, n, unused) \ typedef typename lambda::result_of<BOOST_PP_CAT(Arg, n), Args>::type BOOST_PP_CAT(T, n); #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE(z, n, unused) \ template<class Expr, class Args> \ struct make_tuple_result_type<Expr, Args, n> \ { \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG, ~) \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE, ~) \ typedef boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> type; \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE, ~) #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE template<class Expr, class Args> struct lambda_result { typedef typename make_tuple_result_type< Expr, Args, proto::arity_of<Expr>::value - 1 >::type type; }; #define BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE(z, n, unused) \ typedef typename lambda::result_of< \ BOOST_PP_CAT(Arg, n), typename Context::args_tuple \ >::type BOOST_PP_CAT(T, n); #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, 
n) BOOST_PP_CAT(&arg, n) #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG(z, n, unused) \ BOOST_PP_EXPR_IF(n, ctx.stream << ", ";) proto::eval(BOOST_PP_CAT(arg, n), ctx); #define BOOST_COMPUTE_MAKE_TUPLE_APPLY(z, n, unused) \ template<class Context, BOOST_PP_ENUM_PARAMS(n, class Arg)> \ static void apply(Context &ctx, BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG, ~)) \ { \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE, ~) \ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> tuple_type; \ ctx.stream.template inject_type<tuple_type>(); \ ctx.stream << "((" << type_name<tuple_type>() << "){"; \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG, ~) \ ctx.stream << "})"; \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_APPLY, ~) #undef BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY }; } // end detail namespace #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n) #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) & #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) ::boost::ref(BOOST_PP_CAT(arg, n)) #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE(z, n, unused) \ template<BOOST_PP_ENUM_PARAMS(n, class Arg)> \ inline typename proto::result_of::make_expr< \ proto::tag::function, \ detail::make_tuple_func, \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE, ~) \ >::type \ make_tuple(BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG, ~)) \ { \ return proto::make_expr<proto::tag::function>( \ detail::make_tuple_func(), \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG, ~) \ ); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE, 
~) #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP lambda/context.hpp 0000644 00000025243 15125510617 0010172 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_CONTEXT_HPP #define BOOST_COMPUTE_LAMBDA_CONTEXT_HPP #include <boost/proto/core.hpp> #include <boost/proto/context.hpp> #include <boost/type_traits.hpp> #include <boost/preprocessor/repetition.hpp> #include <boost/compute/config.hpp> #include <boost/compute/function.hpp> #include <boost/compute/lambda/result_of.hpp> #include <boost/compute/lambda/functional.hpp> #include <boost/compute/type_traits/result_of.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/detail/meta_kernel.hpp> namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; #define BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(tag, op) \ template<class LHS, class RHS> \ void operator()(tag, const LHS &lhs, const RHS &rhs) \ { \ if(proto::arity_of<LHS>::value > 0){ \ stream << '('; \ proto::eval(lhs, *this); \ stream << ')'; \ } \ else { \ proto::eval(lhs, *this); \ } \ \ stream << op; \ \ if(proto::arity_of<RHS>::value > 0){ \ stream << '('; \ proto::eval(rhs, *this); \ stream << ')'; \ } \ else { \ proto::eval(rhs, *this); \ } \ } // lambda expression context template<class Args> 
struct context : proto::callable_context<context<Args> > { typedef void result_type; typedef Args args_tuple; // create a lambda context for kernel with args context(boost::compute::detail::meta_kernel &kernel, const Args &args_) : stream(kernel), args(args_) { } // handle terminals template<class T> void operator()(proto::tag::terminal, const T &x) { // terminal values in lambda expressions are always literals stream << stream.lit(x); } void operator()(proto::tag::terminal, const uchar_ &x) { stream << "(uchar)(" << stream.lit(uint_(x)) << "u)"; } void operator()(proto::tag::terminal, const char_ &x) { stream << "(char)(" << stream.lit(int_(x)) << ")"; } void operator()(proto::tag::terminal, const ushort_ &x) { stream << "(ushort)(" << stream.lit(x) << "u)"; } void operator()(proto::tag::terminal, const short_ &x) { stream << "(short)(" << stream.lit(x) << ")"; } void operator()(proto::tag::terminal, const uint_ &x) { stream << "(" << stream.lit(x) << "u)"; } void operator()(proto::tag::terminal, const ulong_ &x) { stream << "(" << stream.lit(x) << "ul)"; } void operator()(proto::tag::terminal, const long_ &x) { stream << "(" << stream.lit(x) << "l)"; } // handle placeholders template<int I> void operator()(proto::tag::terminal, placeholder<I>) { stream << boost::get<I>(args); } // handle functions #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n) #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION(z, n, unused) \ template<class F, BOOST_PP_ENUM_PARAMS(n, class Arg)> \ void operator()( \ proto::tag::function, \ const F &function, \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG, ~) \ ) \ { \ proto::value(function).apply(*this, BOOST_PP_ENUM_PARAMS(n, arg)); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION, ~) #undef BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION // operators 
BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::plus, '+') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::minus, '-') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::multiplies, '*') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::divides, '/') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::modulus, '%') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less, '<') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater, '>') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less_equal, "<=") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater_equal, ">=") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::equal_to, "==") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::not_equal_to, "!=") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_and, "&&") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_or, "||") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_and, '&') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_or, '|') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_xor, '^') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::assign, '=') // subscript operator template<class LHS, class RHS> void operator()(proto::tag::subscript, const LHS &lhs, const RHS &rhs) { proto::eval(lhs, *this); stream << '['; proto::eval(rhs, *this); stream << ']'; } // ternary conditional operator template<class Pred, class Arg1, class Arg2> void operator()(proto::tag::if_else_, const Pred &p, const Arg1 &x, const Arg2 &y) { proto::eval(p, *this); stream << '?'; proto::eval(x, *this); stream << ':'; proto::eval(y, *this); } boost::compute::detail::meta_kernel &stream; Args args; }; namespace detail { template<class Expr, class Arg> struct invoked_unary_expression { typedef typename 
::boost::compute::result_of<Expr(Arg)>::type result_type; invoked_unary_expression(const Expr &expr, const Arg &arg) : m_expr(expr), m_arg(arg) { } Expr m_expr; Arg m_arg; }; template<class Expr, class Arg> boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel &kernel, const invoked_unary_expression<Expr, Arg> &expr) { context<boost::tuple<Arg> > ctx(kernel, boost::make_tuple(expr.m_arg)); proto::eval(expr.m_expr, ctx); return kernel; } template<class Expr, class Arg1, class Arg2> struct invoked_binary_expression { typedef typename ::boost::compute::result_of<Expr(Arg1, Arg2)>::type result_type; invoked_binary_expression(const Expr &expr, const Arg1 &arg1, const Arg2 &arg2) : m_expr(expr), m_arg1(arg1), m_arg2(arg2) { } Expr m_expr; Arg1 m_arg1; Arg2 m_arg2; }; template<class Expr, class Arg1, class Arg2> boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel &kernel, const invoked_binary_expression<Expr, Arg1, Arg2> &expr) { context<boost::tuple<Arg1, Arg2> > ctx( kernel, boost::make_tuple(expr.m_arg1, expr.m_arg2) ); proto::eval(expr.m_expr, ctx); return kernel; } } // end detail namespace // forward declare domain struct domain; // lambda expression wrapper template<class Expr> struct expression : proto::extends<Expr, expression<Expr>, domain> { typedef proto::extends<Expr, expression<Expr>, domain> base_type; BOOST_PROTO_EXTENDS_USING_ASSIGN(expression) expression(const Expr &expr = Expr()) : base_type(expr) { } // result_of protocol template<class Signature> struct result { }; template<class This> struct result<This()> { typedef typename ::boost::compute::lambda::result_of<Expr>::type type; }; template<class This, class Arg> struct result<This(Arg)> { typedef typename ::boost::compute::lambda::result_of< Expr, typename boost::tuple<Arg> >::type type; }; template<class This, class Arg1, class Arg2> struct result<This(Arg1, Arg2)> { typedef typename ::boost::compute::lambda::result_of< Expr, typename 
boost::tuple<Arg1, Arg2> >::type type; }; template<class Arg> detail::invoked_unary_expression<expression<Expr>, Arg> operator()(const Arg &x) const { return detail::invoked_unary_expression<expression<Expr>, Arg>(*this, x); } template<class Arg1, class Arg2> detail::invoked_binary_expression<expression<Expr>, Arg1, Arg2> operator()(const Arg1 &x, const Arg2 &y) const { return detail::invoked_binary_expression< expression<Expr>, Arg1, Arg2 >(*this, x, y); } // function<> conversion operator template<class R, class A1> operator function<R(A1)>() const { using ::boost::compute::detail::meta_kernel; std::stringstream source; ::boost::compute::detail::meta_kernel_variable<A1> arg1("x"); source << "inline " << type_name<R>() << " lambda" << ::boost::compute::detail::generate_argument_list<R(A1)>('x') << "{\n" << " return " << meta_kernel::expr_to_string((*this)(arg1)) << ";\n" << "}\n"; return make_function_from_source<R(A1)>("lambda", source.str()); } template<class R, class A1, class A2> operator function<R(A1, A2)>() const { using ::boost::compute::detail::meta_kernel; std::stringstream source; ::boost::compute::detail::meta_kernel_variable<A1> arg1("x"); ::boost::compute::detail::meta_kernel_variable<A1> arg2("y"); source << "inline " << type_name<R>() << " lambda" << ::boost::compute::detail::generate_argument_list<R(A1, A2)>('x') << "{\n" << " return " << meta_kernel::expr_to_string((*this)(arg1, arg2)) << ";\n" << "}\n"; return make_function_from_source<R(A1, A2)>("lambda", source.str()); } }; // lambda expression domain struct domain : proto::domain<proto::generator<expression> > { }; } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_CONTEXT_HPP lambda/result_of.hpp 0000644 00000010027 15125510617 0010502 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, 
Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP #define BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP #include <boost/mpl/vector.hpp> #include <boost/proto/proto.hpp> #include <boost/compute/type_traits/common_type.hpp> namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; // meta-function returning the result type of a lambda expression template<class Expr, class Args = void, class Tags = typename proto::tag_of<Expr>::type> struct result_of { }; // terminals template<class Expr, class Args> struct result_of<Expr, Args, proto::tag::terminal> { typedef typename proto::result_of::value<Expr>::type type; }; // binary operators #define BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(tag) \ template<class Expr, class Args> \ struct result_of<Expr, Args, tag> \ { \ typedef typename proto::result_of::child_c<Expr, 0>::type left; \ typedef typename proto::result_of::child_c<Expr, 1>::type right; \ \ typedef typename boost::common_type< \ typename ::boost::compute::lambda::result_of< \ left, \ Args, \ typename proto::tag_of<left>::type>::type, \ typename ::boost::compute::lambda::result_of< \ right, \ Args, \ typename proto::tag_of<right>::type>::type \ >::type type; \ }; BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::plus) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::minus) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::multiplies) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::divides) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::modulus) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_and) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_or) 
BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_xor) // comparision operators #define BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(tag) \ template<class Expr, class Args> \ struct result_of<Expr, Args, tag> \ { \ typedef bool type; \ }; BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less_equal) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater_equal) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::equal_to) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::not_equal_to) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_and) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_or) // assignment operator template<class Expr, class Args> struct result_of<Expr, Args, proto::tag::assign> { typedef typename proto::result_of::child_c<Expr, 0>::type left; typedef typename proto::result_of::child_c<Expr, 1>::type right; typedef typename ::boost::compute::lambda::result_of< right, Args, typename proto::tag_of<right>::type >::type type; }; // functions template<class Expr, class Args> struct result_of<Expr, Args, proto::tag::function> { typedef typename proto::result_of::child_c<Expr, 0>::type func_expr; typedef typename proto::result_of::value<func_expr>::type func; typedef typename func::template lambda_result<Expr, Args>::type type; }; } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP utility/invoke.hpp 0000644 00000005342 15125510617 0010262 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_UTILITY_INVOKE_HPP
#define BOOST_COMPUTE_UTILITY_INVOKE_HPP

#include <boost/preprocessor/enum.hpp>
#include <boost/preprocessor/repetition.hpp>

#include <boost/compute/config.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/detail/meta_kernel.hpp>
#include <boost/compute/container/detail/scalar.hpp>
#include <boost/compute/type_traits/result_of.hpp>

namespace boost {
namespace compute {

// Expands to the n-th argument of the kernel-side function call: the kernel
// variable "arg<n>" typed as T<n>, preceded by a comma for every n > 0.
#define BOOST_COMPUTE_DETAIL_INVOKE_ARG(z, n, unused) \
    BOOST_PP_COMMA_IF(n) k.var<BOOST_PP_CAT(T, n)>("arg" BOOST_PP_STRINGIZE(n))

// Expands to a statement passing the host value arg<n> to k.add_set_arg()
// under the kernel argument name "arg<n>".
#define BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG(z, n, unused) \
    k.add_set_arg("arg" BOOST_PP_STRINGIZE(n), BOOST_PP_CAT(arg, n));

// Defines the invoke() overload taking n arguments. The generated function:
//   1. creates a meta_kernel named "invoke" and a scalar<result_type> in the
//      context of the queue,
//   2. adds a global-memory pointer argument named "result" plus one kernel
//      argument per host argument,
//   3. emits the source "*result = function(arg0, ..., arg<n-1>);",
//   4. binds the scalar's buffer to "result", executes the kernel on the
//      queue and returns the value read back from the scalar.
#define BOOST_COMPUTE_DETAIL_DEFINE_INVOKE(z, n, unused) \
template<class Function, BOOST_PP_ENUM_PARAMS(n, class T)> \
inline typename result_of<Function(BOOST_PP_ENUM_PARAMS(n, T))>::type \
invoke(const Function& function, command_queue& queue, BOOST_PP_ENUM_BINARY_PARAMS(n, const T, &arg)) \
{ \
    typedef typename result_of<Function(BOOST_PP_ENUM_PARAMS(n, T))>::type result_type; \
    detail::meta_kernel k("invoke"); \
    detail::scalar<result_type> result(queue.get_context()); \
    const size_t result_arg = k.add_arg<result_type *>(memory_object::global_memory, "result"); \
    BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG, ~) \
    k << "*result = " << function( \
        BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ARG, ~) \
    ) << ";"; \
    k.set_arg(result_arg, result.get_buffer()); \
    k.exec(queue); \
    return result.read(queue); \
}

// Generates the overloads for 1 up to BOOST_COMPUTE_MAX_ARITY - 1 arguments
// (the upper bound of BOOST_PP_REPEAT_FROM_TO is exclusive). There is no
// zero-argument overload.
BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_INVOKE, ~)

#undef BOOST_COMPUTE_DETAIL_INVOKE_ARG
#undef BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG
#undef BOOST_COMPUTE_DETAIL_DEFINE_INVOKE

#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
/// Invokes \p function with \p args on \p queue.
///
/// The call builds and executes a kernel on \p queue and returns the value
/// read back from the device.
///
/// For example, to invoke the builtin abs() function:
/// \code
/// int result = invoke(abs<int>(), queue, -10); // returns 10
/// \endcode
template<class Function, class... Args>
inline typename result_of<Function(Args...)>::type
invoke(const Function& function, command_queue& queue, const Args&... args);
#endif // BOOST_COMPUTE_DOXYGEN_INVOKED

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_UTILITY_INVOKE_HPP
utility/dim.hpp 0000644 00000004212 15125510617 0007533 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_UTILITY_DIM_HPP
#define BOOST_COMPUTE_UTILITY_DIM_HPP

#include <boost/compute/config.hpp>
#include <boost/compute/utility/extents.hpp>

namespace boost {
namespace compute {

#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
/// The variadic \c dim() function provides a concise syntax for creating
/// \ref extents objects.
///
/// Each argument is converted to \c size_t with \c static_cast and the
/// dimensionality of the result equals the number of arguments.
///
/// For example,
/// \code
/// extents<2> region = dim(640, 480); // region == (640, 480)
/// \endcode
///
/// \see \ref extents "extents<N>"
template<class... Args>
inline extents<sizeof...(Args)> dim(Args... args)
{
    return extents<sizeof...(Args)>({ static_cast<size_t>(args)... });
}

#if BOOST_WORKAROUND(BOOST_MSVC, <= 1800)
// for some inexplicable reason passing one parameter to 'dim' variadic template
// generates compile error on msvc 2013 update 4
template<class T>
inline extents<1> dim(T arg)
{
    return extents<1>(static_cast<size_t>(arg));
}
#endif // BOOST_WORKAROUND(BOOST_MSVC, <= 1800)

#else
// dim() function definitions for non-c++11 compilers
//
// NOTE(review): this branch uses BOOST_PP_CAT, BOOST_PP_ENUM_PARAMS and
// BOOST_PP_REPEAT, but this header includes no Boost.Preprocessor header
// itself -- confirm they are provided by one of the two includes above.

// assigns the n-th parameter (e<n>) to the n-th component of `var`
#define BOOST_COMPUTE_DETAIL_ASSIGN_DIM(z, n, var) \
    var[n] = BOOST_PP_CAT(e, n);

// defines the dim() overload taking n size_t parameters
#define BOOST_COMPUTE_DETAIL_DEFINE_DIM(z, n, var) \
    inline extents<n> dim(BOOST_PP_ENUM_PARAMS(n, size_t e)) \
    { \
        extents<n> exts; \
        BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_ASSIGN_DIM, exts) \
        return exts; \
    }

// overloads for 0 up to BOOST_COMPUTE_MAX_ARITY - 1 parameters
BOOST_PP_REPEAT(BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_DIM, ~)

#undef BOOST_COMPUTE_DETAIL_ASSIGN_DIM
#undef BOOST_COMPUTE_DETAIL_DEFINE_DIM

#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES

/// \internal_
/// Returns a default-constructed \c extents<N>; \c N must be given
/// explicitly because there is no argument to deduce it from.
template<size_t N>
inline extents<N> dim()
{
    return extents<N>();
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_UTILITY_DIM_HPP
utility/wait_list.hpp 0000644 00000013153 15125510617 0010765 0 ustar 00
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP
#define BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP

#include <vector>

#include <boost/compute/config.hpp>

#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
#include <initializer_list>
#endif

#include <boost/compute/event.hpp>

namespace boost {
namespace compute {

// forward declaration; only needed for the insert(const future<T>&) overload
template<class T> class future;

/// \class wait_list
/// \brief Stores a list of events.
/// /// The wait_list class stores a set of event objects and can be used to /// specify dependencies for OpenCL operations or to wait on the host until /// all of the events have completed. /// /// This class also provides convenience functions for interacting with /// OpenCL APIs which typically accept event dependencies as a \c cl_event* /// pointer and a \c cl_uint size. For example: /// \code /// wait_list events = ...; /// /// clEnqueueNDRangeKernel(..., events.get_event_ptr(), events.size(), ...); /// \endcode /// /// \see event, \ref future "future<T>" class wait_list { public: typedef std::vector<event>::iterator iterator; typedef std::vector<event>::const_iterator const_iterator; /// Creates an empty wait-list. wait_list() { } /// Creates a wait-list containing \p event. wait_list(const event &event) { insert(event); } /// Creates a new wait-list as a copy of \p other. wait_list(const wait_list &other) : m_events(other.m_events) { } #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST /// Creates a wait-list from \p events wait_list(std::initializer_list<event> events) : m_events(events) { } #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST /// Copies the events in the wait-list from \p other. wait_list& operator=(const wait_list &other) { if(this != &other){ m_events = other.m_events; } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new wait list object from \p other. wait_list(wait_list&& other) : m_events(std::move(other.m_events)) { } /// Move-assigns the wait list from \p other to \c *this. wait_list& operator=(wait_list&& other) { m_events = std::move(other.m_events); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the wait-list. ~wait_list() { } /// Returns \c true if the wait-list is empty. bool empty() const { return m_events.empty(); } /// Returns the number of events in the wait-list. 
uint_ size() const { return static_cast<uint_>(m_events.size()); } /// Removes all of the events from the wait-list. void clear() { m_events.clear(); } /// Returns a cl_event pointer to the first event in the wait-list. /// Returns \c 0 if the wait-list is empty. /// /// This can be used to pass the wait-list to OpenCL functions which /// expect a \c cl_event pointer to refer to a list of events. const cl_event* get_event_ptr() const { if(empty()){ return 0; } return reinterpret_cast<const cl_event *>(&m_events[0]); } /// Reserves a minimum length of storage for the wait list object. void reserve(size_t new_capacity) { m_events.reserve(new_capacity); } /// Inserts \p event into the wait-list. void insert(const event &event) { m_events.push_back(event); } /// Inserts the event from \p future into the wait-list. template<class T> void insert(const future<T> &future) { insert(future.get_event()); } /// Blocks until all of the events in the wait-list have completed. /// /// Does nothing if the wait-list is empty. void wait() const { if(!empty()){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clWaitForEvents(size(), get_event_ptr()) ); } } /// Returns a reference to the event at specified location \p pos. const event& operator[](size_t pos) const { return m_events[pos]; } /// Returns a reference to the event at specified location \p pos. event& operator[](size_t pos) { return m_events[pos]; } /// Returns an iterator to the first element of the wait-list. iterator begin() { return m_events.begin(); } /// Returns an iterator to the first element of the wait-list. const_iterator begin() const { return m_events.begin(); } /// Returns an iterator to the first element of the wait-list. const_iterator cbegin() const { return m_events.begin(); } /// Returns an iterator to the element following the last element of the wait-list. iterator end() { return m_events.end(); } /// Returns an iterator to the element following the last element of the wait-list. 
const_iterator end() const { return m_events.end(); } /// Returns an iterator to the element following the last element of the wait-list. const_iterator cend() const { return m_events.end(); } private: std::vector<event> m_events; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP utility/source.hpp 0000644 00000002457 15125510617 0010273 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_SOURCE_HPP #define BOOST_COMPUTE_UTILITY_SOURCE_HPP /// Stringizes OpenCL source code. /// /// For example, to create a simple kernel which squares each input value: /// \code /// const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( /// __kernel void square(const float *input, float *output) /// { /// const uint i = get_global_id(0); /// const float x = input[i]; /// output[i] = x * x; /// } /// ); /// /// // create and build square program /// program square_program = program::build_with_source(source, context); /// /// // create square kernel /// kernel square_kernel(square_program, "square"); /// \endcode #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED #define BOOST_COMPUTE_STRINGIZE_SOURCE(source) #else #define BOOST_COMPUTE_STRINGIZE_SOURCE(...) 
#__VA_ARGS__ #endif #endif // BOOST_COMPUTE_UTILITY_SOURCE_HPP utility/program_cache.hpp 0000644 00000012565 15125510617 0011566 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP #define BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP #include <string> #include <utility> #include <boost/shared_ptr.hpp> #include <boost/make_shared.hpp> #include <boost/noncopyable.hpp> #include <boost/compute/context.hpp> #include <boost/compute/program.hpp> #include <boost/compute/detail/lru_cache.hpp> #include <boost/compute/detail/global_static.hpp> namespace boost { namespace compute { /// The program_cache class stores \ref program objects in a LRU cache. /// /// This class can be used to help mitigate the overhead of OpenCL's run-time /// kernel compilation model. Commonly used programs can be stored persistently /// in the cache and only compiled once on their first use. /// /// Program objects are stored and retreived based on a user-defined cache key /// along with the options used to build the program (if any). /// /// For example, to insert a program into the cache: /// \code /// cache.insert("foo", foo_program); /// \endcode /// /// And to retreive the program later: /// \code /// boost::optional<program> p = cache.get("foo"); /// if(p){ /// // program found in cache /// } /// \endcode /// /// \see program class program_cache : boost::noncopyable { public: /// Creates a new program cache with space for \p capacity number of /// program objects. program_cache(size_t capacity) : m_cache(capacity) { } /// Destroys the program cache. 
~program_cache() { } /// Returns the number of program objects currently stored in the cache. size_t size() const { return m_cache.size(); } /// Returns the total capacity of the cache. size_t capacity() const { return m_cache.capacity(); } /// Clears the program cache. void clear() { m_cache.clear(); } /// Returns the program object with \p key. Returns a null optional if no /// program with \p key exists in the cache. boost::optional<program> get(const std::string &key) { return m_cache.get(std::make_pair(key, std::string())); } /// Returns the program object with \p key and \p options. Returns a null /// optional if no program with \p key and \p options exists in the cache. boost::optional<program> get(const std::string &key, const std::string &options) { return m_cache.get(std::make_pair(key, options)); } /// Inserts \p program into the cache with \p key. void insert(const std::string &key, const program &program) { insert(key, std::string(), program); } /// Inserts \p program into the cache with \p key and \p options. void insert(const std::string &key, const std::string &options, const program &program) { m_cache.insert(std::make_pair(key, options), program); } /// Loads the program with \p key from the cache if it exists. Otherwise /// builds a new program with \p source and \p options, stores it in the /// cache, and returns it. /// /// This is a convenience function to simplify the common pattern of /// attempting to load a program from the cache and, if not present, /// building the program from source and storing it in the cache. 
/// /// Equivalent to: /// \code /// boost::optional<program> p = get(key, options); /// if(!p){ /// p = program::create_with_source(source, context); /// p->build(options); /// insert(key, options, *p); /// } /// return *p; /// \endcode program get_or_build(const std::string &key, const std::string &options, const std::string &source, const context &context) { boost::optional<program> p = get(key, options); if(!p){ p = program::build_with_source(source, context, options); insert(key, options, *p); } return *p; } /// Returns the global program cache for \p context. /// /// This global cache is used internally by Boost.Compute to store compiled /// program objects used by its algorithms. All Boost.Compute programs are /// stored with a cache key beginning with \c "__boost". User programs /// should avoid using the same prefix in order to prevent collisions. static boost::shared_ptr<program_cache> get_global_cache(const context &context) { typedef detail::lru_cache<cl_context, boost::shared_ptr<program_cache> > cache_map; BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, (8)); boost::optional<boost::shared_ptr<program_cache> > cache = caches.get(context.get()); if(!cache){ cache = boost::make_shared<program_cache>(64); caches.insert(context.get(), *cache); } return *cache; } private: detail::lru_cache<std::pair<std::string, std::string>, program> m_cache; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP utility/extents.hpp 0000644 00000007444 15125510617 0010466 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_EXTENTS_HPP #define BOOST_COMPUTE_UTILITY_EXTENTS_HPP #include <functional> #include <numeric> #include <boost/compute/config.hpp> #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST #include <initializer_list> #endif #include <boost/array.hpp> namespace boost { namespace compute { /// The extents class contains an array of n-dimensional extents. /// /// \see dim() template<size_t N> class extents { public: typedef size_t size_type; static const size_type static_size = N; typedef boost::array<size_t, N> array_type; typedef typename array_type::iterator iterator; typedef typename array_type::const_iterator const_iterator; /// Creates an extents object with each component set to zero. /// /// For example: /// \code /// extents<3> exts(); // (0, 0, 0) /// \endcode extents() { m_extents.fill(0); } /// Creates an extents object with each component set to \p value. /// /// For example: /// \code /// extents<3> exts(1); // (1, 1, 1) /// \endcode explicit extents(size_t value) { m_extents.fill(value); } #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST /// Creates an extents object with \p values. extents(std::initializer_list<size_t> values) { BOOST_ASSERT(values.size() == N); std::copy(values.begin(), values.end(), m_extents.begin()); } #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST /// Returns the size (i.e. dimensionality) of the extents array. size_type size() const { return N; } /// Returns the linear size of the extents. This is equivalent to the /// product of each extent in each dimension. size_type linear() const { return std::accumulate( m_extents.begin(), m_extents.end(), 1, std::multiplies<size_type>() ); } /// Returns a pointer to the extents data array. /// /// This is useful for passing the extents data to OpenCL APIs which /// expect an array of \c size_t. 
size_t* data() { return m_extents.data(); } /// \overload const size_t* data() const { return m_extents.data(); } iterator begin() { return m_extents.begin(); } const_iterator begin() const { return m_extents.begin(); } const_iterator cbegin() const { return m_extents.cbegin(); } iterator end() { return m_extents.end(); } const_iterator end() const { return m_extents.end(); } const_iterator cend() const { return m_extents.cend(); } /// Returns a reference to the extent at \p index. size_t& operator[](size_t index) { return m_extents[index]; } /// \overload const size_t& operator[](size_t index) const { return m_extents[index]; } /// Returns \c true if the extents in \c *this are the same as \p other. bool operator==(const extents &other) const { return m_extents == other.m_extents; } /// Returns \c true if the extents in \c *this are not the same as \p other. bool operator!=(const extents &other) const { return m_extents != other.m_extents; } private: array_type m_extents; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_EXTENTS_HPP function.hpp 0000644 00000027552 15125510617 0007120 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTION_HPP #define BOOST_COMPUTE_FUNCTION_HPP #include <map> #include <string> #include <sstream> #include <vector> #include <boost/assert.hpp> #include <boost/config.hpp> #include <boost/function_types/parameter_types.hpp> #include <boost/preprocessor/repetition.hpp> #include <boost/mpl/for_each.hpp> #include <boost/mpl/size.hpp> #include <boost/mpl/transform.hpp> #include <boost/static_assert.hpp> #include <boost/tuple/tuple.hpp> #include <boost/type_traits/add_pointer.hpp> #include <boost/type_traits/function_traits.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/config.hpp> #include <boost/compute/type_traits/type_name.hpp> namespace boost { namespace compute { namespace detail { template<class ResultType, class ArgTuple> class invoked_function { public: typedef ResultType result_type; BOOST_STATIC_CONSTANT( size_t, arity = boost::tuples::length<ArgTuple>::value ); invoked_function(const std::string &name, const std::string &source) : m_name(name), m_source(source) { } invoked_function(const std::string &name, const std::string &source, const std::map<std::string, std::string> &definitions) : m_name(name), m_source(source), m_definitions(definitions) { } invoked_function(const std::string &name, const std::string &source, const ArgTuple &args) : m_name(name), m_source(source), m_args(args) { } invoked_function(const std::string &name, const std::string &source, const std::map<std::string, std::string> &definitions, const ArgTuple &args) : m_name(name), m_source(source), m_definitions(definitions), m_args(args) { } std::string name() const { return m_name; } std::string source() const { return m_source; } const std::map<std::string, std::string>& definitions() const { return m_definitions; } const ArgTuple& args() const { return m_args; } private: std::string m_name; std::string m_source; std::map<std::string, std::string> m_definitions; ArgTuple 
m_args; }; } // end detail namespace /// \class function /// \brief A function object. template<class Signature> class function { public: /// \internal_ typedef typename boost::function_traits<Signature>::result_type result_type; /// \internal_ BOOST_STATIC_CONSTANT( size_t, arity = boost::function_traits<Signature>::arity ); /// \internal_ typedef Signature signature; /// Creates a new function object with \p name. function(const std::string &name) : m_name(name) { } /// Destroys the function object. ~function() { } /// \internal_ std::string name() const { return m_name; } /// \internal_ void set_source(const std::string &source) { m_source = source; } /// \internal_ std::string source() const { return m_source; } /// \internal_ void define(std::string name, std::string value = std::string()) { m_definitions[name] = value; } bool operator==(const function<Signature>& other) const { return (m_name == other.m_name) && (m_definitions == other.m_definitions) && (m_source == other.m_source); } bool operator!=(const function<Signature>& other) const { return !(*this == other); } /// \internal_ detail::invoked_function<result_type, boost::tuple<> > operator()() const { BOOST_STATIC_ASSERT_MSG( arity == 0, "Non-nullary function invoked with zero arguments" ); return detail::invoked_function<result_type, boost::tuple<> >( m_name, m_source, m_definitions ); } /// \internal_ template<class Arg1> detail::invoked_function<result_type, boost::tuple<Arg1> > operator()(const Arg1 &arg1) const { BOOST_STATIC_ASSERT_MSG( arity == 1, "Non-unary function invoked one argument" ); return detail::invoked_function<result_type, boost::tuple<Arg1> >( m_name, m_source, m_definitions, boost::make_tuple(arg1) ); } /// \internal_ template<class Arg1, class Arg2> detail::invoked_function<result_type, boost::tuple<Arg1, Arg2> > operator()(const Arg1 &arg1, const Arg2 &arg2) const { BOOST_STATIC_ASSERT_MSG( arity == 2, "Non-binary function invoked with two arguments" ); return 
detail::invoked_function<result_type, boost::tuple<Arg1, Arg2> >( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2) ); } /// \internal_ template<class Arg1, class Arg2, class Arg3> detail::invoked_function<result_type, boost::tuple<Arg1, Arg2, Arg3> > operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const { BOOST_STATIC_ASSERT_MSG( arity == 3, "Non-ternary function invoked with three arguments" ); return detail::invoked_function<result_type, boost::tuple<Arg1, Arg2, Arg3> >( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3) ); } private: std::string m_name; std::string m_source; std::map<std::string, std::string> m_definitions; }; /// Creates a function object given its \p name and \p source. /// /// \param name The function name. /// \param source The function source code. /// /// \see BOOST_COMPUTE_FUNCTION() template<class Signature> inline function<Signature> make_function_from_source(const std::string &name, const std::string &source) { function<Signature> f(name); f.set_source(source); return f; } namespace detail { // given a string containing the arguments declaration for a function // like: "(int a, const float b)", returns a vector containing the name // of each argument (e.g. ["a", "b"]). 
inline std::vector<std::string> parse_argument_names(const char *arguments) { BOOST_ASSERT_MSG( arguments[0] == '(' && arguments[std::strlen(arguments)-1] == ')', "Arguments should start and end with parentheses" ); std::vector<std::string> args; size_t last_space = 0; size_t skip_comma = 0; for(size_t i = 1; i < std::strlen(arguments) - 2; i++){ const char c = arguments[i]; if(c == ' '){ last_space = i; } else if(c == ',' && !skip_comma){ std::string name( arguments + last_space + 1, i - last_space - 1 ); args.push_back(name); } else if(c == '<'){ skip_comma++; } else if(c == '>'){ skip_comma--; } } std::string last_argument( arguments + last_space + 1, std::strlen(arguments) - last_space - 2 ); args.push_back(last_argument); return args; } struct signature_argument_inserter { signature_argument_inserter(std::stringstream &s_, const char *arguments, size_t last) : s(s_) { n = 0; m_last = last; m_argument_names = parse_argument_names(arguments); BOOST_ASSERT_MSG( m_argument_names.size() == last, "Wrong number of arguments" ); } template<class T> void operator()(const T*) { s << type_name<T>() << " " << m_argument_names[n]; if(n+1 < m_last){ s << ", "; } n++; } size_t n; size_t m_last; std::stringstream &s; std::vector<std::string> m_argument_names; }; template<class Signature> inline std::string make_function_declaration(const char *name, const char *arguments) { typedef typename boost::function_traits<Signature>::result_type result_type; typedef typename boost::function_types::parameter_types<Signature>::type parameter_types; typedef typename mpl::size<parameter_types>::type arity_type; std::stringstream s; s << "inline " << type_name<result_type>() << " " << name; s << "("; if(arity_type::value > 0){ signature_argument_inserter i(s, arguments, arity_type::value); mpl::for_each< typename mpl::transform<parameter_types, boost::add_pointer<mpl::_1> >::type>(i); } s << ")"; return s.str(); } struct argument_list_inserter { argument_list_inserter(std::stringstream 
&s_, const char first, size_t last) : s(s_) { n = 0; m_last = last; m_name = first; } template<class T> void operator()(const T*) { s << type_name<T>() << " " << m_name++; if(n+1 < m_last){ s << ", "; } n++; } size_t n; size_t m_last; char m_name; std::stringstream &s; }; template<class Signature> inline std::string generate_argument_list(const char first = 'a') { typedef typename boost::function_types::parameter_types<Signature>::type parameter_types; typedef typename mpl::size<parameter_types>::type arity_type; std::stringstream s; s << '('; if(arity_type::value > 0){ argument_list_inserter i(s, first, arity_type::value); mpl::for_each< typename mpl::transform<parameter_types, boost::add_pointer<mpl::_1> >::type>(i); } s << ')'; return s.str(); } // used by the BOOST_COMPUTE_FUNCTION() macro to create a function // with the given signature, name, arguments, and source. template<class Signature> inline function<Signature> make_function_impl(const char *name, const char *arguments, const char *source) { std::stringstream s; s << make_function_declaration<Signature>(name, arguments); s << source; return make_function_from_source<Signature>(name, s.str()); } } // end detail namespace } // end compute namespace } // end boost namespace /// Creates a function object with \p name and \p source. /// /// \param return_type The return type for the function. /// \param name The name of the function. /// \param arguments A list of arguments for the function. /// \param source The OpenCL C source code for the function. /// /// The function declaration and signature are automatically created using /// the \p return_type, \p name, and \p arguments macro parameters. /// /// The source code for the function is interpreted as OpenCL C99 source code /// which is stringified and passed to the OpenCL compiler when the function /// is invoked. 
/// /// For example, to create a function which squares a number: /// \code /// BOOST_COMPUTE_FUNCTION(float, square, (float x), /// { /// return x * x; /// }); /// \endcode /// /// And to create a function which sums two numbers: /// \code /// BOOST_COMPUTE_FUNCTION(int, sum_two, (int x, int y), /// { /// return x + y; /// }); /// \endcode /// /// \see BOOST_COMPUTE_CLOSURE() #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED #define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, source) #else #define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, ...) \ ::boost::compute::function<return_type arguments> name = \ ::boost::compute::detail::make_function_impl<return_type arguments>( \ #name, #arguments, #__VA_ARGS__ \ ) #endif #endif // BOOST_COMPUTE_FUNCTION_HPP image3d.hpp 0000644 00000001045 15125510617 0006571 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use <boost/compute/image/image3d.hpp> instead #include <boost/compute/image/image3d.hpp> memory.hpp 0000644 00000001315 15125510617 0006570 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_HPP #define BOOST_COMPUTE_MEMORY_HPP /// \file /// /// Meta-header to include all Boost.Compute memory headers. #include <boost/compute/memory/local_buffer.hpp> #include <boost/compute/memory/svm_ptr.hpp> #endif // BOOST_COMPUTE_MEMORY_HPP async.hpp 0000644 00000001277 15125510617 0006404 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_HPP #define BOOST_COMPUTE_ASYNC_HPP /// \file /// /// Meta-header to include all Boost.Compute async headers. #include <boost/compute/async/future.hpp> #include <boost/compute/async/wait_guard.hpp> #endif // BOOST_COMPUTE_ASYNC_HPP closure.hpp 0000644 00000023577 15125510617 0006752 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CLOSURE_HPP #define BOOST_COMPUTE_CLOSURE_HPP #include <string> #include <sstream> #include <boost/config.hpp> #include <boost/fusion/adapted/boost_tuple.hpp> #include <boost/fusion/algorithm/iteration/for_each.hpp> #include <boost/mpl/for_each.hpp> #include <boost/mpl/transform.hpp> #include <boost/typeof/typeof.hpp> #include <boost/static_assert.hpp> #include <boost/algorithm/string.hpp> #include <boost/tuple/tuple.hpp> #include <boost/type_traits/function_traits.hpp> #include <boost/compute/cl.hpp> #include <boost/compute/function.hpp> #include <boost/compute/type_traits/type_name.hpp> #include <boost/compute/type_traits/detail/capture_traits.hpp> namespace boost { namespace compute { namespace detail { template<class ResultType, class ArgTuple, class CaptureTuple> class invoked_closure { public: typedef ResultType result_type; BOOST_STATIC_CONSTANT( size_t, arity = boost::tuples::length<ArgTuple>::value ); invoked_closure(const std::string &name, const std::string &source, const std::map<std::string, std::string> &definitions, const ArgTuple &args, const CaptureTuple &capture) : m_name(name), m_source(source), m_definitions(definitions), m_args(args), m_capture(capture) { } std::string name() const { return m_name; } std::string source() const { return m_source; } const std::map<std::string, std::string>& definitions() const { return m_definitions; } const ArgTuple& args() const { return m_args; } const CaptureTuple& capture() const { return m_capture; } private: std::string m_name; std::string m_source; std::map<std::string, std::string> m_definitions; ArgTuple m_args; CaptureTuple m_capture; }; } // end detail namespace /// \internal_ template<class Signature, class CaptureTuple> class closure { public: typedef typename boost::function_traits<Signature>::result_type result_type; BOOST_STATIC_CONSTANT( size_t, arity = boost::function_traits<Signature>::arity 
); closure(const std::string &name, const CaptureTuple &capture, const std::string &source) : m_name(name), m_source(source), m_capture(capture) { } ~closure() { } std::string name() const { return m_name; } /// \internal_ std::string source() const { return m_source; } /// \internal_ void define(std::string name, std::string value = std::string()) { m_definitions[name] = value; } /// \internal_ detail::invoked_closure<result_type, boost::tuple<>, CaptureTuple> operator()() const { BOOST_STATIC_ASSERT_MSG( arity == 0, "Non-nullary closure function invoked with zero arguments" ); return detail::invoked_closure<result_type, boost::tuple<>, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(), m_capture ); } /// \internal_ template<class Arg1> detail::invoked_closure<result_type, boost::tuple<Arg1>, CaptureTuple> operator()(const Arg1 &arg1) const { BOOST_STATIC_ASSERT_MSG( arity == 1, "Non-unary closure function invoked with one argument" ); return detail::invoked_closure<result_type, boost::tuple<Arg1>, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(arg1), m_capture ); } /// \internal_ template<class Arg1, class Arg2> detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2>, CaptureTuple> operator()(const Arg1 &arg1, const Arg2 &arg2) const { BOOST_STATIC_ASSERT_MSG( arity == 2, "Non-binary closure function invoked with two arguments" ); return detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2>, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2), m_capture ); } /// \internal_ template<class Arg1, class Arg2, class Arg3> detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2, Arg3>, CaptureTuple> operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const { BOOST_STATIC_ASSERT_MSG( arity == 3, "Non-ternary closure function invoked with three arguments" ); return detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2, Arg3>, CaptureTuple>( m_name, m_source, 
m_definitions, boost::make_tuple(arg1, arg2, arg3), m_capture ); } private: std::string m_name; std::string m_source; std::map<std::string, std::string> m_definitions; CaptureTuple m_capture; }; namespace detail { struct closure_signature_argument_inserter { closure_signature_argument_inserter(std::stringstream &s_, const char *capture_string, size_t last) : s(s_) { n = 0; m_last = last; size_t capture_string_length = std::strlen(capture_string); BOOST_ASSERT(capture_string[0] == '(' && capture_string[capture_string_length-1] == ')'); std::string capture_string_(capture_string + 1, capture_string_length - 2); boost::split(m_capture_names, capture_string_ , boost::is_any_of(",")); } template<class T> void operator()(const T&) const { BOOST_ASSERT(n < m_capture_names.size()); // get captured variable name std::string variable_name = m_capture_names[n]; // remove leading and trailing whitespace from variable name boost::trim(variable_name); s << capture_traits<T>::type_name() << " " << variable_name; if(n+1 < m_last){ s << ", "; } n++; } mutable size_t n; size_t m_last; std::vector<std::string> m_capture_names; std::stringstream &s; }; template<class Signature, class CaptureTuple> inline std::string make_closure_declaration(const char *name, const char *arguments, const CaptureTuple &capture_tuple, const char *capture_string) { typedef typename boost::function_traits<Signature>::result_type result_type; typedef typename boost::function_types::parameter_types<Signature>::type parameter_types; typedef typename mpl::size<parameter_types>::type arity_type; std::stringstream s; s << "inline " << type_name<result_type>() << " " << name; s << "("; // insert function arguments signature_argument_inserter i(s, arguments, arity_type::value); mpl::for_each< typename mpl::transform<parameter_types, boost::add_pointer<mpl::_1> >::type>(i); s << ", "; // insert capture arguments closure_signature_argument_inserter j( s, capture_string, boost::tuples::length<CaptureTuple>::value ); 
fusion::for_each(capture_tuple, j); s << ")"; return s.str(); } // used by the BOOST_COMPUTE_CLOSURE() macro to create a closure // function with the given signature, name, capture, and source. template<class Signature, class CaptureTuple> inline closure<Signature, CaptureTuple> make_closure_impl(const char *name, const char *arguments, const CaptureTuple &capture, const char *capture_string, const std::string &source) { std::stringstream s; s << make_closure_declaration<Signature>(name, arguments, capture, capture_string); s << source; return closure<Signature, CaptureTuple>(name, capture, s.str()); } } // end detail namespace } // end compute namespace } // end boost namespace /// Creates a closure function object with \p name and \p source. /// /// \param return_type The return type for the function. /// \param name The name of the function. /// \param arguments A list of arguments for the function. /// \param capture A list of variables to capture. /// \param source The OpenCL C source code for the function. /// /// For example, to create a function which checks if a 2D point is /// contained in a circle of a given radius: /// \code /// // radius variable declared in C++ /// float radius = 1.5f; /// /// // create a closure function which returns true if the 2D point /// // argument is contained within a circle of the given radius /// BOOST_COMPUTE_CLOSURE(bool, is_in_circle, (const float2_ p), (radius), /// { /// return sqrt(p.x*p.x + p.y*p.y) < radius; /// }); /// /// // vector of 2D points /// boost::compute::vector<float2_> points = ... /// /// // count number of points in the circle /// size_t count = boost::compute::count_if( /// points.begin(), points.end(), is_in_circle, queue /// ); /// \endcode /// /// \see BOOST_COMPUTE_FUNCTION() #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED #define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, source) #else #define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, ...) 
\ ::boost::compute::closure< \ return_type arguments, BOOST_TYPEOF(boost::tie capture) \ > name = \ ::boost::compute::detail::make_closure_impl< \ return_type arguments \ >( \ #name, #arguments, boost::tie capture, #capture, #__VA_ARGS__ \ ) #endif #endif // BOOST_COMPUTE_CLOSURE_HPP allocator/pinned_allocator.hpp 0000644 00000002560 15125510617 0012560 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP #define BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP #include <boost/compute/allocator/buffer_allocator.hpp> namespace boost { namespace compute { template<class T> class pinned_allocator : public buffer_allocator<T> { public: explicit pinned_allocator(const context &context) : buffer_allocator<T>(context) { buffer_allocator<T>::set_mem_flags( buffer::read_write | buffer::alloc_host_ptr ); } pinned_allocator(const pinned_allocator<T> &other) : buffer_allocator<T>(other) { } pinned_allocator<T>& operator=(const pinned_allocator<T> &other) { if(this != &other){ buffer_allocator<T>::operator=(other); } return *this; } ~pinned_allocator() { } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP allocator/buffer_allocator.hpp 0000644 00000005656 15125510617 0012565 0 ustar 00 //---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt 
// // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP #define BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP #include <boost/compute/buffer.hpp> #include <boost/compute/config.hpp> #include <boost/compute/context.hpp> #include <boost/compute/detail/device_ptr.hpp> namespace boost { namespace compute { /// \class buffer_allocator /// \brief The buffer_allocator class allocates memory with \ref buffer objects /// /// \see buffer template<class T> class buffer_allocator { public: typedef T value_type; typedef detail::device_ptr<T> pointer; typedef const detail::device_ptr<T> const_pointer; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; explicit buffer_allocator(const context &context) : m_context(context), m_mem_flags(buffer::read_write) { } buffer_allocator(const buffer_allocator<T> &other) : m_context(other.m_context), m_mem_flags(other.m_mem_flags) { } buffer_allocator<T>& operator=(const buffer_allocator<T> &other) { if(this != &other){ m_context = other.m_context; m_mem_flags = other.m_mem_flags; } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES buffer_allocator(buffer_allocator<T>&& other) BOOST_NOEXCEPT : m_context(std::move(other.m_context)), m_mem_flags(other.m_mem_flags) { } buffer_allocator<T>& operator=(buffer_allocator<T>&& other) BOOST_NOEXCEPT { m_context = std::move(other.m_context); m_mem_flags = other.m_mem_flags; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES ~buffer_allocator() { } pointer allocate(size_type n) { buffer buf(m_context, n * sizeof(T), m_mem_flags); clRetainMemObject(buf.get()); return detail::device_ptr<T>(buf); } void deallocate(pointer p, size_type n) { BOOST_ASSERT(p.get_buffer().get_context() == m_context); (void) n; clReleaseMemObject(p.get_buffer().get()); } size_type max_size() const { return 
m_context.get_device().max_memory_alloc_size() / sizeof(T); } context get_context() const { return m_context; } protected: void set_mem_flags(cl_mem_flags flags) { m_mem_flags = flags; } private: context m_context; cl_mem_flags m_mem_flags; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP
| ver. 1.6 |
Github
|
.
| PHP 8.2.30 | ??????????? ?????????: 0.05 |
proxy
|
phpinfo
|
???????????