?????????? ????????? - ??????????????? - /home/agenciai/public_html/cd38d8/opencl.zip
???????
PK ���[�y�<� � misc.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_misc_hpp_ #define boost_numeric_ublas_opencl_misc_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/opencl/vector.hpp> #include <boost/numeric/ublas/opencl/matrix.hpp> namespace boost { namespace numeric { namespace ublas { namespace opencl { template <typename T> typename std::enable_if<is_numeric<T>::value, T>::type a_sum(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue) { compute::vector<T> scratch_buffer(v.size(), queue.get_context()); compute::vector<T> result_buffer(1, queue.get_context()); cl_event event; if (std::is_same<T, float>::value) clblasSasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, double>::value) clblasDasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<float>>::value) clblasScasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<double>>::value) clblasDzasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event clWaitForEvents(1, &event); return result_buffer[0]; } template <typename T, typename A> typename std::enable_if<is_numeric<T>::value, T>::type a_sum(ublas::vector<T, A> const &v, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); return a_sum(vdev, queue); } template <typename T> typename std::enable_if<std::is_same<T, float>::value | std::is_same<T, double>::value, T>::type norm_1(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue) { return a_sum(v, queue); } template <typename T, typename A> typename std::enable_if<std::is_same<T, float>::value | std::is_same<T, double>::value, T>::type norm_1(ublas::vector<T, A> const &v, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); return norm_1(vdev, queue); } template <typename T> typename std::enable_if<is_numeric<T>::value, T>::type norm_2(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue) { compute::vector<T> scratch_buffer(2*v.size(), queue.get_context()); compute::vector<T> result_buffer(1, queue.get_context()); cl_event event; if (std::is_same<T, float>::value) clblasSnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, double>::value) clblasDnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<float>>::value) clblasScnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same<T, std::complex<double>>::value) clblasDznrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event clWaitForEvents(1, &event); return result_buffer[0]; } template <typename T, typename A> typename std::enable_if<is_numeric<T>::value, T>::type norm_2(ublas::vector<T, A> const &v, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); return norm_2(vdev, queue); } }}}} #endif PK ���[�x��0 0 operations.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_operations_hpp_ #define boost_numeric_ublas_opencl_operations_hpp_ #include <boost/numeric/ublas/opencl/transpose.hpp> #include <boost/numeric/ublas/opencl/prod.hpp> #include <boost/numeric/ublas/opencl/elementwise.hpp> #include <boost/numeric/ublas/opencl/misc.hpp> #endif PK ���[�1I4 I4 prod.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_prod_hpp_ #define boost_numeric_ublas_opencl_prod_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/opencl/vector.hpp> #include <boost/numeric/ublas/opencl/matrix.hpp> #include <boost/numeric/ublas/opencl/transpose.hpp> #include <boost/compute/buffer.hpp> namespace boost { namespace numeric { namespace ublas { namespace opencl { #define ONE_DOUBLE_COMPLEX {{1.0, 00.0}} #define ONE_FLOAT_COMPLEX {{1.0f, 00.0f}} template <typename T, typename L1, typename L2> typename std::enable_if<is_numeric<T>::value>::type prod(ublas::matrix<T, L1, opencl::storage> const &a, ublas::matrix<T, L2, opencl::storage> const &b, ublas::matrix<T, L1, opencl::storage> &result, compute::command_queue &queue) { assert(a.device() == b.device() && a.device() == result.device() && a.device() == queue.get_device()); assert(a.size2() == b.size1()); result.fill(0, queue); //to hold matrix b with layout 1 if the b has different layout std::unique_ptr<ublas::matrix<T, L1, opencl::storage>> bl1; cl_event event = NULL; cl_mem buffer_a = a.begin().get_buffer().get(); cl_mem buffer_b = b.begin().get_buffer().get(); cl_mem buffer_result = result.begin().get_buffer().get(); if (!(std::is_same<L1, L2>::value)) { bl1.reset(new ublas::matrix<T, L1, opencl::storage>(b.size1(), b.size2(), queue.get_context())); change_layout(b, *bl1, queue); buffer_b = bl1->begin().get_buffer().get(); } clblasOrder Order = std::is_same<L1, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor; size_t lda = Order == clblasRowMajor ? a.size2() : a.size1(); size_t ldb = Order == clblasRowMajor ? b.size2() : a.size2(); size_t ldc = Order == clblasRowMajor ? b.size2() : a.size1(); if (std::is_same<T, float>::value) clblasSgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), b.size2(), a.size2(), 1, buffer_a, 0, lda, buffer_b, 0, ldb, 1, buffer_result, 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, double>::value) clblasDgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), b.size2(), a.size2(), 1, buffer_a, 0, lda, buffer_b, 0, ldb, 1, buffer_result, 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<float>>::value) clblasCgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), b.size2(), a.size2(), ONE_FLOAT_COMPLEX, buffer_a, 0, lda, buffer_b, 0, ldb, ONE_FLOAT_COMPLEX, buffer_result, 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<double>>::value) clblasZgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), b.size2(), a.size2(), ONE_DOUBLE_COMPLEX, buffer_a, 0, lda, buffer_b, 0, ldb, ONE_DOUBLE_COMPLEX, buffer_result, 0, ldc, 1, &(queue.get()), 0, NULL, &event); clWaitForEvents(1, &event); } template <typename T, typename L1, typename L2, typename A> typename std::enable_if<is_numeric<T>::value>::type prod(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, ublas::matrix<T, L1, A> &result, compute::command_queue &queue) { ublas::matrix<T, L1, opencl::storage> adev(a, queue); ublas::matrix<T, L2, opencl::storage> bdev(b, queue); ublas::matrix<T, L1, opencl::storage> rdev(a.size1(), b.size2(), queue.get_context()); prod(adev, bdev, rdev, queue); rdev.to_host(result,queue); } template <typename T, typename L1, typename L2, typename A> typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L1, A>>::type prod(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, compute::command_queue &queue) { ublas::matrix<T, L1, A> result(a.size1(), b.size2()); prod(a, b, result, queue); return result; } template <typename T, typename L> typename std::enable_if<is_numeric<T>::value>::type prod(ublas::matrix<T, L, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, compute::command_queue &queue) { assert(a.device() == b.device() && a.device() == result.device() && a.device() == queue.get_device()); assert(a.size2() == b.size()); result.fill(0, queue); cl_event event = NULL; clblasOrder Order = std::is_same<L, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor; int lda = Order == clblasRowMajor ? a.size2() : a.size1(); int ldb = Order == clblasRowMajor ? 1 : a.size2(); int ldc = Order == clblasRowMajor ? 1 : a.size1(); if (std::is_same<T, float>::value) clblasSgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), 1, a.size2(), 1, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, 1, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, double>::value) clblasDgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), 1, a.size2(), 1, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, 1, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<float>>::value) clblasCgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), 1, a.size2(), ONE_FLOAT_COMPLEX, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, ONE_FLOAT_COMPLEX, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<double>>::value) clblasZgemm(Order, clblasNoTrans, clblasNoTrans, a.size1(), 1, a.size2(), ONE_DOUBLE_COMPLEX, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, ONE_DOUBLE_COMPLEX, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); clWaitForEvents(1, &event); } template <typename T, typename L, typename A> typename std::enable_if<is_numeric<T>::value>::type prod(ublas::matrix<T, L, A> const &a, ublas::vector<T, A> const &b, ublas::vector<T, A> &result, compute::command_queue &queue) { ublas::matrix<T, L, opencl::storage> adev(a, queue); ublas::vector<T, opencl::storage> bdev(b, queue); ublas::vector<T, opencl::storage> rdev(a.size1(), queue.get_context()); prod(adev, bdev, rdev, queue); rdev.to_host(result, queue); } template <typename T, typename L, typename A> typename std::enable_if<is_numeric<T>::value, ublas::vector<T, A>>::type prod(ublas::matrix<T, L, A> const &a, ublas::vector<T, A> const &b, compute::command_queue &queue) { ublas::vector<T, A> result(a.size1()); prod(a, b, result, queue); return result; } template <typename T, typename L> typename std::enable_if<is_numeric<T>::value>::type prod(ublas::vector<T, opencl::storage> const &a, ublas::matrix<T, L, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, compute::command_queue &queue) { assert(a.device() == b.device() && a.device() == result.device() && a.device() == queue.get_device()); assert(a.size() == b.size1()); result.fill(0, queue); cl_event event = NULL; clblasOrder Order = std::is_same<L, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor; size_t lda = Order == clblasRowMajor ? a.size() : 1; size_t ldb = Order == clblasRowMajor ? b.size2() : a.size(); size_t ldc = Order == clblasRowMajor ? b.size2() : 1; if (std::is_same<T, float>::value) clblasSgemm(Order, clblasNoTrans, clblasNoTrans, 1, b.size2(), a.size(), 1, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, 1, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, double>::value) clblasDgemm(Order, clblasNoTrans, clblasNoTrans, 1, b.size2(), a.size(), 1, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, 1, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<float>>::value) clblasCgemm(Order, clblasNoTrans, clblasNoTrans, 1, b.size2(), a.size(), ONE_FLOAT_COMPLEX, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, ONE_FLOAT_COMPLEX, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<double>>::value) clblasZgemm(Order, clblasNoTrans, clblasNoTrans, 1, b.size2(), a.size(), ONE_DOUBLE_COMPLEX, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, ONE_DOUBLE_COMPLEX, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); clWaitForEvents(1, &event); } template <class T, class L, class A> typename std::enable_if<is_numeric<T>::value>::type prod(ublas::vector<T, A> const &a, ublas::matrix<T, L, A> const &b, ublas::vector<T, A> &result, compute::command_queue &queue) { ublas::vector<T, opencl::storage> adev(a, queue); ublas::matrix<T, L, opencl::storage> bdev(b, queue); ublas::vector<T, opencl::storage> rdev(b.size2(), queue.get_context()); prod(adev, bdev, rdev, queue); rdev.to_host(result, queue); } template <class T, class L, class A> typename std::enable_if<is_numeric<T>::value, ublas::vector<T, A>>::type prod(ublas::vector<T, A> const &a, ublas::matrix<T, L, A> const &b, compute::command_queue &queue) { ublas::vector<T, A> result(b.size2()); prod(a, b, result, queue); return result; } template<class T> typename std::enable_if<std::is_fundamental<T>::value, T>::type inner_prod(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, compute::command_queue &queue) { assert(a.device() == b.device() && a.device() == queue.get_device()); assert(a.size() == b.size()); return compute::inner_product(a.begin(), a.end(), b.begin(), T(0), queue); } template<class T, class A> typename std::enable_if<std::is_fundamental<T>::value, T>::type inner_prod(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, compute::command_queue& queue) { ublas::vector<T, opencl::storage> adev(a, queue); ublas::vector<T, opencl::storage> bdev(b, queue); return inner_prod(adev, bdev, queue); } template <class T, class L> typename std::enable_if<is_numeric<T>::value>::type outer_prod(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::matrix<T, L, opencl::storage> &result, compute::command_queue & queue) { assert(a.device() == b.device() && a.device() == result.device() && a.device() == queue.get_device()); result.fill(0, queue); cl_event event = NULL; clblasOrder Order = std::is_same<L, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor; size_t lda = Order == clblasRowMajor ? 1 : a.size(); size_t ldb = Order == clblasRowMajor ? b.size() : 1; size_t ldc = Order == clblasRowMajor ? b.size() : a.size(); if (std::is_same<T, float>::value) clblasSgemm(Order, clblasNoTrans, clblasNoTrans, a.size(), b.size(), 1, 1, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, 1, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, double>::value) clblasDgemm(Order, clblasNoTrans, clblasNoTrans, a.size(), b.size(), 1, 1, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, 1, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<float>>::value) clblasCgemm(Order, clblasNoTrans, clblasNoTrans, a.size(), b.size(), 1, ONE_FLOAT_COMPLEX, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, ONE_FLOAT_COMPLEX, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); else if (std::is_same<T, std::complex<double>>::value) clblasZgemm(Order, clblasNoTrans, clblasNoTrans, a.size(), b.size(), 1, ONE_DOUBLE_COMPLEX, a.begin().get_buffer().get(), 0, lda, b.begin().get_buffer().get(), 0, ldb, ONE_DOUBLE_COMPLEX, result.begin().get_buffer().get(), 0, ldc, 1, &(queue.get()), 0, NULL, &event); clWaitForEvents(1, &event); } template <class T, class L, class A> typename std::enable_if<is_numeric<T>::value>::type outer_prod(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, ublas::matrix<T, L, A> &result, compute::command_queue &queue) { ublas::vector<T, opencl::storage> adev(a, queue); ublas::vector<T, opencl::storage> bdev(b, queue); ublas::matrix<T, L, opencl::storage> rdev(a.size(), b.size(), queue.get_context()); outer_prod(adev, bdev, rdev, queue); rdev.to_host(result, queue); } template <class T,class L = ublas::basic_row_major<>, class A> typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type outer_prod(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, compute::command_queue &queue) { ublas::matrix<T, L, A> result(a.size(), b.size()); outer_prod(a, b, result, queue); return result; } #undef ONE_DOUBLE_COMPLEX #undef ONE_FLOAT_COMPLEX }}}} #endif PK ���[:ը�� � transpose.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_transpose_hpp_ #define boost_numeric_ublas_opencl_transpose_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/opencl/vector.hpp> #include <boost/numeric/ublas/opencl/matrix.hpp> // Kernel for transposition of various data types #define OPENCL_TRANSPOSITION_KERNEL(DATA_TYPE) \ "__kernel void transpose(__global " #DATA_TYPE "* in, __global " #DATA_TYPE "* result, unsigned int width, unsigned int height) \n" \ "{ \n" \ " unsigned int column_index = get_global_id(0); \n" \ " unsigned int row_index = get_global_id(1); \n" \ " if (column_index < width && row_index < height) \n" \ " { \n" \ " unsigned int index_in = column_index + width * row_index; \n" \ " unsigned int index_result = row_index + height * column_index; \n" \ " result[index_result] = in[index_in]; \n" \ " } \n" \ "} \n" namespace boost { namespace numeric { namespace ublas { namespace opencl { template<class T, class L1, class L2> typename std::enable_if<is_numeric<T>::value>::type change_layout(ublas::matrix<T, L1, opencl::storage> const &m, ublas::matrix<T, L2, opencl::storage> &result, compute::command_queue& queue) { assert(m.size1() == result.size1() && m.size2() == result.size2()); assert(m.device() == result.device() && m.device() == queue.get_device()); assert(!(std::is_same<L1, L2>::value)); char const *kernel; if (std::is_same<T, float>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(float); else if (std::is_same<T, double>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(double); else if (std::is_same<T, std::complex<float>>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(float2); else if (std::is_same<T, std::complex<double>>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(double2); size_t len = strlen(kernel); cl_int err; cl_context c_context = queue.get_context().get(); cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err); clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL); cl_kernel c_kernel = clCreateKernel(program, "transpose", &err); size_t width = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size2() : m.size1(); size_t height = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size1() : m.size2(); size_t global_size[2] = { width , height }; clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get()); clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get()); clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width); clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height); cl_command_queue c_queue = queue.get(); cl_event event = NULL; clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event); clWaitForEvents(1, &event); } template<class T, class L1, class L2, class A> typename std::enable_if<is_numeric<T>::value>::type change_layout(ublas::matrix<T, L1, A> const &m, ublas::matrix<T, L2, A> &result, compute::command_queue& queue) { ublas::matrix<T, L1, opencl::storage> mdev(m, queue); ublas::matrix<T, L2, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context()); change_layout(mdev, rdev, queue); rdev.to_host(result, queue); } template<class T, class L> typename std::enable_if<is_numeric<T>::value>::type trans(ublas::matrix<T, L, opencl::storage> const &m, ublas::matrix<T, L, opencl::storage> &result, compute::command_queue& queue) { assert(m.size1() == result.size2() && m.size2() == result.size1()); assert(m.device() == result.device() && m.device() == queue.get_device()); char const *kernel; if (std::is_same<T, float>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(float); else if (std::is_same<T, double>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(double); else if (std::is_same<T, std::complex<float>>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(float2); else if (std::is_same<T, std::complex<double>>::value) kernel = OPENCL_TRANSPOSITION_KERNEL(double2); size_t len = strlen(kernel); cl_int err; cl_context c_context = queue.get_context().get(); cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err); clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL); cl_kernel c_kernel = clCreateKernel(program, "transpose", &err); size_t width = std::is_same <L, ublas::basic_row_major<>>::value ? m.size2() : m.size1(); size_t height = std::is_same <L, ublas::basic_row_major<>>::value ? m.size1() : m.size2(); size_t global_size[2] = { width , height }; clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get()); clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get()); clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width); clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height); cl_command_queue c_queue = queue.get(); cl_event event = NULL; clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event); clWaitForEvents(1, &event); } template<class T, class L, class A> typename std::enable_if<is_numeric<T>::value>::type trans(ublas::matrix<T, L, A> const &m, ublas::matrix<T, L, A> &result, compute::command_queue& queue) { ublas::matrix<T, L, opencl::storage> mdev(m, queue); ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context()); trans(mdev, rdev, queue); rdev.to_host(result, queue); } template<class T, class L, class A> typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type trans(ublas::matrix<T, L, A>& m, compute::command_queue& queue) { ublas::matrix<T, L, A> result(m.size2(), m.size1()); trans(m, result, queue); return result; } }}}} #endif PK ���[�|�`� � vector.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_vector_hpp_ #define boost_numeric_ublas_opencl_vector_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/functional.hpp> #include <boost/compute/core.hpp> #include <boost/compute/algorithm.hpp> #include <boost/compute/buffer.hpp> #include <boost/compute/container/vector.hpp> namespace boost { namespace numeric { namespace ublas { namespace opencl { class storage; namespace compute = boost::compute; } // namespace opencl template <class T> class vector<T, opencl::storage> : public boost::compute::vector<T> { typedef std::size_t size_type; public: vector() : compute::vector<T>() {} vector(size_type size, compute::context context) : compute::vector<T>(size, context) { device_ = context.get_device();} vector(size_type size, T value, compute::command_queue queue) : compute::vector<T>(size, value, queue.get_context()) { queue.finish(); device_ = queue.get_device(); } template <typename A> vector(vector<T, A> const &v, compute::command_queue &queue) : vector(v.size(), queue.get_context()) { this->from_host(v, queue); } const compute::device device() const { return device_;} compute::device device() { return device_;} template<class A> void from_host(ublas::vector<T, A> const &v, compute::command_queue & queue) { assert(this->device() == queue.get_device()); compute::copy(v.begin(), v.end(), this->begin(), queue); queue.finish(); } template<class A> void to_host(ublas::vector<T, A>& v, compute::command_queue& queue) const { assert(this->device() == queue.get_device()); compute::copy(this->begin(), this->end(), v.begin(), queue); queue.finish(); } void fill(T value, compute::command_queue & queue) { assert(this->device() == queue.get_device()); compute::fill(this->begin(), this->end(), value, queue); queue.finish(); } private: compute::device device_; }; }}} #endif PK ���[2�TE E library.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_library_hpp_ #define boost_numeric_ublas_opencl_library_hpp_ #include <clBLAS.h> #include <type_traits> #include <complex> namespace boost { namespace numeric { namespace ublas { namespace opencl { class library { public: library() { clblasSetup();} ~library() { clblasTeardown();} }; template <typename T> struct is_numeric { static bool const value = std::is_same<T, float>::value | std::is_same<T, double>::value | std::is_same<T, std::complex<float>>::value | std::is_same<T, std::complex<double>>::value; }; }}}} #endif PK ���[�bL&@ &@ elementwise.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_elementwise_hpp_ #define boost_numeric_ublas_opencl_elementwise_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/opencl/vector.hpp> #include <boost/numeric/ublas/opencl/matrix.hpp> namespace boost { namespace numeric { namespace ublas { namespace opencl { namespace compute = boost::compute; namespace lambda = boost::compute::lambda; template <typename T, typename L1, typename L2, typename L3, class O> void element_wise(ublas::matrix<T, L1, opencl::storage> const &a, ublas::matrix<T, L2, opencl::storage> const &b, ublas::matrix<T, L3, opencl::storage> &result, O op, compute::command_queue& queue) { assert(a.device() == b.device() && a.device() == result.device() && a.device() == queue.get_device()); assert(a.size1() == b.size1() && a.size2() == b.size2()); compute::transform(a.begin(), a.end(), b.begin(), result.begin(), op, queue); queue.finish(); } template <typename T, typename L1, typename L2, typename L3, typename A, class O> void element_wise(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, ublas::matrix<T, L3, A> &result, O op, compute::command_queue &queue) { ublas::matrix<T, L1, opencl::storage> adev(a, queue); ublas::matrix<T, L2, opencl::storage> bdev(b, queue); ublas::matrix<T, L3, opencl::storage> rdev(a.size1(), b.size2(), queue.get_context()); element_wise(adev, bdev, rdev, op, queue); rdev.to_host(result, queue); } template <typename T, typename L1, typename L2, typename A, typename O> ublas::matrix<T, L1, A> element_wise(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, O op, compute::command_queue &queue) { ublas::matrix<T, L1, A> result(a.size1(), b.size2()); element_wise(a, b, result, op, queue); return result; } template <typename T, typename O> void element_wise(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, O op, compute::command_queue& queue) { assert(a.device() == b.device() && a.device() == result.device() && a.device() == queue.get_device()); assert(a.size() == b.size()); compute::transform(a.begin(), a.end(), b.begin(), result.begin(), op, queue); queue.finish(); } template <typename T, typename A, typename O> void element_wise(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, ublas::vector<T, A>& result, O op, compute::command_queue &queue) { ublas::vector<T, opencl::storage> adev(a, queue); ublas::vector<T, opencl::storage> bdev(b, queue); ublas::vector<T, opencl::storage> rdev(a.size(), queue.get_context()); element_wise(adev, bdev, rdev, op, queue); rdev.to_host(result, queue); } template <typename T, typename A, typename O> ublas::vector<T, A> element_wise(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, O op, compute::command_queue &queue) { ublas::vector<T, A> result(a.size()); element_wise(a, b, result, op, queue); return result; } template <typename T, typename L1, typename L2, typename L3> void element_add(ublas::matrix<T, L1, opencl::storage> const &a, ublas::matrix<T, L2, opencl::storage> const &b, ublas::matrix<T, L3, opencl::storage> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::plus<T>(), queue); } template <typename T, typename L1, typename L2, typename L3, typename A> void element_add(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, ublas::matrix<T, L3, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::plus<T>(), queue); } template <typename T, typename L1, typename L2, typename A> ublas::matrix<T, L1, A> element_add(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::plus<T>(), queue); } template <typename T> void element_add(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, result, compute::plus<T>(), queue); } template <typename T, typename A> void element_add(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, ublas::vector<T, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::plus<T>(), queue); } template <typename T, typename A> ublas::vector<T, A> element_add(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::plus<T>(), queue); } template<typename T, typename L> void element_add(ublas::matrix<T, L, opencl::storage> const &m, T value, ublas::matrix<T, L, opencl::storage> &result, compute::command_queue& queue) { assert(m.device() == result.device() && m.device() == queue.get_device()); assert(m.size1() == result.size1() && m.size2() == result.size2()); compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 + value, queue); queue.finish(); } template<typename T, typename L, typename A> void element_add(ublas::matrix<T, L, A> const &m, T value, ublas::matrix<T, L, A> &result, compute::command_queue& queue) { ublas::matrix<T, L, opencl::storage> mdev(m, queue); ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context()); element_add(mdev, value, rdev, queue); rdev.to_host(result, queue); } template<typename T, typename L, typename A> ublas::matrix<T, L, A> element_add(ublas::matrix<T, L, A> const &m, T value, compute::command_queue& queue) { ublas::matrix<T, L, A> result(m.size1(), m.size2()); element_add(m, value, result, queue); return result; } template<typename T> void element_add(ublas::vector<T, opencl::storage> const &v, T value, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { assert(v.device() == result.device() && v.device() == queue.get_device()); assert(v.size() == result.size()); compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 + value, queue); queue.finish(); } template<typename T, typename A> void element_add(ublas::vector<T, A> const &v, T value, ublas::vector<T, A> &result, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context()); element_add(vdev, value, rdev, queue); rdev.to_host(result, queue); } template <typename T, typename A> ublas::vector<T, A> element_add(ublas::vector<T, A> const &v, T value, compute::command_queue& queue) { ublas::vector<T, A> result(v.size()); element_add(v, value, result, queue); return result; } template <typename T, typename L1, typename L2, typename L3> void element_sub(ublas::matrix<T, L1, opencl::storage> const &a, ublas::matrix<T, L2, opencl::storage> const &b, ublas::matrix<T, L3, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, compute::minus<T>(), result, queue); } template <typename T, typename L1, typename L2, typename L3, typename A> void element_sub(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, ublas::matrix<T, L3, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::minus<T>(), queue); } template <typename T, typename L1, typename L2, typename A> ublas::matrix<T, L1, A> element_sub(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::minus<T>(), queue); } template <typename T> void element_sub(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, result, compute::minus<T>(), queue); } template <typename T, typename A> void element_sub(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, ublas::vector<T, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::minus<T>(), queue); } template <typename T, typename A> ublas::vector<T, A> element_sub(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::minus<T>(), queue); } template <typename T, typename L> void element_sub(ublas::matrix<T, L, opencl::storage> const &m, T value, ublas::matrix<T, L, opencl::storage> &result, compute::command_queue& queue) { assert(m.device() == result.device() && m.device() == queue.get_device()); assert(m.size1() == result.size1() && m.size2() == result.size2()); compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 - value, queue); queue.finish(); } template <typename T, typename L, typename A> void element_sub(ublas::matrix<T, L, A> const &m, T value, ublas::matrix<T, L, A> &result, compute::command_queue& queue) { ublas::matrix<T, L, opencl::storage> mdev(m, queue); ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context()); element_sub(mdev, value, rdev, queue); rdev.to_host(result, queue); } template <typename T, typename L, typename A> ublas::matrix<T, L, A> element_sub(ublas::matrix<T, L, A> const &m, T value, compute::command_queue& queue) { ublas::matrix<T, L, A> result(m.size1(), m.size2()); element_sub(m, value, result, queue); return result; } template <typename T> void element_sub(ublas::vector<T, opencl::storage> const &v, T value, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { assert(v.device() == result.device() && v.device() == queue.get_device()); assert(v.size() == result.size()); compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 - value, queue); queue.finish(); } template <typename T, typename A> void element_sub(ublas::vector<T, A> const &v, T value, ublas::vector<T, A> &result, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context()); element_sub(vdev, value, rdev, queue); rdev.to_host(result, queue); } template <typename T, typename A> ublas::vector<T, A> element_sub(ublas::vector<T, A> const &v, T value, compute::command_queue& queue) { ublas::vector<T, A> result(v.size()); element_sub(v, value, result, queue); return result; } template <typename T, typename L1, typename L2, typename L3> void element_prod(ublas::matrix<T, L1, opencl::storage> const &a, ublas::matrix<T, L2, opencl::storage> const &b, ublas::matrix<T, L3, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, result, compute::multiplies<T>(), queue); } template <typename T, typename L1, typename L2, typename L3, typename A> void element_prod(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, ublas::matrix<T, L3, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::multiplies<T>(), queue); } template <typename T, typename L1, typename L2, typename A> ublas::matrix<T, L1, A> element_prod(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::multiplies<T>(), queue); } template <typename T> void element_prod(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, result, compute::multiplies<T>(), queue); } template <typename T, typename A> void element_prod(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, ublas::vector<T, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::multiplies<T>(), queue); } template <typename T, typename A> ublas::vector<T, A> element_prod(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::multiplies<T>(), queue); } template <typename T, typename L> void element_scale(ublas::matrix<T, L, opencl::storage> const &m, T value, ublas::matrix<T, L, opencl::storage> &result, compute::command_queue& queue) { assert(m.device() == result.device() && m.device() == queue.get_device()); assert(m.size1() == result.size1() && m.size2() == result.size2()); compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 * value, queue); queue.finish(); } template <typename T, typename L, typename A> void element_scale(ublas::matrix<T, L, A> const &m, T value, ublas::matrix<T, L, A> &result, compute::command_queue& queue) { ublas::matrix<T, L, opencl::storage> mdev(m, queue); ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context()); element_scale(mdev, value, rdev, queue); rdev.to_host(result, queue); } template <typename T, typename L, typename A> ublas::matrix<T, L, A> element_scale(ublas::matrix<T, L, A> const &m, T value, compute::command_queue& queue) { ublas::matrix<T, L, A> result(m.size1(), m.size2()); element_scale(m, value, result, queue); return result; } template <typename T> void element_scale(ublas::vector<T, opencl::storage> const &v, T value, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { assert(v.device() == result.device() && v.device() == queue.get_device()); assert(v.size() == result.size()); compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 * value, queue); queue.finish(); } template <typename T, typename A> void element_scale(ublas::vector<T, A> const &v, T value, ublas::vector<T, A> & result, compute::command_queue& queue) { ublas::vector<T, opencl::storage> vdev(v, queue); ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context()); element_scale(vdev, value, rdev, queue); rdev.to_host(result, queue); } template <typename T, typename A> ublas::vector<T,A> element_scale(ublas::vector<T, A> const &v, T value, compute::command_queue& queue) { ublas::vector<T, A> result(v.size()); element_scale(v, value, result, queue); return result; } template <typename T, typename L1, typename L2, typename L3> void element_div(ublas::matrix<T, L1, opencl::storage> const &a, ublas::matrix<T, L2, opencl::storage> const &b, ublas::matrix<T, L3, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, result, compute::divides<T>(), queue); } template <typename T, typename L1, typename L2, typename L3, typename A> void element_div(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, ublas::matrix<T, L3, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::divides<T>(), queue); } template <typename T, typename L1, typename L2, typename A> ublas::matrix<T, L1, A> element_div(ublas::matrix<T, L1, A> const &a, ublas::matrix<T, L2, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::divides<T>(), queue); } template <typename T> void element_div(ublas::vector<T, opencl::storage> const &a, ublas::vector<T, opencl::storage> const &b, ublas::vector<T, opencl::storage> &result, compute::command_queue& queue) { element_wise(a, b, result, compute::divides<T>(), queue); } template <typename T, typename A> void element_div(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, ublas::vector<T, A> &result, compute::command_queue &queue) { element_wise(a, b, result, compute::divides<T>(), queue); } template <typename T, typename A> ublas::vector<T, A> element_div(ublas::vector<T, A> const &a, ublas::vector<T, A> const &b, compute::command_queue &queue) { return element_wise(a, b, compute::divides<T>(), queue); } }}}} #endif PK ���[Mg@�Q Q matrix.hppnu �[��� // Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_matrix_hpp_ #define boost_numeric_ublas_opencl_matrix_hpp_ #include <boost/numeric/ublas/opencl/library.hpp> #include <boost/numeric/ublas/matrix.hpp> #include <boost/numeric/ublas/functional.hpp> #include <boost/compute/core.hpp> #include <boost/compute/algorithm.hpp> #include <boost/compute/buffer.hpp> namespace boost { namespace numeric { namespace ublas { namespace opencl { class storage; namespace compute = boost::compute; } // namespace opencl template<class T, class L> class matrix<T, L, opencl::storage> : public matrix_container<matrix<T, L, opencl::storage> > { typedef typename boost::compute::buffer_allocator<T>::size_type size_type; typedef L layout_type; typedef matrix<T, L, opencl::storage> self_type; public: matrix() : matrix_container<self_type>(), size1_(0), size2_(0), data_() , device_() {} matrix(size_type size1, size_type size2, compute::context c) : matrix_container<self_type>(), size1_(size1), size2_(size2), device_(c.get_device()) { compute::buffer_allocator<T> allocator(c); data_ = allocator.allocate(layout_type::storage_size(size1, size2)).get_buffer(); } matrix(size_type size1, size_type size2, T const &value, compute::command_queue &q) : matrix_container<self_type>(), size1_(size1), size2_(size2), device_(q.get_device()) { compute::buffer_allocator<T> allocator(q.get_context()); data_ = allocator.allocate(layout_type::storage_size(size1, size2)).get_buffer(); compute::fill(this->begin(), this->end(), value, q); q.finish(); } template <typename A> matrix(matrix<T, L, A> const &m, compute::command_queue &queue) : matrix(m.size1(), m.size2(), queue.get_context()) { this->from_host(m, queue); } size_type size1() const { return size1_;} size_type size2() const { return size2_;} const compute::buffer_iterator<T> begin() const { return compute::make_buffer_iterator<T>(data_);} compute::buffer_iterator<T> begin() { return compute::make_buffer_iterator<T>(data_);} compute::buffer_iterator<T> end() { return compute::make_buffer_iterator<T>(data_, layout_type::storage_size(size1_, size2_));} const compute::buffer_iterator<T> end() const { return compute::make_buffer_iterator<T>(data_, layout_type::storage_size(size1_, size2_));} const compute::device &device() const { return device_;} compute::device &device() { return device_;} void fill(T value, compute::command_queue &queue) { assert(device_ == queue.get_device()); compute::fill(this->begin(), this->end(), value, queue); queue.finish(); } /** Copies a matrix to a device * \param m is a matrix that is not on the device _device and it is copied to it * \param queue is the command queue that will execute the operation */ template<class A> void from_host(ublas::matrix<T, L, A> const &m, compute::command_queue &queue) { assert(device_ == queue.get_device()); compute::copy(m.data().begin(), m.data().end(), this->begin(), queue); queue.finish(); } /** Copies a matrix from a device * \param m is a matrix that will be reized to (size1_,size2) and the values of (*this) will be copied in it * \param queue is the command queue that will execute the operation */ template<class A> void to_host(ublas::matrix<T, L, A> &m, compute::command_queue &queue) const { assert(device_ == queue.get_device()); compute::copy(this->begin(), this->end(), m.data().begin(), queue); queue.finish(); } private: size_type size1_; size_type size2_; compute::buffer data_; compute::device device_; }; }}} #endif PK ���[�y�<� � misc.hppnu �[��� PK ���[�x��0 0 operations.hppnu �[��� PK ���[�1I4 I4 q prod.hppnu �[��� PK ���[:ը�� � �N transpose.hppnu �[��� PK ���[�|�`� � g vector.hppnu �[��� PK ���[2�TE E p library.hppnu �[��� PK ���[�bL&@ &@ �s elementwise.hppnu �[��� PK ���[Mg@�Q Q �� matrix.hppnu �[��� PK Y ��
| ver. 1.6 |
Github
|
.
| PHP 8.2.30 | ??????????? ?????????: 0 |
proxy
|
phpinfo
|
???????????