Holds all CUDA compute kernels used by ViennaCL. More...
Namespaces | |
| detail | |
| Helper functions for the CUDA linear algebra backend. | |
Classes | |
| struct | mat_mult_matrix_index |
| Helper struct for accessing an element of a row- or column-major matrix. More... | |
Functions | |
| template<typename NumericT > | |
| void | bisectSmall (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataSmall< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision) |
| template<typename NumericT > | |
| void | bisectLarge (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT lg, const NumericT ug, const NumericT precision) |
| template<typename NumericT > | |
| void | bisectLarge_OneIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision) |
| template<typename NumericT > | |
| void | bisectLarge_MultIntervals (const viennacl::linalg::detail::InputData< NumericT > &input, viennacl::linalg::detail::ResultDataLarge< NumericT > &result, const unsigned int mat_size, const NumericT precision) |
| template<typename NumericT > | |
| __device__ void | writeToGmem (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum, unsigned short *s_compaction_list, unsigned short *s_cl_helper, unsigned int offset_mult_lambda) |
| Write data to global memory. More... | |
| template<typename NumericT > | |
| __device__ void | compactStreamsFinal (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, unsigned int &offset_mult_lambda, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper, unsigned int is_one_lambda, unsigned int is_one_lambda_2, NumericT &left, NumericT &right, NumericT &left_2, NumericT &right_2, unsigned int &left_count, unsigned int &right_count, unsigned int &left_count_2, unsigned int &right_count_2, unsigned int c_block_iend, unsigned int c_sum_block, unsigned int c_block_iend_2, unsigned int c_sum_block_2) |
| Perform final stream compaction before writing data to global memory. More... | |
| __device__ void | scanCompactBlocksStartAddress (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
| Compute addresses to obtain compact list of block start addresses. More... | |
| __device__ void | scanSumBlocks (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
| Perform scan to obtain number of eigenvalues before a specific block. More... | |
| __device__ void | scanInitial (const unsigned int tid, const unsigned int tid_2, const unsigned int num_threads_active, const unsigned int num_threads_compaction, unsigned short *s_cl_one, unsigned short *s_cl_mult, unsigned short *s_cl_blocking, unsigned short *s_cl_helper) |
| template<typename NumericT > | |
| __device__ void | storeNonEmptyIntervalsLarge (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, unsigned short *s_left_count, unsigned short *s_right_count, NumericT left, NumericT mid, NumericT right, const unsigned short left_count, const unsigned short mid_count, const unsigned short right_count, NumericT epsilon, unsigned int &compact_second_chunk, unsigned short *s_compaction_list, unsigned int &is_active_second) |
| template<typename NumericT > | |
| __global__ void | bisectKernelLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon, unsigned int *g_num_one, unsigned int *g_num_blocks_mult, NumericT *g_left_one, NumericT *g_right_one, unsigned int *g_pos_one, NumericT *g_left_mult, NumericT *g_right_mult, unsigned int *g_left_count_mult, unsigned int *g_right_count_mult, unsigned int *g_blocks_mult, unsigned int *g_blocks_mult_sum) |
| Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. Parameters: g_d - diagonal elements in global memory; g_s - superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0); n - size of matrix; lg - lower bound of input interval (e.g. Gerschgorin interval); ug - upper bound of input interval (e.g. Gerschgorin interval); lg_eig_count - number of eigenvalues that are smaller than lg; ug_eig_count - number of eigenvalues that are smaller than ug; epsilon - desired accuracy of eigenvalues to compute. More... | |
| template<typename NumericT > | |
| __global__ void | bisectKernelLarge_MultIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int *blocks_mult, unsigned int *blocks_mult_sum, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, NumericT *g_lambda, unsigned int *g_pos, NumericT precision) |
| template<typename NumericT > | |
| __global__ void | bisectKernelLarge_OneIntervals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, unsigned int num_intervals, NumericT *g_left, NumericT *g_right, unsigned int *g_pos, NumericT precision) |
| template<typename NumericT > | |
| __global__ void | bisectKernelSmall (const NumericT *g_d, const NumericT *g_s, const unsigned int n, NumericT *g_left, NumericT *g_right, unsigned int *g_left_count, unsigned int *g_right_count, const NumericT lg, const NumericT ug, const unsigned int lg_eig_count, const unsigned int ug_eig_count, NumericT epsilon) |
| Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix. More... | |
| __device__ int | floorPow2 (int n) |
| __device__ int | ceilPow2 (int n) |
| template<typename NumericT > | |
| __device__ NumericT | computeMidpoint (const NumericT left, const NumericT right) |
| template<class S , class T , class NumericT > | |
| __device__ void | storeInterval (unsigned int addr, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT right, S left_count, S right_count, NumericT precision) |
| template<typename NumericT > | |
| __device__ unsigned int | computeNumSmallerEigenvals (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged) |
| template<typename NumericT > | |
| __device__ unsigned int | computeNumSmallerEigenvalsLarge (const NumericT *g_d, const NumericT *g_s, const unsigned int n, const NumericT x, const unsigned int tid, const unsigned int num_intervals_active, NumericT *s_d, NumericT *s_s, unsigned int converged) |
| template<class S , class T , class NumericT > | |
| __device__ void | storeNonEmptyIntervals (unsigned int addr, const unsigned int num_threads_active, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT left, NumericT mid, NumericT right, const S left_count, const S mid_count, const S right_count, NumericT precision, unsigned int &compact_second_chunk, T *s_compaction_list_exc, unsigned int &is_active_second) |
| Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread. More... | |
| template<class T > | |
| __device__ void | createIndicesCompaction (T *s_compaction_list_exc, unsigned int num_threads_compaction) |
| template<class T , class NumericT > | |
| __device__ void | compactIntervals (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT mid, NumericT right, unsigned int mid_count, unsigned int right_count, T *s_compaction_list, unsigned int num_threads_active, unsigned int is_active_second) |
| Perform stream compaction for second child intervals. More... | |
| template<class T , class S , class NumericT > | |
| __device__ void | storeIntervalConverged (NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, NumericT &left, NumericT &mid, NumericT &right, S &left_count, S &mid_count, S &right_count, T *s_compaction_list_exc, unsigned int &compact_second_chunk, const unsigned int num_threads_active, unsigned int &is_active_second) |
| template<class T , class NumericT > | |
| __device__ void | subdivideActiveInterval (const unsigned int tid, NumericT *s_left, NumericT *s_right, T *s_left_count, T *s_right_count, const unsigned int num_threads_active, NumericT &left, NumericT &right, unsigned int &left_count, unsigned int &right_count, NumericT &mid, unsigned int &all_threads_converged) |
| Subdivide interval if active and not already converged. More... | |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_upper_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_lower_solve_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool unit_diagonal) |
| template<typename NumericT , typename SolverTagT > | |
| void | inplace_solve (matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag) |
| Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation). More... | |
| template<typename NumericT > | |
| __global__ void | triangular_substitute_inplace_row_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
| template<typename NumericT > | |
| __global__ void | triangular_substitute_inplace_col_kernel (NumericT const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
| template<typename NumericT , typename SolverTagT > | |
| void | inplace_solve (matrix_base< NumericT > const &mat, vector_base< NumericT > &vec, SolverTagT) |
| Direct inplace solver for dense triangular systems (non-transposed version) More... | |
| __host__ __device__ float2 | operator+ (float2 a, float2 b) |
| __host__ __device__ float2 | operator- (float2 a, float2 b) |
| template<typename SCALARTYPE > | |
| __device__ float2 | operator/ (float2 a, SCALARTYPE b) |
| __device__ float2 | operator* (float2 in1, float2 in2) |
| __host__ __device__ double2 | operator+ (double2 a, double2 b) |
| __host__ __device__ double2 | operator- (double2 a, double2 b) |
| template<typename SCALARTYPE > | |
| __host__ __device__ double2 | operator/ (double2 a, SCALARTYPE b) |
| __host__ __device__ double2 | operator* (double2 in1, double2 in2) |
| __device__ unsigned int | get_reorder_num (unsigned int v, unsigned int bit_size) |
| template<typename Numeric2T , typename NumericT > | |
| __global__ void | fft_direct (const Numeric2T *input, Numeric2T *output, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | direct (viennacl::vector< NumericT, AlignmentV > const &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
| Direct 1D algorithm for computing Fourier transformation. More... | |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | direct (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &in, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &out, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
| Direct 2D algorithm for computing Fourier transformation. More... | |
| template<typename NumericT > | |
| __global__ void | fft_reorder (NumericT *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, bool is_row_major) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | reorder (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t bits_datasize, vcl_size_t batch_num, viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
| template<typename Numeric2T , typename NumericT > | |
| __global__ void | fft_radix2_local (Numeric2T *input, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
| template<typename Numeric2T , typename NumericT > | |
| __global__ void | fft_radix2 (Numeric2T *input, unsigned int s, unsigned int bit_size, unsigned int size, unsigned int stride, unsigned int batch_num, NumericT sign, bool is_row_major) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | radix2 (viennacl::vector< NumericT, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
| Radix-2 1D algorithm for computing Fourier transformation. More... | |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | radix2 (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &in, vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign=NumericT(-1), viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order=viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) |
| Radix-2 2D algorithm for computing Fourier transformation. More... | |
| template<typename Numeric2T , typename NumericT > | |
| __global__ void | bluestein_post (Numeric2T *Z, Numeric2T *out, unsigned int size, NumericT sign) |
| template<typename Numeric2T , typename NumericT > | |
| __global__ void | bluestein_pre (Numeric2T *input, Numeric2T *A, Numeric2T *B, unsigned int size, unsigned int ext_size, NumericT sign) |
| template<typename NumericT > | |
| __global__ void | zero2 (NumericT *input1, NumericT *input2, unsigned int size) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | bluestein (viennacl::vector< NumericT, AlignmentV > &in, viennacl::vector< NumericT, AlignmentV > &out, vcl_size_t) |
| Bluestein's algorithm for computing Fourier transformation. More... | |
| template<typename NumericT > | |
| __global__ void | fft_mult_vec (const NumericT *input1, const NumericT *input2, NumericT *output, unsigned int size) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | multiply_complex (viennacl::vector< NumericT, AlignmentV > const &input1, viennacl::vector< NumericT, AlignmentV > const &input2, viennacl::vector< NumericT, AlignmentV > &output) |
| Multiply two complex vectors and store the result in output. More... | |
| template<typename Numeric2T , typename NumericT > | |
| __global__ void | fft_div_vec_scalar (Numeric2T *input1, unsigned int size, NumericT factor) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | normalize (viennacl::vector< NumericT, AlignmentV > &input) |
| Normalize the vector by its own size. More... | |
| template<typename NumericT > | |
| __global__ void | transpose (const NumericT *input, NumericT *output, unsigned int row_num, unsigned int col_num) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const &input, viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &output) |
| Transpose matrix. More... | |
| template<typename NumericT > | |
| __global__ void | transpose_inplace (NumericT *input, unsigned int row_num, unsigned int col_num) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | transpose (viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > &input) |
| In-place matrix transpose. More... | |
| template<typename RealT , typename ComplexT > | |
| __global__ void | real_to_complex (const RealT *in, ComplexT *out, unsigned int size) |
| template<typename NumericT > | |
| void | real_to_complex (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size) |
| Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More... | |
| template<typename ComplexT , typename RealT > | |
| __global__ void | complex_to_real (const ComplexT *in, RealT *out, unsigned int size) |
| template<typename NumericT > | |
| void | complex_to_real (viennacl::vector_base< NumericT > const &in, viennacl::vector_base< NumericT > &out, vcl_size_t size) |
| Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) More... | |
| template<typename NumericT > | |
| __global__ void | reverse_inplace (NumericT *vec, uint size) |
| template<typename NumericT > | |
| void | reverse (viennacl::vector_base< NumericT > &in) |
| Reverse the vector to the opposite order and store the result in the input vector. More... | |
| template<typename NumericT > | |
| __global__ void | pipelined_cg_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT *r, NumericT const *Ap, NumericT beta, NumericT *inner_prod_buffer, unsigned int size) |
| template<typename NumericT > | |
| void | pipelined_cg_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer) |
| template<typename NumericT > | |
| __global__ void | pipelined_cg_csr_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
| template<typename NumericT > | |
| void | pipelined_cg_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
| template<typename NumericT > | |
| __global__ void | pipelined_cg_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
| template<typename NumericT > | |
| void | pipelined_cg_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
| template<typename NumericT > | |
| __global__ void | pipelined_cg_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
| template<typename NumericT > | |
| void | pipelined_cg_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
| template<typename NumericT > | |
| __global__ void | pipelined_cg_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
| template<typename NumericT > | |
| void | pipelined_cg_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
| template<typename NumericT > | |
| __global__ void | pipelined_cg_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size) |
| template<typename NumericT > | |
| void | pipelined_cg_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_update_s_kernel (NumericT *s, NumericT const *residual, NumericT const *Ap, unsigned int size, NumericT *inner_prod_buffer, unsigned int chunk_size, unsigned int chunk_offset) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_update_s (vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_vector_kernel (NumericT *result, NumericT alpha, NumericT *p, NumericT omega, NumericT const *s, NumericT *residual, NumericT const *As, NumericT beta, NumericT const *Ap, NumericT const *r0star, NumericT *inner_prod_buffer, unsigned int size) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_vector_update (vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_csr_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_prod (compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_coo_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_prod (coordinate_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_ell_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_prod (ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_sliced_ell_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_prod (sliced_ell_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| template<typename NumericT > | |
| __global__ void | pipelined_bicgstab_hyb_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int internal_row_num, unsigned int items_per_row, const NumericT *p, NumericT *Ap, const NumericT *r0star, unsigned int size, NumericT *inner_prod_buffer, unsigned int buffer_size, unsigned int buffer_offset) |
| template<typename NumericT > | |
| void | pipelined_bicgstab_prod (hyb_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| template<typename T > | |
| __global__ void | pipelined_gmres_normalize_vk_kernel (T *vk, unsigned int vk_offset, T const *residual, T *R_buffer, unsigned int R_offset, T const *inner_prod_buffer, unsigned int chunk_size, T *r_dot_vk_buffer, unsigned int chunk_offset, unsigned int size) |
| template<typename T > | |
| void | pipelined_gmres_normalize_vk (vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset) |
| Performs a vector normalization needed for an efficient pipelined GMRES algorithm. More... | |
| template<typename T > | |
| __global__ void | pipelined_gmres_gram_schmidt_stage1_kernel (T const *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T *vi_in_vk_buffer, unsigned int chunk_size) |
| template<typename T > | |
| void | pipelined_gmres_gram_schmidt_stage1 (vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size) |
| template<typename T > | |
| __global__ void | pipelined_gmres_gram_schmidt_stage2_kernel (T *krylov_basis, unsigned int size, unsigned int internal_size, unsigned int k, T const *vi_in_vk_buffer, unsigned int chunk_size, T *R_buffer, unsigned int krylov_dim, T *inner_prod_buffer) |
| template<typename T > | |
| void | pipelined_gmres_gram_schmidt_stage2 (vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size) |
| template<typename T > | |
| __global__ void | pipelined_gmres_update_result_kernel (T *result, T const *residual, T const *krylov_basis, unsigned int size, unsigned int internal_size, T const *coefficients, unsigned int k) |
| template<typename T > | |
| void | pipelined_gmres_update_result (vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k) |
| template<typename T > | |
| void | pipelined_gmres_prod (compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
| template<typename T > | |
| void | pipelined_gmres_prod (coordinate_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
| template<typename T > | |
| void | pipelined_gmres_prod (ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
| template<typename T > | |
| void | pipelined_gmres_prod (sliced_ell_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
| template<typename T > | |
| void | pipelined_gmres_prod (hyb_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer) |
| template<typename NumericT , typename SizeT , typename DistanceT > | |
| void | trans (matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans) |
| template<typename NumericT , typename ScalarT > | |
| void | am (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
| template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
| void | ambm (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
| template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
| void | ambm_m (matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
| template<typename NumericT > | |
| void | matrix_assign (matrix_base< NumericT > &mat, NumericT s, bool clear=false) |
| template<typename NumericT > | |
| void | matrix_diagonal_assign (matrix_base< NumericT > &mat, NumericT s) |
| template<typename NumericT > | |
| void | matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat) |
| template<typename NumericT > | |
| void | matrix_diag_to_vector (matrix_base< NumericT > const &mat, int k, vector_base< NumericT > &vec) |
| template<typename NumericT > | |
| void | matrix_row (matrix_base< NumericT > const &mat, unsigned int i, vector_base< NumericT > &vec) |
| template<typename NumericT > | |
| void | matrix_column (const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec) |
| template<typename NumericT , typename SizeT , typename OpT > | |
| void | element_op (matrix_base< NumericT, SizeT > &A, matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const &proxy) |
| template<typename SizeT , typename OpT > | |
| void | element_op (matrix_base< float, SizeT > &A, matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const &proxy) |
| template<typename SizeT , typename OpT > | |
| void | element_op (matrix_base< double, SizeT > &A, matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
| template<typename NumericT > | |
| void | element_op (matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
| template<typename NumericT > | |
| void | prod_impl (const matrix_base< NumericT > &mat, bool mat_transpose, const vector_base< NumericT > &vec, vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication. More... | |
| template<typename NumericT , typename ScalarT > | |
| void | prod_impl (const matrix_base< NumericT > &A, bool trans_A, const matrix_base< NumericT > &B, bool trans_B, matrix_base< NumericT > &C, ScalarT alpha, ScalarT beta) |
| Carries out matrix-matrix multiplication. More... | |
| template<typename NumericT , typename ScalarT > | |
| void | scaled_rank_1_update (matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2) |
| The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. More... | |
| template<typename NumericT , typename VectorType > | |
| void | bidiag_pack (matrix_base< NumericT > &A, VectorType &dh, VectorType &sh) |
| This function stores the diagonal and the superdiagonal of a matrix in two vectors. More... | |
| template<typename NumericT > | |
| void | copy_vec (matrix_base< NumericT > &A, vector_base< NumericT > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col) |
| This function copies a row or a column from a matrix to a vector. More... | |
| template<typename NumericT > | |
| void | house_update_A_left (matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start) |
| This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P. More... | |
| template<typename NumericT > | |
| void | house_update_A_right (matrix_base< NumericT > &A, vector_base< NumericT > &D) |
| This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P. More... | |
| template<typename NumericT > | |
| void | house_update_QL (matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1) |
| This function updates the matrix Q, which is needed for the computation of the eigenvectors. More... | |
| template<typename NumericT > | |
| void | givens_next (matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m) |
| This function updates the matrix Q. It is part of the tql2 algorithm. More... | |
| template<typename NumericT > | |
| void | inclusive_scan (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2) |
| This function implements an inclusive scan. More... | |
| template<typename NumericT , typename F > | |
| void | exclusive_scan (vector_base< NumericT, F > &vec1, vector_base< NumericT, F > &vec2) |
| This function implements an exclusive scan. More... | |
| template<typename NumericT > | |
| __global__ void | am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | am_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
| template<typename NumericT > | |
| __global__ void | matrix_col_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
| template<typename NumericT > | |
| __global__ void | element_op_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
| template<typename NumericT > | |
| __global__ void | element_op_int_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_col_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
| template<typename NumericT > | |
| __global__ void | trans_vec_mul_col_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
| template<typename NumericT > | |
| __global__ void | scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
| template<typename NumericT > | |
| __global__ void | scaled_rank1_update_col_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
| template<typename T > | |
| __global__ void | bidiag_pack_row_major_kernel (T *A, T *D, T *S, uint size1, uint size2, uint stride) |
| template<typename T > | |
| __global__ void | bidiag_pack_column_major_kernel (T *A, T *D, T *S, uint size1, uint size2, uint stride) |
| template<typename T > | |
| __global__ void | copy_col_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
| template<typename T > | |
| __global__ void | copy_col_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
| template<typename T > | |
| __global__ void | copy_row_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
| template<typename T > | |
| __global__ void | copy_row_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size, uint stride) |
| template<typename T > | |
| __global__ void | house_update_A_left_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
| template<typename T > | |
| __global__ void | house_update_A_left_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
| template<typename T > | |
| __global__ void | house_update_A_right_row_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
| template<typename T > | |
| __global__ void | house_update_A_right_column_major_kernel (T *A, T *V, uint row_start, uint col_start, uint size1, uint size2, uint stride) |
| template<typename T > | |
| __device__ void | col_reduce_lcl_array (T *sums, uint th_Idx, uint bl_Dim) |
| template<typename T > | |
| __global__ void | house_update_QL_row_major_kernel (T *QL, T *V, uint size1, uint strideQ) |
| template<typename T > | |
| __global__ void | house_update_QL_column_major_kernel (T *QL, T *V, uint size1, uint strideQ) |
| template<typename T > | |
| __global__ void | givens_next_row_major_kernel (T *matr, T *cs, T *ss, uint size, uint stride, uint start_i, uint end_i) |
| template<typename T > | |
| __global__ void | givens_next_column_major_kernel (T *matr, T *cs, T *ss, uint size, uint stride, uint start_i, uint end_i) |
| template<typename T > | |
| __global__ void | inclusive_scan_kernel_1 (T *X, unsigned int startX, unsigned int incX, unsigned int InputSize, T *Y, unsigned int startY, unsigned int incY, T *S, unsigned int startS, unsigned int incS) |
| template<typename T > | |
| __global__ void | exclusive_scan_kernel_1 (T *X, unsigned int startX, unsigned int incX, unsigned int InputSize, T *Y, unsigned int startY, unsigned int incY, T *S, unsigned int startS, unsigned int incS) |
| template<typename T > | |
| __global__ void | scan_kernel_2 (T *S_ref, unsigned int startS_ref, unsigned int incS_ref, T *S, unsigned int startS, unsigned int incS, unsigned int InputSize) |
| template<typename T > | |
| __global__ void | scan_kernel_3 (T *S_ref, unsigned int startS_ref, unsigned int incS_ref, T *S, unsigned int startS, unsigned int incS) |
| template<typename T > | |
| __global__ void | scan_kernel_4 (T *S, unsigned int startS, unsigned int incS, T *Y, unsigned int startY, unsigned int incY, unsigned int OutputSize) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_col_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_col_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_col_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_col_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_col_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_col_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_row_prod_AA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_row_prod_AT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_row_prod_TA_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | matrix_matrix_row_row_row_prod_TT_kernel (NumericT alpha, const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, NumericT beta, NumericT *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
| template<typename NumericT > | |
| __global__ void | trans_kernel (const NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_internal_size1, unsigned int A_internal_size2, unsigned int A_size1, unsigned int A_size2, unsigned int A_stride1, unsigned int A_stride2, NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_internal_size1, unsigned int B_internal_size2, unsigned int B_stride1, unsigned int B_stride2, bool data_major) |
| template<typename NumericT > | |
| __global__ void | am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | am_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, NumericT fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | ambm_m_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *fac2, unsigned int options2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *fac3, unsigned int options3, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
| template<typename NumericT > | |
| __global__ void | matrix_row_diagonal_assign_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT alpha) |
| template<typename NumericT > | |
| __global__ void | element_op_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
| template<typename NumericT > | |
| __global__ void | element_op_int_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const NumericT *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_abs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_acos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_asin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_atan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_ceil_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_cos_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_cosh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_exp_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_fabs_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_floor_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_log_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_log10_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_sin_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_sinh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_sqrt_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_tan_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | matrix_row_element_tanh_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
| template<typename NumericT > | |
| __global__ void | vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
| template<typename NumericT > | |
| __global__ void | trans_vec_mul_row_kernel (const NumericT *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const NumericT *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, NumericT *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
| template<typename NumericT > | |
| __global__ void | scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, NumericT val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
| template<typename NumericT > | |
| __global__ void | scaled_rank1_update_row_kernel (NumericT *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const NumericT *val, unsigned int options2, const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
| template<typename NumericT > | |
| __global__ void | el_wise_mul_div (NumericT *matrix1, NumericT const *matrix2, NumericT const *matrix3, unsigned int size) |
| Main CUDA kernel for nonnegative matrix factorization of dense matrices. More... | |
| template<typename NumericT > | |
| void | nmf (viennacl::matrix_base< NumericT > const &V, viennacl::matrix_base< NumericT > &W, viennacl::matrix_base< NumericT > &H, viennacl::linalg::nmf_config const &conf) |
| The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized. More... | |
| template<typename NumericT > | |
| __global__ void | as_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2) |
| template<typename NumericT > | |
| __global__ void | as_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2) |
| template<typename ScalarT1 , typename ScalarT2 , typename NumericT > | |
| viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_any_scalar< NumericT >::value >::type | as (ScalarT1 &s1, ScalarT2 const &s2, NumericT const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
| template<typename NumericT > | |
| __global__ void | asbs_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3) |
| template<typename NumericT > | |
| __global__ void | asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3) |
| template<typename NumericT > | |
| __global__ void | asbs_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
| template<typename NumericT > | |
| __global__ void | asbs_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
| template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 > | |
| viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type | asbs (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
| template<typename NumericT > | |
| __global__ void | asbs_s_kernel (NumericT *s1, const NumericT *fac2, unsigned int options2, const NumericT *s2, const NumericT *fac3, unsigned int options3, const NumericT *s3) |
| template<typename NumericT > | |
| __global__ void | asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT const *fac3, unsigned int options3, const NumericT *s3) |
| template<typename NumericT > | |
| __global__ void | asbs_s_kernel (NumericT *s1, NumericT const *fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
| template<typename NumericT > | |
| __global__ void | asbs_s_kernel (NumericT *s1, NumericT fac2, unsigned int options2, const NumericT *s2, NumericT fac3, unsigned int options3, const NumericT *s3) |
| template<typename ScalarT1 , typename ScalarT2 , typename NumericT1 , typename ScalarT3 , typename NumericT2 > | |
| viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value &&viennacl::is_scalar< ScalarT3 >::value &&viennacl::is_any_scalar< NumericT1 >::value &&viennacl::is_any_scalar< NumericT2 >::value >::type | asbs_s (ScalarT1 &s1, ScalarT2 const &s2, NumericT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, ScalarT3 const &s3, NumericT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
| template<typename NumericT > | |
| __global__ void | scalar_swap_kernel (NumericT *s1, NumericT *s2) |
| template<typename ScalarT1 , typename ScalarT2 > | |
| viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type | swap (ScalarT1 &s1, ScalarT2 &s2) |
| Swaps the contents of two scalars, data is copied. More... | |
| template<typename NumericT > | |
| __global__ void | compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
| template<typename NumericT > | |
| __global__ void | compressed_matrix_vec_mul_adaptive_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const unsigned int *row_blocks, const NumericT *elements, unsigned int num_blocks, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
| template<class NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication with a compressed_matrix. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out sparse_matrix-dense_matrix multiplication, the first matrix being compressed. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const NumericT *sp_mat_elements, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::compressed_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out matrix-trans(matrix) multiplication, the first matrix being compressed and the second transposed. More... | |
| template<typename NumericT > | |
| __global__ void | compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *result, unsigned int size) |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const SparseMatrixT &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_lower_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::lower_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::unit_upper_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename SparseMatrixT , typename NumericT > | |
| viennacl::enable_if< viennacl::is_any_sparse_matrix< SparseMatrixT >::value >::type | inplace_solve (const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > &mat, viennacl::vector_base< NumericT > &vec, viennacl::linalg::upper_tag) |
| Carries out triangular inplace solves. More... | |
| template<typename NumericT > | |
| __global__ void | compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, unsigned int nonzero_rows, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
| template<typename NumericT > | |
| void | prod_impl (const viennacl::compressed_compressed_matrix< NumericT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication with a compressed_compressed_matrix. More... | |
| template<typename NumericT > | |
| __global__ void | coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication with a coordinate_matrix. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out Sparse Matrix(COO)-Dense Matrix multiplication. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const NumericT *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::coordinate_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication. More... | |
| template<typename NumericT > | |
| __global__ void | ell_matrix_vec_mul_kernel (const unsigned int *coords, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication with an ell_matrix. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const NumericT *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::ell_matrix< NumericT, AlignmentV > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. More... | |
| template<typename NumericT > | |
| __global__ void | sliced_ell_matrix_vec_mul_kernel (const unsigned int *columns_per_block, const unsigned int *column_indices, const unsigned int *block_start, const NumericT *elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, unsigned int size_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
| template<typename NumericT , typename IndexT > | |
| void | prod_impl (const viennacl::sliced_ell_matrix< NumericT, IndexT > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication with a sliced_ell_matrix. More... | |
| template<typename NumericT > | |
| __global__ void | hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, const NumericT *x, unsigned int start_x, unsigned int inc_x, NumericT *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::vector_base< NumericT > &vec, viennacl::vector_base< NumericT > &result) |
| Carries out matrix-vector multiplication with a hyb_matrix. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_base< NumericT > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out Sparse Matrix(HYB)-Dense Matrix multiplication. More... | |
| template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
| __global__ void | hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
| template<typename NumericT , unsigned int AlignmentV> | |
| void | prod_impl (const viennacl::hyb_matrix< NumericT, AlignmentV > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT > &result) |
| Carries out Sparse Matrix(HYB)-Dense Transposed Matrix multiplication. More... | |
| template<typename NumericT > | |
| __global__ void | csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const NumericT *elements, const NumericT *diagonal_entries, NumericT *vector, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const NumericT *elements_L, const unsigned int *block_offsets, NumericT *result, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const NumericT *elements_U, const NumericT *diagonal_U, const unsigned int *block_offsets, NumericT *result, unsigned int size) |
| template<typename NumericT > | |
| __global__ void | av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| __global__ void | av_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT , typename ScalarType1 > | |
| void | av (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
| template<typename NumericT > | |
| __global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT > | |
| __global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT > | |
| __global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT > | |
| __global__ void | avbv_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
| void | avbv (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
| template<typename NumericT > | |
| __global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT > | |
| __global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, const NumericT *fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT > | |
| __global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT > | |
| __global__ void | avbv_v_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT fac2, unsigned int options2, const NumericT *vec2, unsigned int start2, unsigned int inc2, NumericT fac3, unsigned int options3, const NumericT *vec3, unsigned int start3, unsigned int inc3) |
| template<typename NumericT , typename ScalarT1 , typename ScalarT2 > | |
| void | avbv_v (vector_base< NumericT > &vec1, vector_base< NumericT > const &vec2, ScalarT1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< NumericT > const &vec3, ScalarT2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
| template<typename NumericT > | |
| __global__ void | vector_assign_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, NumericT alpha) |
| template<typename NumericT , typename ScalarT1 > | |
| void | vector_assign (vector_base< NumericT > &vec1, ScalarT1 const &alpha, bool up_to_internal_size=false) |
| Assign a constant value to a vector (-range/-slice) More... | |
| template<typename NumericT > | |
| __global__ void | vector_swap_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | vector_swap (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2) |
| Swaps the contents of two vectors, data is copied. More... | |
| template<typename NumericT > | |
| __global__ void | element_op_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
| template<typename NumericT > | |
| __global__ void | element_op_int_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2, NumericT const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
| template<typename NumericT , typename OpT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const &proxy) |
| Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) More... | |
| template<typename OpT > | |
| void | element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const &proxy) |
| template<typename OpT > | |
| void | element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_acos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_asin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_atan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_ceil_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_cos_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_cosh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_exp_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_fabs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_abs_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_floor_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_log_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_log10_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_sin_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_sinh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_sqrt_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_tan_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | vec_element_tanh_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT const *vec2, unsigned int start2, unsigned int inc2) |
| template<typename NumericT > | |
| void | element_op (vector_base< NumericT > &vec1, vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const &proxy) |
| template<typename NumericT > | |
| __global__ void | inner_prod_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT *group_buffer) |
| template<typename NumericT > | |
| __global__ void | vector_sum_kernel_floats (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
| template<typename NumericT > | |
| __global__ void | vector_sum_kernel_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
| template<typename NumericT > | |
| __global__ void | vector_sum_kernel_unsigned_integers (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
| template<typename NumericT , typename ScalarT > | |
| void | inner_prod_impl (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, ScalarT &result) |
| Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
| template<typename NumericT > | |
| void | inner_prod_cpu (vector_base< NumericT > const &vec1, vector_base< NumericT > const &vec2, NumericT &result) |
| Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). More... | |
| template<typename NumericT > | |
| __global__ void | inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results) |
| template<typename NumericT > | |
| __global__ void | inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results) |
| template<typename NumericT > | |
| __global__ void | inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results) |
| template<typename NumericT > | |
| __global__ void | inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results) |
| template<typename NumericT > | |
| __global__ void | vector_multi_sum_kernel (NumericT const *vec1, NumericT *result, unsigned int start_result, unsigned int inc_result) |
| template<typename NumericT > | |
| void | inner_prod_impl (vector_base< NumericT > const &x, vector_tuple< NumericT > const &vec_tuple, vector_base< NumericT > &result) |
| template<typename NumericT > | |
| __global__ void | norm_kernel_floats (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
| template<typename NumericT > | |
| __global__ void | norm_kernel_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
| template<typename NumericT > | |
| __global__ void | norm_kernel_unsigned_integers (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, NumericT *group_buffer) |
| template<typename NumericT > | |
| void | norm_1_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
| Computes the l^1-norm of a vector. More... | |
| template<typename NumericT > | |
| void | norm_1_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
| Computes the l^1-norm of a vector. More... | |
| template<typename NumericT > | |
| void | norm_2_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
| Computes the l^2-norm of a vector - implementation. More... | |
| template<typename NumericT > | |
| void | norm_2_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
| Computes the l^2-norm of a vector - implementation. More... | |
| template<typename NumericT > | |
| void | norm_inf_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
| Computes the supremum-norm of a vector. More... | |
| template<typename NumericT > | |
| void | norm_inf_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
| Computes the supremum-norm of a vector. More... | |
| template<typename NumericT > | |
| __global__ void | vector_maxmin_kernel (const NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, NumericT *result) |
| template<typename NumericT > | |
| void | max_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
| Computes the maximum of a vector, both reduction stages run on the GPU. More... | |
| template<typename NumericT > | |
| void | max_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
| Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
| template<typename NumericT > | |
| void | min_impl (vector_base< NumericT > const &vec1, scalar< NumericT > &result) |
| Computes the minimum of a vector, both reduction stages run on the GPU. More... | |
| template<typename NumericT > | |
| void | min_cpu (vector_base< NumericT > const &vec1, NumericT &result) |
| Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU. More... | |
| template<typename NumericT > | |
| __device__ NumericT | cuda_abs (NumericT val) |
| __device__ unsigned long | cuda_abs (unsigned long val) |
| __device__ unsigned int | cuda_abs (unsigned int val) |
| __device__ unsigned short | cuda_abs (unsigned short val) |
| __device__ unsigned char | cuda_abs (unsigned char val) |
| template<typename NumericT > | |
| __global__ void | index_norm_inf_kernel (const NumericT *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result) |
| template<typename NumericT > | |
| vcl_size_t | index_norm_inf (vector_base< NumericT > const &vec1) |
| Computes the index of the first entry that is equal to the supremum-norm in modulus. More... | |
| template<typename NumericT > | |
| __global__ void | plane_rotation_kernel (NumericT *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, NumericT *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, NumericT alpha, NumericT beta) |
| template<typename NumericT > | |
| void | plane_rotation (vector_base< NumericT > &vec1, vector_base< NumericT > &vec2, NumericT alpha, NumericT beta) |
| Computes a plane rotation of two vectors. More... | |
Holds all CUDA compute kernels used by ViennaCL.
| void viennacl::linalg::cuda::am | ( | matrix_base< NumericT > & | mat1, |
| matrix_base< NumericT > const & | mat2, | ||
| ScalarT const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha | ||
| ) |
Definition at line 76 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::am_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 38 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::am_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 74 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::am_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 66 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::am_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 103 of file matrix_operations_row.hpp.
| void viennacl::linalg::cuda::ambm | ( | matrix_base< NumericT > & | mat1, |
| matrix_base< NumericT > const & | mat2, | ||
| ScalarT1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| matrix_base< NumericT > const & | mat3, | ||
| ScalarT2 const & | beta, | ||
| vcl_size_t | len_beta, | ||
| bool | reciprocal_beta, | ||
| bool | flip_sign_beta | ||
| ) |
Definition at line 127 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 115 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 189 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 262 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 336 of file matrix_operations_col.hpp.
| void viennacl::linalg::cuda::ambm_m | ( | matrix_base< NumericT > & | mat1, |
| matrix_base< NumericT > const & | mat2, | ||
| ScalarT1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| matrix_base< NumericT > const & | mat3, | ||
| ScalarT2 const & | beta, | ||
| vcl_size_t | len_beta, | ||
| bool | reciprocal_beta, | ||
| bool | flip_sign_beta | ||
| ) |
Definition at line 202 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 415 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 490 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 564 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 639 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 448 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 523 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 597 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 672 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 145 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 220 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 294 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2 | ||
| ) |
Definition at line 369 of file matrix_operations_row.hpp.
| viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_any_scalar<NumericT>::value >::type viennacl::linalg::cuda::as | ( | ScalarT1 & | s1, |
| ScalarT2 const & | s2, | ||
| NumericT const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha | ||
| ) |
Definition at line 77 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::as_kernel | ( | NumericT * | s1, |
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2 | ||
| ) |
Definition at line 48 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::as_kernel | ( | NumericT * | s1, |
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2 | ||
| ) |
Definition at line 60 of file scalar_operations.hpp.
| viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs | ( | ScalarT1 & | s1, |
| ScalarT2 const & | s2, | ||
| NumericT1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| ScalarT3 const & | s3, | ||
| NumericT2 const & | beta, | ||
| vcl_size_t | len_beta, | ||
| bool | reciprocal_beta, | ||
| bool | flip_sign_beta | ||
| ) |
Definition at line 191 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 99 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| NumericT const * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 120 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
| NumericT const * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 141 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_kernel | ( | NumericT * | s1, |
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 162 of file scalar_operations.hpp.
| viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value && viennacl::is_scalar<ScalarT3>::value && viennacl::is_any_scalar<NumericT1>::value && viennacl::is_any_scalar<NumericT2>::value >::type viennacl::linalg::cuda::asbs_s | ( | ScalarT1 & | s1, |
| ScalarT2 const & | s2, | ||
| NumericT1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| ScalarT3 const & | s3, | ||
| NumericT2 const & | beta, | ||
| vcl_size_t | len_beta, | ||
| bool | reciprocal_beta, | ||
| bool | flip_sign_beta | ||
| ) |
Definition at line 314 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 222 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| NumericT const * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 243 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
| NumericT const * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 264 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | NumericT * | s1, |
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | s2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | s3 | ||
| ) |
Definition at line 285 of file scalar_operations.hpp.
| void viennacl::linalg::cuda::av | ( | vector_base< NumericT > & | vec1, |
| vector_base< NumericT > const & | vec2, | ||
| ScalarType1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha | ||
| ) |
Definition at line 118 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::av_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 51 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::av_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 84 of file vector_operations.hpp.
| void viennacl::linalg::cuda::avbv | ( | vector_base< NumericT > & | vec1, |
| vector_base< NumericT > const & | vec2, | ||
| ScalarT1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| vector_base< NumericT > const & | vec3, | ||
| ScalarT2 const & | beta, | ||
| vcl_size_t | len_beta, | ||
| bool | reciprocal_beta, | ||
| bool | flip_sign_beta | ||
| ) |
Definition at line 407 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 153 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 216 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 279 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 342 of file vector_operations.hpp.
| void viennacl::linalg::cuda::avbv_v | ( | vector_base< NumericT > & | vec1, |
| vector_base< NumericT > const & | vec2, | ||
| ScalarT1 const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| vector_base< NumericT > const & | vec3, | ||
| ScalarT2 const & | beta, | ||
| vcl_size_t | len_beta, | ||
| bool | reciprocal_beta, | ||
| bool | flip_sign_beta | ||
| ) |
Definition at line 709 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 457 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| const NumericT * | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 520 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 583 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT | fac2, | ||
| unsigned int | options2, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| NumericT | fac3, | ||
| unsigned int | options3, | ||
| const NumericT * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3 | ||
| ) |
Definition at line 646 of file vector_operations.hpp.
| void viennacl::linalg::cuda::bidiag_pack | ( | matrix_base< NumericT > & | A, |
| VectorType & | dh, | ||
| VectorType & | sh | ||
| ) |
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
| A | The matrix from which the vectors will be extracted. |
| dh | The vector in which the diagonal of the matrix will be stored. |
| sh | The vector in which the superdiagonal of the matrix will be stored. |
Definition at line 2490 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::bidiag_pack_column_major_kernel | ( | T * | A, |
| T * | D, | ||
| T * | S, | ||
| uint | size1, | ||
| uint | size2, | ||
| uint | stride | ||
| ) |
Definition at line 1435 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::bidiag_pack_row_major_kernel | ( | T * | A, |
| T * | D, | ||
| T * | S, | ||
| uint | size1, | ||
| uint | size2, | ||
| uint | stride | ||
| ) |
Definition at line 1413 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::bisectKernelLarge | ( | const NumericT * | g_d, |
| const NumericT * | g_s, | ||
| const unsigned int | n, | ||
| const NumericT | lg, | ||
| const NumericT | ug, | ||
| const unsigned int | lg_eig_count, | ||
| const unsigned int | ug_eig_count, | ||
| NumericT | epsilon, | ||
| unsigned int * | g_num_one, | ||
| unsigned int * | g_num_blocks_mult, | ||
| NumericT * | g_left_one, | ||
| NumericT * | g_right_one, | ||
| unsigned int * | g_pos_one, | ||
| NumericT * | g_left_mult, | ||
| NumericT * | g_right_mult, | ||
| unsigned int * | g_left_count_mult, | ||
| unsigned int * | g_right_count_mult, | ||
| unsigned int * | g_blocks_mult, | ||
| unsigned int * | g_blocks_mult_sum | ||
| ) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.
| g_d | diagonal elements in global memory |
| g_s | superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0) |
| n | size of matrix |
| lg | lower bound of input interval (e.g. Gerschgorin interval) |
| ug | upper bound of input interval (e.g. Gerschgorin interval) |
| lg_eig_count | number of eigenvalues that are smaller than lg |
| ug_eig_count | number of eigenvalues that are smaller than ug |
| epsilon | desired accuracy of eigenvalues to compute |
Definition at line 536 of file bisect_kernel_large.hpp.
| __global__ void viennacl::linalg::cuda::bisectKernelLarge_MultIntervals | ( | const NumericT * | g_d, |
| const NumericT * | g_s, | ||
| const unsigned int | n, | ||
| unsigned int * | blocks_mult, | ||
| unsigned int * | blocks_mult_sum, | ||
| NumericT * | g_left, | ||
| NumericT * | g_right, | ||
| unsigned int * | g_left_count, | ||
| unsigned int * | g_right_count, | ||
| NumericT * | g_lambda, | ||
| unsigned int * | g_pos, | ||
| NumericT | precision | ||
| ) |
Perform the second step of the bisection algorithm for large matrices, for intervals that contained more than one eigenvalue after the first step.
| g_d | diagonal elements of symmetric, tridiagonal matrix |
| g_s | superdiagonal elements of symmetric, tridiagonal matrix |
| n | matrix size |
| blocks_mult | start addresses of blocks of intervals that are processed by one block of threads, each of the intervals contains more than one eigenvalue |
| blocks_mult_sum | total number of eigenvalues / singleton intervals in one block of intervals |
| g_left | left limits of intervals |
| g_right | right limits of intervals |
| g_left_count | number of eigenvalues less than left limits |
| g_right_count | number of eigenvalues less than right limits |
| g_lambda | final eigenvalue |
| g_pos | index of eigenvalue (in ascending order) |
| precision | desired precision of eigenvalues |
Definition at line 68 of file bisect_kernel_large_multi.hpp.
| __global__ void viennacl::linalg::cuda::bisectKernelLarge_OneIntervals | ( | const NumericT * | g_d, |
| const NumericT * | g_s, | ||
| const unsigned int | n, | ||
| unsigned int | num_intervals, | ||
| NumericT * | g_left, | ||
| NumericT * | g_right, | ||
| unsigned int * | g_pos, | ||
| NumericT | precision | ||
| ) |
Determine eigenvalues for large matrices, for intervals that contained exactly one eigenvalue after the first step.
| g_d | diagonal elements of symmetric, tridiagonal matrix |
| g_s | superdiagonal elements of symmetric, tridiagonal matrix |
| n | matrix size |
| num_intervals | total number of intervals containing one eigenvalue after the first step |
| g_left | left interval limits |
| g_right | right interval limits |
| g_pos | index of interval / number of intervals that are smaller than right interval limit |
| precision | desired precision of eigenvalues |
Definition at line 59 of file bisect_kernel_large_onei.hpp.
| __global__ void viennacl::linalg::cuda::bisectKernelSmall | ( | const NumericT * | g_d, |
| const NumericT * | g_s, | ||
| const unsigned int | n, | ||
| NumericT * | g_left, | ||
| NumericT * | g_right, | ||
| unsigned int * | g_left_count, | ||
| unsigned int * | g_right_count, | ||
| const NumericT | lg, | ||
| const NumericT | ug, | ||
| const unsigned int | lg_eig_count, | ||
| const unsigned int | ug_eig_count, | ||
| NumericT | epsilon | ||
| ) |
Bisection to find eigenvalues of a real, symmetric, and tridiagonal matrix.
| g_d | diagonal elements in global memory |
| g_s | superdiagonal elements in global memory (stored so that the element *(g_s - 1) can be accessed and equals 0) |
| n | size of matrix |
| g_left | helper array |
| g_right | helper array |
| g_left_count | helper array |
| g_right_count | helper array |
| lg | lower bound of input interval (e.g. Gerschgorin interval) |
| ug | upper bound of input interval (e.g. Gerschgorin interval) |
| lg_eig_count | number of eigenvalues that are smaller than lg |
| ug_eig_count | number of eigenvalues that are smaller than ug |
| epsilon | desired accuracy of eigenvalues to compute |
Definition at line 61 of file bisect_kernel_small.hpp.
| void viennacl::linalg::cuda::bisectLarge | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
| viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
| const unsigned int | mat_size, | ||
| const NumericT | lg, | ||
| const NumericT | ug, | ||
| const NumericT | precision | ||
| ) |
Definition at line 69 of file bisect_kernel_calls.hpp.
| void viennacl::linalg::cuda::bisectLarge_MultIntervals | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
| viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
| const unsigned int | mat_size, | ||
| const NumericT | precision | ||
| ) |
Definition at line 130 of file bisect_kernel_calls.hpp.
| void viennacl::linalg::cuda::bisectLarge_OneIntervals | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
| viennacl::linalg::detail::ResultDataLarge< NumericT > & | result, | ||
| const unsigned int | mat_size, | ||
| const NumericT | precision | ||
| ) |
Definition at line 101 of file bisect_kernel_calls.hpp.
| void viennacl::linalg::cuda::bisectSmall | ( | const viennacl::linalg::detail::InputData< NumericT > & | input, |
| viennacl::linalg::detail::ResultDataSmall< NumericT > & | result, | ||
| const unsigned int | mat_size, | ||
| const NumericT | lg, | ||
| const NumericT | ug, | ||
| const NumericT | precision | ||
| ) |
Definition at line 43 of file bisect_kernel_calls.hpp.
| void viennacl::linalg::cuda::bluestein | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
| viennacl::vector< NumericT, AlignmentV > & | out, | ||
| vcl_size_t | |||
| ) |
Bluestein's algorithm for computing Fourier transformation.
Currently works only for input data sizes less than 2^16. Uses a lot of additional memory, but should be fast for any size of data. The serial implementation has roughly O(n * lg n) complexity.
Definition at line 621 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::bluestein_post | ( | Numeric2T * | Z, |
| Numeric2T * | out, | ||
| unsigned int | size, | ||
| NumericT | sign | ||
| ) |
Definition at line 537 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::bluestein_pre | ( | Numeric2T * | input, |
| Numeric2T * | A, | ||
| Numeric2T * | B, | ||
| unsigned int | size, | ||
| unsigned int | ext_size, | ||
| NumericT | sign | ||
| ) |
Definition at line 563 of file fft_operations.hpp.
|
inline |
Compute the next higher power of two of n
| n | number for which the next higher power of two is sought |
Definition at line 66 of file bisect_util.hpp.
| __device__ void viennacl::linalg::cuda::col_reduce_lcl_array | ( | T * | sums, |
| uint | th_Idx, | ||
| uint | bl_Dim | ||
| ) |
Definition at line 1651 of file matrix_operations_col.hpp.
| __device__ void viennacl::linalg::cuda::compactIntervals | ( | NumericT * | s_left, |
| NumericT * | s_right, | ||
| T * | s_left_count, | ||
| T * | s_right_count, | ||
| NumericT | mid, | ||
| NumericT | right, | ||
| unsigned int | mid_count, | ||
| unsigned int | right_count, | ||
| T * | s_compaction_list, | ||
| unsigned int | num_threads_active, | ||
| unsigned int | is_active_second | ||
| ) |
Perform stream compaction for second child intervals.
| s_left | shared memory storage for left interval limits |
| s_right | shared memory storage for right interval limits |
| s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
| s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
| mid | midpoint of current interval (left of new interval) |
| right | upper limit of interval |
| mid_count | eigenvalues less than mid |
| right_count | eigenvalues less than right |
| s_compaction_list | list containing the indices where the data has to be stored |
| num_threads_active | number of active threads / intervals |
| is_active_second | mark if thread has a second non-empty child interval |
Definition at line 440 of file bisect_util.hpp.
| __device__ void viennacl::linalg::cuda::compactStreamsFinal | ( | const unsigned int | tid, |
| const unsigned int | tid_2, | ||
| const unsigned int | num_threads_active, | ||
| unsigned int & | offset_mult_lambda, | ||
| NumericT * | s_left, | ||
| NumericT * | s_right, | ||
| unsigned short * | s_left_count, | ||
| unsigned short * | s_right_count, | ||
| unsigned short * | s_cl_one, | ||
| unsigned short * | s_cl_mult, | ||
| unsigned short * | s_cl_blocking, | ||
| unsigned short * | s_cl_helper, | ||
| unsigned int | is_one_lambda, | ||
| unsigned int | is_one_lambda_2, | ||
| NumericT & | left, | ||
| NumericT & | right, | ||
| NumericT & | left_2, | ||
| NumericT & | right_2, | ||
| unsigned int & | left_count, | ||
| unsigned int & | right_count, | ||
| unsigned int & | left_count_2, | ||
| unsigned int & | right_count_2, | ||
| unsigned int | c_block_iend, | ||
| unsigned int | c_sum_block, | ||
| unsigned int | c_block_iend_2, | ||
| unsigned int | c_sum_block_2 | ||
| ) |
Perform final stream compaction before writing data to global memory.
Definition at line 134 of file bisect_kernel_large.hpp.
| __global__ void viennacl::linalg::cuda::complex_to_real | ( | const ComplexT * | in, |
| RealT * | out, | ||
| unsigned int | size | ||
| ) |
Definition at line 808 of file fft_operations.hpp.
| void viennacl::linalg::cuda::complex_to_real | ( | viennacl::vector_base< NumericT > const & | in, |
| viennacl::vector_base< NumericT > & | out, | ||
| vcl_size_t | size | ||
| ) |
Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
Definition at line 818 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_jumper, |
| const unsigned int * | row_indices, | ||
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| unsigned int | nonzero_rows, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result, | ||
| unsigned int | size_result | ||
| ) |
Definition at line 863 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
| const unsigned int * | sp_mat_col_indices, | ||
| const NumericT * | sp_mat_elements, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 277 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
| const unsigned int * | sp_mat_col_indices, | ||
| const NumericT * | sp_mat_elements, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 431 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | result, | ||
| unsigned int | size | ||
| ) |
Definition at line 593 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_adaptive_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const unsigned int * | row_blocks, | ||
| const NumericT * | elements, | ||
| unsigned int | num_blocks, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result, | ||
| unsigned int | size_result | ||
| ) |
Definition at line 146 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result, | ||
| unsigned int | size_result | ||
| ) |
Definition at line 118 of file sparse_matrix_operations.hpp.
|
inline |
Compute midpoint of interval [left, right] avoiding overflow if possible
| left | left / lower limit of interval |
| right | right / upper limit of interval |
Definition at line 89 of file bisect_util.hpp.
|
inline |
Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix
| g_d | diagonal elements stored in global memory |
| g_s | superdiagonal elements stored in global memory |
| n | size of matrix |
| x | value for which the number of smaller eigenvalues is sought |
| tid | thread identifier (e.g. threadIdx.x or gtid) |
| num_intervals_active | number of active intervals / threads that currently process an interval |
| s_d | scratch space to store diagonal entries of the tridiagonal matrix in shared memory |
| s_s | scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory |
| converged | flag indicating whether the current thread has already converged (that is, the count does not have to be computed) |
Definition at line 177 of file bisect_util.hpp.
|
inline |
Compute number of eigenvalues that are smaller than x given a symmetric, real, and tridiagonal matrix
| g_d | diagonal elements stored in global memory |
| g_s | superdiagonal elements stored in global memory |
| n | size of matrix |
| x | value for which the number of smaller eigenvalues is sought |
| tid | thread identifier (e.g. threadIdx.x or gtid) |
| num_intervals_active | number of active intervals / threads that currently process an interval |
| s_d | scratch space to store diagonal entries of the tridiagonal matrix in shared memory |
| s_s | scratch space to store superdiagonal entries of the tridiagonal matrix in shared memory |
| converged | flag indicating whether the current thread has already converged (that is, the count does not have to be computed) |
Definition at line 237 of file bisect_util.hpp.
| __global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| const unsigned int * | group_boundaries, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 1180 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| const unsigned int * | group_boundaries, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 1375 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| const unsigned int * | group_boundaries, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result | ||
| ) |
Definition at line 1080 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::copy_col_column_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size, | ||
| uint | stride | ||
| ) |
Definition at line 1477 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::copy_col_row_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size, | ||
| uint | stride | ||
| ) |
Definition at line 1459 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::copy_row_column_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size, | ||
| uint | stride | ||
| ) |
Definition at line 1514 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::copy_row_row_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size, | ||
| uint | stride | ||
| ) |
Definition at line 1495 of file matrix_operations_col.hpp.
| void viennacl::linalg::cuda::copy_vec | ( | matrix_base< NumericT > & | A, |
| vector_base< NumericT > & | V, | ||
| vcl_size_t | row_start, | ||
| vcl_size_t | col_start, | ||
| bool | copy_col | ||
| ) |
This function copies a row or a column from a matrix to a vector.
| A | The matrix where to copy from. |
| V | The vector to fill with data. |
| row_start | The number of the first row to copy. |
| col_start | The number of the first column to copy. |
| copy_col | Set to TRUE to copy a column, FALSE to copy a row. |
Definition at line 2527 of file matrix_operations.hpp.
| __device__ void viennacl::linalg::cuda::createIndicesCompaction | ( | T * | s_compaction_list_exc, |
| unsigned int | num_threads_compaction | ||
| ) |
Create indices for compaction. That is, process s_compaction_list_exc, which is 1 for intervals that generated a second child and 0 otherwise, and create for each of the non-zero elements the index where the new interval belongs in a compact representation of all generated second children.
| s_compaction_list_exc | list containing the flags indicating which threads generated two children |
| num_threads_compaction | number of threads to employ for compaction |
Definition at line 373 of file bisect_util.hpp.
| __global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward | ( | const unsigned int * | row_jumper_U, |
| const unsigned int * | column_indices_U, | ||
| const NumericT * | elements_U, | ||
| const NumericT * | diagonal_U, | ||
| const unsigned int * | block_offsets, | ||
| NumericT * | result, | ||
| unsigned int | size | ||
| ) |
Definition at line 700 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward | ( | const unsigned int * | row_jumper_L, |
| const unsigned int * | column_indices_L, | ||
| const NumericT * | elements_L, | ||
| const unsigned int * | block_offsets, | ||
| NumericT * | result, | ||
| unsigned int | size | ||
| ) |
Definition at line 668 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_lu_backward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 257 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_lu_forward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 110 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| const NumericT * | diagonal_entries, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 597 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| const NumericT * | diagonal_entries, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 563 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| const NumericT * | diagonal_entries, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 429 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 342 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 497 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 367 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 180 of file sparse_matrix_operations_solve.hpp.
| __global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const NumericT * | elements, | ||
| NumericT * | vector, | ||
| unsigned int | size | ||
| ) |
Definition at line 42 of file sparse_matrix_operations_solve.hpp.
| __device__ NumericT viennacl::linalg::cuda::cuda_abs | ( | NumericT | val | ) |
Definition at line 2893 of file vector_operations.hpp.
|
inline |
Definition at line 2894 of file vector_operations.hpp.
|
inline |
Definition at line 2895 of file vector_operations.hpp.
|
inline |
Definition at line 2896 of file vector_operations.hpp.
|
inline |
Definition at line 2897 of file vector_operations.hpp.
| void viennacl::linalg::cuda::direct | ( | viennacl::vector< NumericT, AlignmentV > const & | in, |
| viennacl::vector< NumericT, AlignmentV > & | out, | ||
| vcl_size_t | size, | ||
| vcl_size_t | stride, | ||
| vcl_size_t | batch_num, | ||
| NumericT | sign = NumericT(-1), |
||
| viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
| ) |
Direct 1D algorithm for computing Fourier transformation.
Works on data of any size. The serial implementation has O(n^2) complexity.
Definition at line 196 of file fft_operations.hpp.
| void viennacl::linalg::cuda::direct | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const & | in, |
| viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | out, | ||
| vcl_size_t | size, | ||
| vcl_size_t | stride, | ||
| vcl_size_t | batch_num, | ||
| NumericT | sign = NumericT(-1), |
||
| viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
| ) |
Direct 2D algorithm for computing Fourier transformation.
Works on data of any size. The serial implementation has O(n^2) complexity.
Definition at line 221 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::el_wise_mul_div | ( | NumericT * | matrix1, |
| NumericT const * | matrix2, | ||
| NumericT const * | matrix3, | ||
| unsigned int | size | ||
| ) |
Main CUDA kernel for nonnegative matrix factorization of dense matrices.
Definition at line 38 of file nmf_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT, SizeT > & | A, |
| matrix_expression< const matrix_base< NumericT, SizeT >, const matrix_base< NumericT, SizeT >, op_element_binary< OpT > > const & | proxy | ||
| ) |
Definition at line 511 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< float, SizeT > & | A, |
| matrix_expression< const matrix_base< float, SizeT >, const matrix_base< float, SizeT >, op_element_binary< OpT > > const & | proxy | ||
| ) |
Definition at line 571 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< double, SizeT > & | A, |
| matrix_expression< const matrix_base< double, SizeT >, const matrix_base< double, SizeT >, op_element_binary< OpT > > const & | proxy | ||
| ) |
Definition at line 631 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_abs > > const & | proxy | ||
| ) |
Definition at line 699 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_acos > > const & | proxy | ||
| ) |
Definition at line 741 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_asin > > const & | proxy | ||
| ) |
Definition at line 783 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_atan > > const & | proxy | ||
| ) |
Definition at line 825 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_ceil > > const & | proxy | ||
| ) |
Definition at line 867 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cos > > const & | proxy | ||
| ) |
Definition at line 909 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_binary< OpT > > const & | proxy | ||
| ) |
Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)
| vec1 | The result vector (or -range, or -slice) |
| proxy | The proxy object holding v2, v3 and the operation |
Definition at line 933 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_cosh > > const & | proxy | ||
| ) |
Definition at line 951 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< float > & | vec1, |
| vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OpT > > const & | proxy | ||
| ) |
Definition at line 963 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< double > & | vec1, |
| vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OpT > > const & | proxy | ||
| ) |
Definition at line 993 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_exp > > const & | proxy | ||
| ) |
Definition at line 993 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_fabs > > const & | proxy | ||
| ) |
Definition at line 1035 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_acos > > const & | proxy | ||
| ) |
Definition at line 1038 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_asin > > const & | proxy | ||
| ) |
Definition at line 1065 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_floor > > const & | proxy | ||
| ) |
Definition at line 1077 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_atan > > const & | proxy | ||
| ) |
Definition at line 1093 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log > > const & | proxy | ||
| ) |
Definition at line 1119 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_ceil > > const & | proxy | ||
| ) |
Definition at line 1121 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cos > > const & | proxy | ||
| ) |
Definition at line 1149 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_log10 > > const & | proxy | ||
| ) |
Definition at line 1161 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_cosh > > const & | proxy | ||
| ) |
Definition at line 1177 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sin > > const & | proxy | ||
| ) |
Definition at line 1203 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_exp > > const & | proxy | ||
| ) |
Definition at line 1205 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_fabs > > const & | proxy | ||
| ) |
Definition at line 1233 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sinh > > const & | proxy | ||
| ) |
Definition at line 1245 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_abs > > const & | proxy | ||
| ) |
Definition at line 1260 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_sqrt > > const & | proxy | ||
| ) |
Definition at line 1287 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_floor > > const & | proxy | ||
| ) |
Definition at line 1289 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log > > const & | proxy | ||
| ) |
Definition at line 1317 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tan > > const & | proxy | ||
| ) |
Definition at line 1329 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_log10 > > const & | proxy | ||
| ) |
Definition at line 1345 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | matrix_base< NumericT > & | A, |
| matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_unary< op_tanh > > const & | proxy | ||
| ) |
Definition at line 1371 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sin > > const & | proxy | ||
| ) |
Definition at line 1373 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sinh > > const & | proxy | ||
| ) |
Definition at line 1401 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_sqrt > > const & | proxy | ||
| ) |
Definition at line 1429 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tan > > const & | proxy | ||
| ) |
Definition at line 1457 of file vector_operations.hpp.
| void viennacl::linalg::cuda::element_op | ( | vector_base< NumericT > & | vec1, |
| vector_expression< const vector_base< NumericT >, const vector_base< NumericT >, op_element_unary< op_tanh > > const & | proxy | ||
| ) |
Definition at line 1485 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::element_op_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2, | ||
| unsigned int | op_type | ||
| ) |
Definition at line 755 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::element_op_int_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2, | ||
| unsigned int | op_type | ||
| ) |
Definition at line 804 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::element_op_int_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| NumericT const * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3, | ||
| unsigned int | op_type | ||
| ) |
Definition at line 891 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::element_op_int_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2, | ||
| unsigned int | op_type | ||
| ) |
Definition at line 835 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::element_op_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| NumericT const * | vec3, | ||
| unsigned int | start3, | ||
| unsigned int | inc3, | ||
| unsigned int | op_type | ||
| ) |
Definition at line 845 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::element_op_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| const NumericT * | C, | ||
| unsigned int | C_start1, | ||
| unsigned int | C_start2, | ||
| unsigned int | C_inc1, | ||
| unsigned int | C_inc2, | ||
| unsigned int | C_internal_size1, | ||
| unsigned int | C_internal_size2, | ||
| unsigned int | op_type | ||
| ) |
Definition at line 786 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
| const NumericT * | sp_mat_elements, | ||
| unsigned int | sp_mat_row_num, | ||
| unsigned int | sp_mat_col_num, | ||
| unsigned int | sp_mat_internal_row_num, | ||
| unsigned int | sp_mat_items_per_row, | ||
| unsigned int | sp_mat_aligned_items_per_row, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 1645 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
| const NumericT * | sp_mat_elements, | ||
| unsigned int | sp_mat_row_num, | ||
| unsigned int | sp_mat_col_num, | ||
| unsigned int | sp_mat_internal_row_num, | ||
| unsigned int | sp_mat_items_per_row, | ||
| unsigned int | sp_mat_aligned_items_per_row, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 1818 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result, | ||
| unsigned int | row_num, | ||
| unsigned int | col_num, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| unsigned int | aligned_items_per_row | ||
| ) |
Definition at line 1575 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::exclusive_scan | ( | vector_base< NumericT, F > & | vec1, |
| vector_base< NumericT, F > & | vec2 | ||
| ) |
This function implements an exclusive scan.
| vec1 | Input vector: Gets overwritten by the routine. |
| vec2 | The output vector. |
Definition at line 2788 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::exclusive_scan_kernel_1 | ( | T * | X, |
| unsigned int | startX, | ||
| unsigned int | incX, | ||
| unsigned int | InputSize, | ||
| T * | Y, | ||
| unsigned int | startY, | ||
| unsigned int | incY, | ||
| T * | S, | ||
| unsigned int | startS, | ||
| unsigned int | incS | ||
| ) |
Definition at line 1868 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::fft_direct | ( | const Numeric2T * | input, |
| Numeric2T * | output, | ||
| unsigned int | size, | ||
| unsigned int | stride, | ||
| unsigned int | batch_num, | ||
| NumericT | sign, | ||
| bool | is_row_major | ||
| ) |
Definition at line 139 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::fft_div_vec_scalar | ( | Numeric2T * | input1, |
| unsigned int | size, | ||
| NumericT | factor | ||
| ) |
Definition at line 689 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::fft_mult_vec | ( | const NumericT * | input1, |
| const NumericT * | input2, | ||
| NumericT * | output, | ||
| unsigned int | size | ||
| ) |
Definition at line 656 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::fft_radix2 | ( | Numeric2T * | input, |
| unsigned int | s, | ||
| unsigned int | bit_size, | ||
| unsigned int | size, | ||
| unsigned int | stride, | ||
| unsigned int | batch_num, | ||
| NumericT | sign, | ||
| bool | is_row_major | ||
| ) |
Definition at line 370 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::fft_radix2_local | ( | Numeric2T * | input, |
| unsigned int | bit_size, | ||
| unsigned int | size, | ||
| unsigned int | stride, | ||
| unsigned int | batch_num, | ||
| NumericT | sign, | ||
| bool | is_row_major | ||
| ) |
Definition at line 297 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::fft_reorder | ( | NumericT * | input, |
| unsigned int | bit_size, | ||
| unsigned int | size, | ||
| unsigned int | stride, | ||
| unsigned int | batch_num, | ||
| bool | is_row_major | ||
| ) |
Definition at line 240 of file fft_operations.hpp.
|
inline |
Compute the next lower power of two of n
| n | number for which the next lower power of two is sought |
Definition at line 46 of file bisect_util.hpp.
|
inline |
Definition at line 127 of file fft_operations.hpp.
| void viennacl::linalg::cuda::givens_next | ( | matrix_base< NumericT > & | Q, |
| vector_base< NumericT > & | tmp1, | ||
| vector_base< NumericT > & | tmp2, | ||
| int | l, | ||
| int | m | ||
| ) |
This function updates the matrix Q. It is part of the tql2 algorithm.
| Q | The matrix to be updated. |
| tmp1 | Vector with data from the tql2 algorithm. |
| tmp2 | Vector with data from the tql2 algorithm. |
| l | Data from the tql2 algorithm. |
| m | Data from the tql2 algorithm. |
Definition at line 2695 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::givens_next_column_major_kernel | ( | T * | matr, |
| T * | cs, | ||
| T * | ss, | ||
| uint | size, | ||
| uint | stride, | ||
| uint | start_i, | ||
| uint | end_i | ||
| ) |
Definition at line 1771 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::givens_next_row_major_kernel | ( | T * | matr, |
| T * | cs, | ||
| T * | ss, | ||
| uint | size, | ||
| uint | stride, | ||
| uint | start_i, | ||
| uint | end_i | ||
| ) |
Definition at line 1724 of file matrix_operations_col.hpp.
| void viennacl::linalg::cuda::house_update_A_left | ( | matrix_base< NumericT > & | A, |
| vector_base< NumericT > & | D, | ||
| vcl_size_t | start | ||
| ) |
This function applies a Householder transformation to a matrix: A <- P * A with a Householder reflection P.
| A | The matrix to be updated. |
| D | The normalized householder vector. |
| start | The repetition counter. |
Definition at line 2588 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::house_update_A_left_column_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size1, | ||
| uint | size2, | ||
| uint | stride | ||
| ) |
Definition at line 1560 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::house_update_A_left_row_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size1, | ||
| uint | size2, | ||
| uint | stride | ||
| ) |
Definition at line 1535 of file matrix_operations_col.hpp.
| void viennacl::linalg::cuda::house_update_A_right | ( | matrix_base< NumericT > & | A, |
| vector_base< NumericT > & | D | ||
| ) |
This function applies a Householder transformation to a matrix: A <- A * P with a Householder reflection P.
| A | The matrix to be updated. |
| D | The normalized householder vector. |
Definition at line 2627 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::house_update_A_right_column_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size1, | ||
| uint | size2, | ||
| uint | stride | ||
| ) |
Definition at line 1618 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::house_update_A_right_row_major_kernel | ( | T * | A, |
| T * | V, | ||
| uint | row_start, | ||
| uint | col_start, | ||
| uint | size1, | ||
| uint | size2, | ||
| uint | stride | ||
| ) |
Definition at line 1587 of file matrix_operations_col.hpp.
| void viennacl::linalg::cuda::house_update_QL | ( | matrix_base< NumericT > & | Q, |
| vector_base< NumericT > & | D, | ||
| vcl_size_t | A_size1 | ||
| ) |
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
| Q | The matrix to be updated. |
| D | The householder vector. |
| A_size1 | size1 of matrix A |
Definition at line 2664 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::house_update_QL_column_major_kernel | ( | T * | QL, |
| T * | V, | ||
| uint | size1, | ||
| uint | strideQ | ||
| ) |
Definition at line 1696 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::house_update_QL_row_major_kernel | ( | T * | QL, |
| T * | V, | ||
| uint | size1, | ||
| uint | strideQ | ||
| ) |
Definition at line 1669 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
| const NumericT * | ell_elements, | ||
| const unsigned int * | csr_rows, | ||
| const unsigned int * | csr_cols, | ||
| const NumericT * | csr_elements, | ||
| unsigned int | row_num, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| unsigned int | aligned_items_per_row, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 2158 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
| const NumericT * | ell_elements, | ||
| const unsigned int * | csr_rows, | ||
| const unsigned int * | csr_cols, | ||
| const NumericT * | csr_elements, | ||
| unsigned int | row_num, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| unsigned int | aligned_items_per_row, | ||
| const NumericT * | d_mat, | ||
| unsigned int | d_mat_row_start, | ||
| unsigned int | d_mat_col_start, | ||
| unsigned int | d_mat_row_inc, | ||
| unsigned int | d_mat_col_inc, | ||
| unsigned int | d_mat_row_size, | ||
| unsigned int | d_mat_col_size, | ||
| unsigned int | d_mat_internal_rows, | ||
| unsigned int | d_mat_internal_cols, | ||
| NumericT * | result, | ||
| unsigned int | result_row_start, | ||
| unsigned int | result_col_start, | ||
| unsigned int | result_row_inc, | ||
| unsigned int | result_col_inc, | ||
| unsigned int | result_row_size, | ||
| unsigned int | result_col_size, | ||
| unsigned int | result_internal_rows, | ||
| unsigned int | result_internal_cols | ||
| ) |
Definition at line 2356 of file sparse_matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
| const NumericT * | ell_elements, | ||
| const unsigned int * | csr_rows, | ||
| const unsigned int * | csr_cols, | ||
| const NumericT * | csr_elements, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result, | ||
| unsigned int | row_num, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| unsigned int | aligned_items_per_row | ||
| ) |
Definition at line 2074 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::inclusive_scan | ( | vector_base< NumericT > & | vec1, |
| vector_base< NumericT > & | vec2 | ||
| ) |
This function implements an inclusive scan.
| vec1 | Input vector: Gets overwritten by the routine. |
| vec2 | The output vector. |
Definition at line 2730 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::inclusive_scan_kernel_1 | ( | T * | X, |
| unsigned int | startX, | ||
| unsigned int | incX, | ||
| unsigned int | InputSize, | ||
| T * | Y, | ||
| unsigned int | startY, | ||
| unsigned int | incY, | ||
| T * | S, | ||
| unsigned int | startS, | ||
| unsigned int | incS | ||
| ) |
Definition at line 1822 of file matrix_operations_col.hpp.
| vcl_size_t viennacl::linalg::cuda::index_norm_inf | ( | vector_base< NumericT > const & | vec1 | ) |
Computes the index of the first entry that is equal to the supremum-norm in modulus.
| vec1 | The vector |
Definition at line 2955 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::index_norm_inf_kernel | ( | const NumericT * | vec, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int * | result | ||
| ) |
Definition at line 2900 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::inner_prod_2_kernel | ( | const NumericT * | x, |
| unsigned int | startx, | ||
| unsigned int | stridex, | ||
| unsigned int | sizex, | ||
| const NumericT * | y0, | ||
| unsigned int | start0, | ||
| unsigned int | stride0, | ||
| const NumericT * | y1, | ||
| unsigned int | start1, | ||
| unsigned int | stride1, | ||
| NumericT * | group_results | ||
| ) |
Definition at line 1821 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::inner_prod_3_kernel | ( | const NumericT * | x, |
| unsigned int | startx, | ||
| unsigned int | stridex, | ||
| unsigned int | sizex, | ||
| const NumericT * | y0, | ||
| unsigned int | start0, | ||
| unsigned int | stride0, | ||
| const NumericT * | y1, | ||
| unsigned int | start1, | ||
| unsigned int | stride1, | ||
| const NumericT * | y2, | ||
| unsigned int | start2, | ||
| unsigned int | stride2, | ||
| NumericT * | group_results | ||
| ) |
Definition at line 1860 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::inner_prod_4_kernel | ( | const NumericT * | x, |
| unsigned int | startx, | ||
| unsigned int | stridex, | ||
| unsigned int | sizex, | ||
| const NumericT * | y0, | ||
| unsigned int | start0, | ||
| unsigned int | stride0, | ||
| const NumericT * | y1, | ||
| unsigned int | start1, | ||
| unsigned int | stride1, | ||
| const NumericT * | y2, | ||
| unsigned int | start2, | ||
| unsigned int | stride2, | ||
| const NumericT * | y3, | ||
| unsigned int | start3, | ||
| unsigned int | stride3, | ||
| NumericT * | group_results | ||
| ) |
Definition at line 1905 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::inner_prod_8_kernel | ( | const NumericT * | x, |
| unsigned int | startx, | ||
| unsigned int | stridex, | ||
| unsigned int | sizex, | ||
| const NumericT * | y0, | ||
| unsigned int | start0, | ||
| unsigned int | stride0, | ||
| const NumericT * | y1, | ||
| unsigned int | start1, | ||
| unsigned int | stride1, | ||
| const NumericT * | y2, | ||
| unsigned int | start2, | ||
| unsigned int | stride2, | ||
| const NumericT * | y3, | ||
| unsigned int | start3, | ||
| unsigned int | stride3, | ||
| const NumericT * | y4, | ||
| unsigned int | start4, | ||
| unsigned int | stride4, | ||
| const NumericT * | y5, | ||
| unsigned int | start5, | ||
| unsigned int | stride5, | ||
| const NumericT * | y6, | ||
| unsigned int | start6, | ||
| unsigned int | stride6, | ||
| const NumericT * | y7, | ||
| unsigned int | start7, | ||
| unsigned int | stride7, | ||
| NumericT * | group_results | ||
| ) |
Definition at line 1956 of file vector_operations.hpp.
| void viennacl::linalg::cuda::inner_prod_cpu | ( | vector_base< NumericT > const & | vec1, |
| vector_base< NumericT > const & | vec2, | ||
| NumericT & | result | ||
| ) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
| vec1 | The first vector |
| vec2 | The second vector |
| result | The result scalar (on the host) |
Definition at line 1785 of file vector_operations.hpp.
| void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< NumericT > const & | vec1, |
| vector_base< NumericT > const & | vec2, | ||
| ScalarT & | result | ||
| ) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
| vec1 | The first vector |
| vec2 | The second vector |
| result | The result scalar (on the gpu) |
Definition at line 1753 of file vector_operations.hpp.
| void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< NumericT > const & | x, |
| vector_tuple< NumericT > const & | vec_tuple, | ||
| vector_base< NumericT > & | result | ||
| ) |
Definition at line 2053 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::inner_prod_kernel | ( | const NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| unsigned int | size2, | ||
| NumericT * | group_buffer | ||
| ) |
Definition at line 1507 of file vector_operations.hpp.
| void viennacl::linalg::cuda::inplace_solve | ( | matrix_base< NumericT > const & | A, |
| matrix_base< NumericT > & | B, | ||
| SolverTagT | tag | ||
| ) |
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation).
| A | The system matrix |
| B | The matrix of row vectors, to which the solution is directly written |
| tag | Solver tag for identifying the respective triangular solver |
Definition at line 253 of file direct_solve.hpp.
| void viennacl::linalg::cuda::inplace_solve | ( | matrix_base< NumericT > const & | mat, |
| vector_base< NumericT > & | vec, | ||
| SolverTagT | |||
| ) |
Direct inplace solver for dense triangular systems (non-transposed version)
| mat | The system matrix proxy |
| vec | The load vector, to which the solution is directly written |
Definition at line 398 of file direct_solve.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::unit_lower_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 627 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::lower_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 648 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::unit_upper_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 670 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixT & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::upper_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 691 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::unit_lower_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 715 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::lower_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 736 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::unit_upper_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 767 of file sparse_matrix_operations.hpp.
| viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixT>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixT, const SparseMatrixT, op_trans > & | mat, |
| viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::linalg::upper_tag | |||
| ) |
Carries out triangular inplace solves.
| mat | The matrix |
| vec | The vector holding the right hand side. Is overwritten by the solution. |
Definition at line 788 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::matrix_assign | ( | matrix_base< NumericT > & | mat, |
| NumericT | s, | ||
| bool | clear = false | ||
| ) |
Definition at line 279 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_assign_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | alpha | ||
| ) |
Definition at line 718 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | alpha | ||
| ) |
Definition at line 736 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 851 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 874 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 897 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 920 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 943 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 966 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 989 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1012 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1035 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1058 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1104 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1081 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1127 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1150 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1173 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1196 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1219 of file matrix_operations_col.hpp.
void viennacl::linalg::cuda::matrix_column(const matrix_base< NumericT > & mat, unsigned int j, vector_base< NumericT > & vec)
Definition at line 472 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diag_from_vector(const vector_base< NumericT > & vec, int k, matrix_base< NumericT > & mat)
Definition at line 340 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diag_to_vector(matrix_base< NumericT > const & mat, int k, vector_base< NumericT > & vec)
Definition at line 392 of file matrix_operations.hpp.
void viennacl::linalg::cuda::matrix_diagonal_assign(matrix_base< NumericT > & mat, NumericT s)
Definition at line 311 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 38 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 125 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 212 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 299 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 749 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 836 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 923 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1010 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1463 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1550 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1637 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1724 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2178 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2265 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2352 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2439 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel | ( | const NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| bool | row_major_A, | ||
| NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_size1, | ||
| unsigned int | B_size2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| bool | row_major_B, | ||
| bool | unit_diagonal | ||
| ) |
Definition at line 107 of file direct_solve.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 393 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 480 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 567 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 654 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1104 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1191 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1278 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1365 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1819 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1906 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 1993 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2080 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2535 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2622 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2709 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel | ( | NumericT | alpha, |
| const NumericT * | A, | ||
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | B, | ||
| unsigned int | B_row_start, | ||
| unsigned int | B_col_start, | ||
| unsigned int | B_row_inc, | ||
| unsigned int | B_col_inc, | ||
| unsigned int | B_row_size, | ||
| unsigned int | B_col_size, | ||
| unsigned int | B_internal_rows, | ||
| unsigned int | B_internal_cols, | ||
| NumericT | beta, | ||
| NumericT * | C, | ||
| unsigned int | C_row_start, | ||
| unsigned int | C_col_start, | ||
| unsigned int | C_row_inc, | ||
| unsigned int | C_col_inc, | ||
| unsigned int | C_row_size, | ||
| unsigned int | C_col_size, | ||
| unsigned int | C_internal_rows, | ||
| unsigned int | C_internal_cols | ||
| ) |
Definition at line 2796 of file matrix_operations_prod.hpp.
| __global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel | ( | const NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| bool | row_major_A, | ||
| NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_size1, | ||
| unsigned int | B_size2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| bool | row_major_B, | ||
| bool | unit_diagonal | ||
| ) |
Definition at line 41 of file direct_solve.hpp.
| void viennacl::linalg::cuda::matrix_row | ( | matrix_base< NumericT > const & | mat, |
| unsigned int | i, | ||
| vector_base< NumericT > & | vec | ||
| ) |
Definition at line 439 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_assign_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | alpha | ||
| ) |
Definition at line 749 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | alpha | ||
| ) |
Definition at line 767 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 881 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 904 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 927 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 950 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 973 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 996 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1019 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1042 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1065 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1088 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1134 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1111 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1157 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1180 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1203 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1226 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_inc1, | ||
| unsigned int | B_inc2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2 | ||
| ) |
Definition at line 1249 of file matrix_operations_row.hpp.
| void viennacl::linalg::cuda::max_cpu | ( | vector_base< NumericT > const & | vec1, |
| NumericT & | result | ||
| ) |
Computes the maximum of a vector, first reduction stage on the GPU, second stage on the CPU.
| vec1 | The vector |
| result | The result host scalar |
Definition at line 2810 of file vector_operations.hpp.
| void viennacl::linalg::cuda::max_impl | ( | vector_base< NumericT > const & | vec1, |
| scalar< NumericT > & | result | ||
| ) |
Computes the maximum of a vector, both reduction stages run on the GPU.
| vec1 | The vector |
| result | The result GPU scalar |
Definition at line 2782 of file vector_operations.hpp.
| void viennacl::linalg::cuda::min_cpu | ( | vector_base< NumericT > const & | vec1, |
| NumericT & | result | ||
| ) |
Computes the minimum of a vector, first reduction stage on the GPU, second stage on the CPU.
| vec1 | The vector |
| result | The result host scalar |
Definition at line 2864 of file vector_operations.hpp.
| void viennacl::linalg::cuda::min_impl | ( | vector_base< NumericT > const & | vec1, |
| scalar< NumericT > & | result | ||
| ) |
Computes the minimum of a vector, both reduction stages run on the GPU.
| vec1 | The vector |
| result | The result GPU scalar |
Definition at line 2836 of file vector_operations.hpp.
| void viennacl::linalg::cuda::multiply_complex | ( | viennacl::vector< NumericT, AlignmentV > const & | input1, |
| viennacl::vector< NumericT, AlignmentV > const & | input2, | ||
| viennacl::vector< NumericT, AlignmentV > & | output | ||
| ) |
Multiply two complex vectors and store the result in output.
Definition at line 673 of file fft_operations.hpp.
| void viennacl::linalg::cuda::nmf | ( | viennacl::matrix_base< NumericT > const & | V, |
| viennacl::matrix_base< NumericT > & | W, | ||
| viennacl::matrix_base< NumericT > & | H, | ||
| viennacl::linalg::nmf_config const & | conf | ||
| ) |
The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
| V | Input matrix |
| W | First factor |
| H | Second factor |
| conf | A configuration object holding tolerances and the like |
Definition at line 59 of file nmf_operations.hpp.
| void viennacl::linalg::cuda::norm_1_cpu | ( | vector_base< NumericT > const & | vec1, |
| NumericT & | result | ||
| ) |
Computes the l^1-norm of a vector.
| vec1 | The vector |
| result | The result scalar |
Definition at line 2622 of file vector_operations.hpp.
| void viennacl::linalg::cuda::norm_1_impl | ( | vector_base< NumericT > const & | vec1, |
| scalar< NumericT > & | result | ||
| ) |
Computes the l^1-norm of a vector.
| vec1 | The vector |
| result | The result scalar |
Definition at line 2604 of file vector_operations.hpp.
| void viennacl::linalg::cuda::norm_2_cpu | ( | vector_base< NumericT > const & | vec1, |
| NumericT & | result | ||
| ) |
Computes the l^2-norm of a vector - implementation.
| vec1 | The vector |
| result | The result scalar |
Definition at line 2668 of file vector_operations.hpp.
| void viennacl::linalg::cuda::norm_2_impl | ( | vector_base< NumericT > const & | vec1, |
| scalar< NumericT > & | result | ||
| ) |
Computes the l^2-norm of a vector - implementation.
| vec1 | The vector |
| result | The result scalar |
Definition at line 2649 of file vector_operations.hpp.
| void viennacl::linalg::cuda::norm_inf_cpu | ( | vector_base< NumericT > const & | vec1, |
| NumericT & | result | ||
| ) |
Computes the supremum-norm of a vector.
| vec1 | The vector |
| result | The result scalar |
Definition at line 2716 of file vector_operations.hpp.
| void viennacl::linalg::cuda::norm_inf_impl | ( | vector_base< NumericT > const & | vec1, |
| scalar< NumericT > & | result | ||
| ) |
Computes the supremum-norm of a vector.
| vec1 | The vector |
| result | The result scalar |
Definition at line 2696 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::norm_kernel_floats | ( | const NumericT * | vec, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | norm_selector, | ||
| NumericT * | group_buffer | ||
| ) |
Definition at line 2252 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::norm_kernel_integers | ( | const NumericT * | vec, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | norm_selector, | ||
| NumericT * | group_buffer | ||
| ) |
Definition at line 2345 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers | ( | const NumericT * | vec, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | norm_selector, | ||
| NumericT * | group_buffer | ||
| ) |
Definition at line 2429 of file vector_operations.hpp.
| void viennacl::linalg::cuda::normalize | ( | viennacl::vector< NumericT, AlignmentV > & | input | ) |
Normalize the vector by its own size.
Definition at line 699 of file fft_operations.hpp.
|
inline |
Definition at line 97 of file fft_operations.hpp.
|
inline |
Definition at line 122 of file fft_operations.hpp.
|
inline |
Definition at line 79 of file fft_operations.hpp.
|
inline |
Definition at line 103 of file fft_operations.hpp.
|
inline |
Definition at line 85 of file fft_operations.hpp.
|
inline |
Definition at line 109 of file fft_operations.hpp.
|
inline |
Definition at line 91 of file fft_operations.hpp.
|
inline |
Definition at line 116 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_coo_vec_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| const unsigned int * | group_boundaries, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| const NumericT * | r0star, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size, | ||
| unsigned int | buffer_offset | ||
| ) |
Definition at line 942 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_csr_vec_mul_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const unsigned int * | row_blocks, | ||
| const NumericT * | elements, | ||
| unsigned int | num_blocks, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| const NumericT * | r0star, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size, | ||
| unsigned int | buffer_offset | ||
| ) |
Definition at line 800 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_ell_vec_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| const NumericT * | r0star, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size, | ||
| unsigned int | buffer_offset | ||
| ) |
Definition at line 1090 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_hyb_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
| const NumericT * | ell_elements, | ||
| const unsigned int * | csr_rows, | ||
| const unsigned int * | csr_cols, | ||
| const NumericT * | csr_elements, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| const NumericT * | r0star, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size, | ||
| unsigned int | buffer_offset | ||
| ) |
Definition at line 1287 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | compressed_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > const & | r0star, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Definition at line 908 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | coordinate_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > const & | r0star, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Definition at line 1056 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | ell_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > const & | r0star, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Definition at line 1153 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | sliced_ell_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > const & | r0star, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Definition at line 1254 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_prod | ( | hyb_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > const & | r0star, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Definition at line 1363 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_sliced_ell_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
| const unsigned int * | column_indices, | ||
| const unsigned int * | block_start, | ||
| const NumericT * | elements, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| const NumericT * | r0star, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size, | ||
| unsigned int | buffer_offset | ||
| ) |
Definition at line 1185 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_update_s | ( | vector_base< NumericT > & | s, |
| vector_base< NumericT > & | r, | ||
| vector_base< NumericT > const & | Ap, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Definition at line 695 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_update_s_kernel | ( | NumericT * | s, |
| NumericT const * | residual, | ||
| NumericT const * | Ap, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | chunk_size, | ||
| unsigned int | chunk_offset | ||
| ) |
Definition at line 638 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_bicgstab_vector_kernel | ( | NumericT * | result, |
| NumericT | alpha, | ||
| NumericT * | p, | ||
| NumericT | omega, | ||
| NumericT const * | s, | ||
| NumericT * | residual, | ||
| NumericT const * | As, | ||
| NumericT | beta, | ||
| NumericT const * | Ap, | ||
| NumericT const * | r0star, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | size | ||
| ) |
Definition at line 717 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_bicgstab_vector_update | ( | vector_base< NumericT > & | result, |
| NumericT | alpha, | ||
| vector_base< NumericT > & | p, | ||
| NumericT | omega, | ||
| vector_base< NumericT > const & | s, | ||
| vector_base< NumericT > & | residual, | ||
| vector_base< NumericT > const & | As, | ||
| NumericT | beta, | ||
| vector_base< NumericT > const & | Ap, | ||
| vector_base< NumericT > const & | r0star, | ||
| vector_base< NumericT > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size | ||
| ) |
Definition at line 768 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_cg_coo_vec_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| const unsigned int * | group_boundaries, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size | ||
| ) |
Definition at line 241 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_cg_csr_vec_mul_kernel | ( | const unsigned int * | row_indices, |
| const unsigned int * | column_indices, | ||
| const unsigned int * | row_blocks, | ||
| const NumericT * | elements, | ||
| unsigned int | num_blocks, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size | ||
| ) |
Definition at line 114 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_cg_ell_vec_mul_kernel | ( | const unsigned int * | coords, |
| const NumericT * | elements, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size | ||
| ) |
Definition at line 373 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_cg_hyb_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
| const NumericT * | ell_elements, | ||
| const unsigned int * | csr_rows, | ||
| const unsigned int * | csr_cols, | ||
| const NumericT * | csr_elements, | ||
| unsigned int | internal_row_num, | ||
| unsigned int | items_per_row, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size | ||
| ) |
Definition at line 542 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_cg_prod | ( | compressed_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > & | inner_prod_buffer | ||
| ) |
Definition at line 213 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_cg_prod | ( | coordinate_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > & | inner_prod_buffer | ||
| ) |
Definition at line 345 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_cg_prod | ( | ell_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > & | inner_prod_buffer | ||
| ) |
Definition at line 428 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_cg_prod | ( | sliced_ell_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > & | inner_prod_buffer | ||
| ) |
Definition at line 515 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_cg_prod | ( | hyb_matrix< NumericT > const & | A, |
| vector_base< NumericT > const & | p, | ||
| vector_base< NumericT > & | Ap, | ||
| vector_base< NumericT > & | inner_prod_buffer | ||
| ) |
Definition at line 610 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_cg_sliced_ell_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
| const unsigned int * | column_indices, | ||
| const unsigned int * | block_start, | ||
| const NumericT * | elements, | ||
| const NumericT * | p, | ||
| NumericT * | Ap, | ||
| unsigned int | size, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | buffer_size | ||
| ) |
Definition at line 454 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_cg_vector_kernel | ( | NumericT * | result, |
| NumericT | alpha, | ||
| NumericT * | p, | ||
| NumericT * | r, | ||
| NumericT const * | Ap, | ||
| NumericT | beta, | ||
| NumericT * | inner_prod_buffer, | ||
| unsigned int | size | ||
| ) |
Definition at line 44 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_cg_vector_update | ( | vector_base< NumericT > & | result, |
| NumericT | alpha, | ||
| vector_base< NumericT > & | p, | ||
| vector_base< NumericT > & | r, | ||
| vector_base< NumericT > const & | Ap, | ||
| NumericT | beta, | ||
| vector_base< NumericT > & | inner_prod_buffer | ||
| ) |
Definition at line 85 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1 | ( | vector_base< T > const & | device_krylov_basis, |
| vcl_size_t | v_k_size, | ||
| vcl_size_t | v_k_internal_size, | ||
| vcl_size_t | param_k, | ||
| vector_base< T > & | vi_in_vk_buffer, | ||
| vcl_size_t | buffer_chunk_size | ||
| ) |
Definition at line 1540 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage1_kernel | ( | T const * | krylov_basis, |
| unsigned int | size, | ||
| unsigned int | internal_size, | ||
| unsigned int | k, | ||
| T * | vi_in_vk_buffer, | ||
| unsigned int | chunk_size | ||
| ) |
Definition at line 1488 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2 | ( | vector_base< T > & | device_krylov_basis, |
| vcl_size_t | v_k_size, | ||
| vcl_size_t | v_k_internal_size, | ||
| vcl_size_t | param_k, | ||
| vector_base< T > const & | vi_in_vk_buffer, | ||
| vector_base< T > & | R_buffer, | ||
| vcl_size_t | krylov_dim, | ||
| vector_base< T > & | inner_prod_buffer, | ||
| vcl_size_t | buffer_chunk_size | ||
| ) |
Definition at line 1632 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_gmres_gram_schmidt_stage2_kernel | ( | T * | krylov_basis, |
| unsigned int | size, | ||
| unsigned int | internal_size, | ||
| unsigned int | k, | ||
| T const * | vi_in_vk_buffer, | ||
| unsigned int | chunk_size, | ||
| T * | R_buffer, | ||
| unsigned int | krylov_dim, | ||
| T * | inner_prod_buffer | ||
| ) |
Definition at line 1565 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_normalize_vk | ( | vector_base< T > & | v_k, |
| vector_base< T > const & | residual, | ||
| vector_base< T > & | R_buffer, | ||
| vcl_size_t | offset_in_R, | ||
| vector_base< T > const & | inner_prod_buffer, | ||
| vector_base< T > & | r_dot_vk_buffer, | ||
| vcl_size_t | buffer_chunk_size, | ||
| vcl_size_t | buffer_chunk_offset | ||
| ) |
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
This routine computes, for vectors 'r' and 'v_k': (1) the second reduction step for ||v_k||; (2) the normalization v_k /= ||v_k||; (3) the first reduction step for <r, v_k>.
Definition at line 1457 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_gmres_normalize_vk_kernel | ( | T * | vk, |
| unsigned int | vk_offset, | ||
| T const * | residual, | ||
| T * | R_buffer, | ||
| unsigned int | R_offset, | ||
| T const * | inner_prod_buffer, | ||
| unsigned int | chunk_size, | ||
| T * | r_dot_vk_buffer, | ||
| unsigned int | chunk_offset, | ||
| unsigned int | size | ||
| ) |
Definition at line 1395 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_prod | ( | compressed_matrix< T > const & | A, |
| vector_base< T > const & | p, | ||
| vector_base< T > & | Ap, | ||
| vector_base< T > & | inner_prod_buffer | ||
| ) |
Definition at line 1709 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_prod | ( | coordinate_matrix< T > const & | A, |
| vector_base< T > const & | p, | ||
| vector_base< T > & | Ap, | ||
| vector_base< T > & | inner_prod_buffer | ||
| ) |
Definition at line 1731 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_prod | ( | ell_matrix< T > const & | A, |
| vector_base< T > const & | p, | ||
| vector_base< T > & | Ap, | ||
| vector_base< T > & | inner_prod_buffer | ||
| ) |
Definition at line 1753 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_prod | ( | sliced_ell_matrix< T > const & | A, |
| vector_base< T > const & | p, | ||
| vector_base< T > & | Ap, | ||
| vector_base< T > & | inner_prod_buffer | ||
| ) |
Definition at line 1774 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_prod | ( | hyb_matrix< T > const & | A, |
| vector_base< T > const & | p, | ||
| vector_base< T > & | Ap, | ||
| vector_base< T > & | inner_prod_buffer | ||
| ) |
Definition at line 1796 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::pipelined_gmres_update_result | ( | vector_base< T > & | result, |
| vector_base< T > const & | residual, | ||
| vector_base< T > const & | krylov_basis, | ||
| vcl_size_t | v_k_size, | ||
| vcl_size_t | v_k_internal_size, | ||
| vector_base< T > const & | coefficients, | ||
| vcl_size_t | param_k | ||
| ) |
Definition at line 1684 of file iterative_operations.hpp.
| __global__ void viennacl::linalg::cuda::pipelined_gmres_update_result_kernel | ( | T * | result, |
| T const * | residual, | ||
| T const * | krylov_basis, | ||
| unsigned int | size, | ||
| unsigned int | internal_size, | ||
| T const * | coefficients, | ||
| unsigned int | k | ||
| ) |
Definition at line 1664 of file iterative_operations.hpp.
| void viennacl::linalg::cuda::plane_rotation | ( | vector_base< NumericT > & | vec1, |
| vector_base< NumericT > & | vec2, | ||
| NumericT | alpha, | ||
| NumericT | beta | ||
| ) |
Computes a plane rotation of two vectors.
Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)
| vec1 | The first vector |
| vec2 | The second vector |
| alpha | The first transformation coefficient |
| beta | The second transformation coefficient |
Definition at line 3015 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::plane_rotation_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| unsigned int | size2, | ||
| NumericT | alpha, | ||
| NumericT | beta | ||
| ) |
Definition at line 2979 of file vector_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | mat, |
| const viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication with a compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
| mat | The matrix |
| vec | The vector |
| result | The result vector |
Definition at line 225 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | sp_mat, |
| const viennacl::matrix_base< NumericT > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out sparse matrix-dense matrix multiplication, with the first matrix being a compressed_matrix.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
| sp_mat | The sparse matrix |
| d_mat | The dense matrix |
| result | The result matrix |
Definition at line 339 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< NumericT, AlignmentV > & | sp_mat, |
| const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out matrix-trans(matrix) multiplication, with the first matrix being compressed and the second transposed.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
| sp_mat | The sparse matrix |
| d_mat | The transposed dense matrix proxy |
| result | The result matrix |
Definition at line 494 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_compressed_matrix< NumericT > & | mat, |
| const viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
| mat | The matrix |
| vec | The vector |
| result | The result vector |
Definition at line 906 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | mat, |
| const viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication with a coordinate_matrix.
Implementation of the convenience expression result = prod(mat, vec);
| mat | The matrix |
| vec | The vector |
| result | The result vector |
Definition at line 1157 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | sp_mat, |
| const viennacl::matrix_base< NumericT > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out sparse matrix (COO)-dense matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
| sp_mat | The Sparse Matrix (Coordinate format) |
| d_mat | The Dense Matrix |
| result | The Result Matrix |
Definition at line 1283 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT > & | mat, |
| bool | mat_transpose, | ||
| const vector_base< NumericT > & | vec, | ||
| vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication.
Implementation of the convenience expressions result = prod(mat, vec); and result = prod(trans(mat), vec);
| mat | The matrix |
| mat_transpose | Whether the matrix is to be transposed. |
| vec | The vector |
| result | The result vector |
Definition at line 1427 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, AlignmentV > & | sp_mat, |
| const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out sparse matrix (COO)-dense transposed matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
| sp_mat | The Sparse Matrix (Coordinate format) |
| d_mat | The Dense Transposed Matrix |
| result | The Result Matrix |
Definition at line 1477 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | mat, |
| const viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication with an ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
| mat | The matrix |
| vec | The vector |
| result | The result vector |
Definition at line 1623 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | sp_mat, |
| const viennacl::matrix_base< NumericT > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format
| sp_mat | The sparse matrix (ELL) |
| d_mat | The dense matrix |
| result | The result matrix |
Definition at line 1715 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< NumericT, AlignmentV > & | sp_mat, |
| const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format
| sp_mat | The sparse matrix (ELL) |
| d_mat | The transposed dense matrix proxy |
| result | The result matrix |
Definition at line 1888 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::sliced_ell_matrix< NumericT, IndexT > & | mat, |
| const viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication with a sliced_ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
| mat | The matrix |
| vec | The vector |
| result | The result vector |
Definition at line 2047 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
| const viennacl::vector_base< NumericT > & | vec, | ||
| viennacl::vector_base< NumericT > & | result | ||
| ) |
Carries out matrix-vector multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, vec);
| mat | The matrix |
| vec | The vector |
| result | The result vector |
Definition at line 2132 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
| const viennacl::matrix_base< NumericT > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, d_mat);
| mat | The sparse matrix |
| d_mat | The dense matrix (row- or column-major) |
| result | The dense result matrix (row- or column-major) |
Definition at line 2239 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT > & | A, |
| bool | trans_A, | ||
| const matrix_base< NumericT > & | B, | ||
| bool | trans_B, | ||
| matrix_base< NumericT > & | C, | ||
| ScalarT | alpha, | ||
| ScalarT | beta | ||
| ) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(A, B);
Definition at line 2385 of file matrix_operations.hpp.
| void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, AlignmentV > & | mat, |
| const viennacl::matrix_expression< const viennacl::matrix_base< NumericT >, const viennacl::matrix_base< NumericT >, viennacl::op_trans > & | d_mat, | ||
| viennacl::matrix_base< NumericT > & | result | ||
| ) |
Carries out sparse matrix-dense transposed matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, trans(d_mat));
| mat | The sparse matrix |
| d_mat | Transposed matrix proxy object for the rhs dense matrix (row- or column-major) |
| result | The dense result matrix (row- or column-major) |
Definition at line 2437 of file sparse_matrix_operations.hpp.
| void viennacl::linalg::cuda::radix2 | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
| vcl_size_t | size, | ||
| vcl_size_t | stride, | ||
| vcl_size_t | batch_num, | ||
| NumericT | sign = NumericT(-1), |
||
| viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
| ) |
Radix-2 1D algorithm for computing Fourier transformation.
Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.
Definition at line 441 of file fft_operations.hpp.
| void viennacl::linalg::cuda::radix2 | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | in, |
| vcl_size_t | size, | ||
| vcl_size_t | stride, | ||
| vcl_size_t | batch_num, | ||
| NumericT | sign = NumericT(-1), |
||
| viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
| ) |
Radix-2 2D algorithm for computing Fourier transformation.
Works only on power-of-two sizes of data. Serial implementation has O(n * lg n) complexity. This is a Cooley-Tukey algorithm.
Definition at line 493 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::real_to_complex | ( | const RealT * | in, |
| ComplexT * | out, | ||
| unsigned int | size | ||
| ) |
Definition at line 781 of file fft_operations.hpp.
| void viennacl::linalg::cuda::real_to_complex | ( | viennacl::vector_base< NumericT > const & | in, |
| viennacl::vector_base< NumericT > & | out, | ||
| vcl_size_t | size | ||
| ) |
Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part)
Definition at line 796 of file fft_operations.hpp.
| void viennacl::linalg::cuda::reorder | ( | viennacl::vector< NumericT, AlignmentV > & | in, |
| vcl_size_t | size, | ||
| vcl_size_t | stride, | ||
| vcl_size_t | bits_datasize, | ||
| vcl_size_t | batch_num, | ||
| viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER | data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR |
||
| ) |
Definition at line 281 of file fft_operations.hpp.
| void viennacl::linalg::cuda::reverse | ( | viennacl::vector_base< NumericT > & | in | ) |
Reverse the vector into the opposite order and save it in the input vector.
Definition at line 846 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::reverse_inplace | ( | NumericT * | vec, |
| uint | size | ||
| ) |
Definition at line 831 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::scalar_swap_kernel | ( | NumericT * | s1, |
| NumericT * | s2 | ||
| ) |
Definition at line 345 of file scalar_operations.hpp.
| __global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | val, | ||
| unsigned int | options2, | ||
| const NumericT * | vec1, | ||
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| unsigned int | size2 | ||
| ) |
Definition at line 1334 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | val, | ||
| unsigned int | options2, | ||
| const NumericT * | vec1, | ||
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| unsigned int | size2 | ||
| ) |
Definition at line 1374 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT | val, | ||
| unsigned int | options2, | ||
| const NumericT * | vec1, | ||
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| unsigned int | size2 | ||
| ) |
Definition at line 1363 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| const NumericT * | val, | ||
| unsigned int | options2, | ||
| const NumericT * | vec1, | ||
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| const NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2, | ||
| unsigned int | size2 | ||
| ) |
Definition at line 1403 of file matrix_operations_row.hpp.
| void viennacl::linalg::cuda::scaled_rank_1_update | ( | matrix_base< NumericT > & | mat1, |
| ScalarT const & | alpha, | ||
| vcl_size_t | len_alpha, | ||
| bool | reciprocal_alpha, | ||
| bool | flip_sign_alpha, | ||
| const vector_base< NumericT > & | vec1, | ||
| const vector_base< NumericT > & | vec2 | ||
| ) |
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.
Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
| mat1 | The matrix to be updated |
| alpha | The scaling factor (either a viennacl::scalar<>, float, or double) |
| len_alpha | Length of the buffer for an eventual final reduction step (currently always '1') |
| reciprocal_alpha | Use 1/alpha instead of alpha |
| flip_sign_alpha | Use -alpha instead of alpha |
| vec1 | The first vector |
| vec2 | The second vector |
Definition at line 2417 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::scan_kernel_2 | ( | T * | S_ref, |
| unsigned int | startS_ref, | ||
| unsigned int | incS_ref, | ||
| T * | S, | ||
| unsigned int | startS, | ||
| unsigned int | incS, | ||
| unsigned int | InputSize | ||
| ) |
Definition at line 1918 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::scan_kernel_3 | ( | T * | S_ref, |
| unsigned int | startS_ref, | ||
| unsigned int | incS_ref, | ||
| T * | S, | ||
| unsigned int | startS, | ||
| unsigned int | incS | ||
| ) |
Definition at line 1960 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::scan_kernel_4 | ( | T * | S, |
| unsigned int | startS, | ||
| unsigned int | incS, | ||
| T * | Y, | ||
| unsigned int | startY, | ||
| unsigned int | incY, | ||
| unsigned int | OutputSize | ||
| ) |
Definition at line 1979 of file matrix_operations_col.hpp.
|
inline |
Compute addresses to obtain compact list of block start addresses.
Definition at line 238 of file bisect_kernel_large.hpp.
|
inline |
Perform initial scan for compaction of intervals containing one and multiple eigenvalues; also do initial scan to build blocks
Definition at line 369 of file bisect_kernel_large.hpp.
|
inline |
Perform scan to obtain number of eigenvalues before a specific block.
Definition at line 303 of file bisect_kernel_large.hpp.
| __global__ void viennacl::linalg::cuda::sliced_ell_matrix_vec_mul_kernel | ( | const unsigned int * | columns_per_block, |
| const unsigned int * | column_indices, | ||
| const unsigned int * | block_start, | ||
| const NumericT * | elements, | ||
| const NumericT * | x, | ||
| unsigned int | start_x, | ||
| unsigned int | inc_x, | ||
| unsigned int | size_x, | ||
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result, | ||
| unsigned int | size_result | ||
| ) |
Definition at line 2001 of file sparse_matrix_operations.hpp.
| __device__ void viennacl::linalg::cuda::storeInterval | ( | unsigned int | addr, |
| NumericT * | s_left, | ||
| NumericT * | s_right, | ||
| T * | s_left_count, | ||
| T * | s_right_count, | ||
| NumericT | left, | ||
| NumericT | right, | ||
| S | left_count, | ||
| S | right_count, | ||
| NumericT | precision | ||
| ) |
Check if interval converged and store appropriately
| addr | address where to store the information of the interval |
| s_left | shared memory storage for left interval limits |
| s_right | shared memory storage for right interval limits |
| s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
| s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
| left | lower limit of interval |
| right | upper limit of interval |
| left_count | eigenvalues less than left |
| right_count | eigenvalues less than right |
| precision | desired precision for eigenvalues |
Definition at line 124 of file bisect_util.hpp.
| __device__ void viennacl::linalg::cuda::storeIntervalConverged | ( | NumericT * | s_left, |
| NumericT * | s_right, | ||
| T * | s_left_count, | ||
| T * | s_right_count, | ||
| NumericT & | left, | ||
| NumericT & | mid, | ||
| NumericT & | right, | ||
| S & | left_count, | ||
| S & | mid_count, | ||
| S & | right_count, | ||
| T * | s_compaction_list_exc, | ||
| unsigned int & | compact_second_chunk, | ||
| const unsigned int | num_threads_active, | ||
| unsigned int & | is_active_second | ||
| ) |
Definition at line 465 of file bisect_util.hpp.
| __device__ void viennacl::linalg::cuda::storeNonEmptyIntervals | ( | unsigned int | addr, |
| const unsigned int | num_threads_active, | ||
| NumericT * | s_left, | ||
| NumericT * | s_right, | ||
| T * | s_left_count, | ||
| T * | s_right_count, | ||
| NumericT | left, | ||
| NumericT | mid, | ||
| NumericT | right, | ||
| const S | left_count, | ||
| const S | mid_count, | ||
| const S | right_count, | ||
| NumericT | precision, | ||
| unsigned int & | compact_second_chunk, | ||
| T * | s_compaction_list_exc, | ||
| unsigned int & | is_active_second | ||
| ) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread.
| addr | base address for storing intervals |
| num_threads_active | number of threads / intervals in current sweep |
| s_left | shared memory storage for left interval limits |
| s_right | shared memory storage for right interval limits |
| s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
| s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
| left | lower limit of interval |
| mid | midpoint of interval |
| right | upper limit of interval |
| left_count | eigenvalues less than left |
| mid_count | eigenvalues less than mid |
| right_count | eigenvalues less than right |
| precision | desired precision for eigenvalues |
| compact_second_chunk | shared memory flag indicating whether the second chunk is used and therefore requires compaction |
| s_compaction_list_exc | helper array for stream compaction, s_compaction_list_exc[tid] = 1 when the thread generated two child intervals |
| is_active_second | mark if thread has a second non-empty child interval |
Definition at line 309 of file bisect_util.hpp.
| __device__ void viennacl::linalg::cuda::storeNonEmptyIntervalsLarge | ( | unsigned int | addr, |
| const unsigned int | num_threads_active, | ||
| NumericT * | s_left, | ||
| NumericT * | s_right, | ||
| unsigned short * | s_left_count, | ||
| unsigned short * | s_right_count, | ||
| NumericT | left, | ||
| NumericT | mid, | ||
| NumericT | right, | ||
| const unsigned short | left_count, | ||
| const unsigned short | mid_count, | ||
| const unsigned short | right_count, | ||
| NumericT | epsilon, | ||
| unsigned int & | compact_second_chunk, | ||
| unsigned short * | s_compaction_list, | ||
| unsigned int & | is_active_second | ||
| ) |
Store all non-empty intervals resulting from the subdivision of the interval currently processed by the thread
Definition at line 475 of file bisect_kernel_large.hpp.
| __device__ void viennacl::linalg::cuda::subdivideActiveInterval | ( | const unsigned int | tid, |
| NumericT * | s_left, | ||
| NumericT * | s_right, | ||
| T * | s_left_count, | ||
| T * | s_right_count, | ||
| const unsigned int | num_threads_active, | ||
| NumericT & | left, | ||
| NumericT & | right, | ||
| unsigned int & | left_count, | ||
| unsigned int & | right_count, | ||
| NumericT & | mid, | ||
| unsigned int & | all_threads_converged | ||
| ) |
Subdivide interval if active and not already converged.
| tid | id of thread |
| s_left | shared memory storage for left interval limits |
| s_right | shared memory storage for right interval limits |
| s_left_count | shared memory storage for number of eigenvalues less than left interval limits |
| s_right_count | shared memory storage for number of eigenvalues less than right interval limits |
| num_threads_active | number of active threads in warp |
| left | lower limit of interval |
| right | upper limit of interval |
| left_count | eigenvalues less than left |
| right_count | eigenvalues less than right |
| mid | midpoint of interval |
| all_threads_converged | shared memory flag if all threads are converged |
Definition at line 529 of file bisect_util.hpp.
| viennacl::enable_if< viennacl::is_scalar<ScalarT1>::value && viennacl::is_scalar<ScalarT2>::value >::type viennacl::linalg::cuda::swap | ( | ScalarT1 & | s1, |
| ScalarT2 & | s2 | ||
| ) |
Swaps the contents of two scalars, data is copied.
| s1 | The first scalar |
| s2 | The second scalar |
Definition at line 361 of file scalar_operations.hpp.
| void viennacl::linalg::cuda::trans | ( | matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const & | proxy, |
| matrix_base< NumericT > & | temp_trans | ||
| ) |
Definition at line 57 of file matrix_operations.hpp.
| __global__ void viennacl::linalg::cuda::trans_kernel | ( | const NumericT * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_stride1, | ||
| unsigned int | A_stride2, | ||
| NumericT * | B, | ||
| unsigned int | B_start1, | ||
| unsigned int | B_start2, | ||
| unsigned int | B_internal_size1, | ||
| unsigned int | B_internal_size2, | ||
| unsigned int | B_stride1, | ||
| unsigned int | B_stride2, | ||
| bool | data_major | ||
| ) |
Definition at line 35 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel | ( | const NumericT * | A, |
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | v, | ||
| unsigned int | v_start, | ||
| unsigned int | v_inc, | ||
| unsigned int | v_size, | ||
| NumericT * | result, | ||
| unsigned int | result_start, | ||
| unsigned int | result_inc, | ||
| unsigned int | result_size | ||
| ) |
Definition at line 1277 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel | ( | const NumericT * | A, |
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | v, | ||
| unsigned int | v_start, | ||
| unsigned int | v_inc, | ||
| unsigned int | v_size, | ||
| NumericT * | result, | ||
| unsigned int | result_start, | ||
| unsigned int | result_inc, | ||
| unsigned int | result_size | ||
| ) |
Definition at line 1321 of file matrix_operations_row.hpp.
| __global__ void viennacl::linalg::cuda::transpose | ( | const NumericT * | input, |
| NumericT * | output, | ||
| unsigned int | row_num, | ||
| unsigned int | col_num | ||
| ) |
Definition at line 712 of file fft_operations.hpp.
| void viennacl::linalg::cuda::transpose | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > const & | input, |
| viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | output | ||
| ) |
Transpose matrix.
Definition at line 731 of file fft_operations.hpp.
| void viennacl::linalg::cuda::transpose | ( | viennacl::matrix< NumericT, viennacl::row_major, AlignmentV > & | input | ) |
Transpose matrix in place.
Definition at line 769 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::transpose_inplace | ( | NumericT * | input, |
| unsigned int | row_num, | ||
| unsigned int | col_num | ||
| ) |
Definition at line 745 of file fft_operations.hpp.
| __global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel | ( | NumericT const * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT * | v, | ||
| unsigned int | v_start, | ||
| unsigned int | v_inc, | ||
| unsigned int | v_size, | ||
| unsigned int | options | ||
| ) |
Definition at line 307 of file direct_solve.hpp.
| __global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel | ( | NumericT const * | A, |
| unsigned int | A_start1, | ||
| unsigned int | A_start2, | ||
| unsigned int | A_inc1, | ||
| unsigned int | A_inc2, | ||
| unsigned int | A_size1, | ||
| unsigned int | A_size2, | ||
| unsigned int | A_internal_size1, | ||
| unsigned int | A_internal_size2, | ||
| NumericT * | v, | ||
| unsigned int | v_start, | ||
| unsigned int | v_inc, | ||
| unsigned int | v_size, | ||
| unsigned int | options | ||
| ) |
Definition at line 266 of file direct_solve.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_abs_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1251 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_acos_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1029 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_asin_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1056 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_atan_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1084 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_ceil_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1112 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_cos_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1140 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_cosh_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1168 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_exp_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1196 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_fabs_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1224 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_floor_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1280 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_log10_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1336 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_log_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1308 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_sin_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1364 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_sinh_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1392 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1420 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_tan_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1448 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_element_tanh_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT const * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 1476 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vec_mul_col_kernel | ( | const NumericT * | A, |
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | v, | ||
| unsigned int | v_start, | ||
| unsigned int | v_inc, | ||
| unsigned int | v_size, | ||
| NumericT * | result, | ||
| unsigned int | result_start, | ||
| unsigned int | result_inc, | ||
| unsigned int | result_size | ||
| ) |
Definition at line 1246 of file matrix_operations_col.hpp.
| __global__ void viennacl::linalg::cuda::vec_mul_row_kernel | ( | const NumericT * | A, |
| unsigned int | A_row_start, | ||
| unsigned int | A_col_start, | ||
| unsigned int | A_row_inc, | ||
| unsigned int | A_col_inc, | ||
| unsigned int | A_row_size, | ||
| unsigned int | A_col_size, | ||
| unsigned int | A_internal_rows, | ||
| unsigned int | A_internal_cols, | ||
| const NumericT * | v, | ||
| unsigned int | v_start, | ||
| unsigned int | v_inc, | ||
| unsigned int | v_size, | ||
| NumericT * | result, | ||
| unsigned int | result_start, | ||
| unsigned int | result_inc, | ||
| unsigned int | result_size | ||
| ) |
Definition at line 1276 of file matrix_operations_row.hpp.
| void viennacl::linalg::cuda::vector_assign | ( | vector_base< NumericT > & | vec1, |
| ScalarT1 const & | alpha, | ||
| bool | up_to_internal_size = false | ||
| ) |
Assign a constant value to a vector (-range/-slice)
| vec1 | The vector to which the value should be assigned |
| alpha | The value to be assigned |
| up_to_internal_size | Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). |
Definition at line 777 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_assign_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | internal_size1, | ||
| NumericT | alpha | ||
| ) |
Definition at line 756 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_maxmin_kernel | ( | const NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | option, | ||
| NumericT * | result | ||
| ) |
Definition at line 2739 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_multi_sum_kernel | ( | NumericT const * | vec1, |
| NumericT * | result, | ||
| unsigned int | start_result, | ||
| unsigned int | inc_result | ||
| ) |
Definition at line 2031 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_sum_kernel_floats | ( | const NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | option, | ||
| NumericT * | result | ||
| ) |
Definition at line 1547 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_sum_kernel_integers | ( | const NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | option, | ||
| NumericT * | result | ||
| ) |
Definition at line 1589 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers | ( | const NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| unsigned int | option, | ||
| NumericT * | result | ||
| ) |
Definition at line 1626 of file vector_operations.hpp.
| void viennacl::linalg::cuda::vector_swap | ( | vector_base< NumericT > & | vec1, |
| vector_base< NumericT > & | vec2 | ||
| ) |
Swaps the contents of two vectors, data is copied.
| vec1 | The first vector (or -range, or -slice) |
| vec2 | The second vector (or -range, or -slice) |
Definition at line 827 of file vector_operations.hpp.
| __global__ void viennacl::linalg::cuda::vector_swap_kernel | ( | NumericT * | vec1, |
| unsigned int | start1, | ||
| unsigned int | inc1, | ||
| unsigned int | size1, | ||
| NumericT * | vec2, | ||
| unsigned int | start2, | ||
| unsigned int | inc2 | ||
| ) |
Definition at line 800 of file vector_operations.hpp.
| __device__ void viennacl::linalg::cuda::writeToGmem | ( | const unsigned int | tid, |
| const unsigned int | tid_2, | ||
| const unsigned int | num_threads_active, | ||
| const unsigned int | num_blocks_mult, | ||
| NumericT * | g_left_one, | ||
| NumericT * | g_right_one, | ||
| unsigned int * | g_pos_one, | ||
| NumericT * | g_left_mult, | ||
| NumericT * | g_right_mult, | ||
| unsigned int * | g_left_count_mult, | ||
| unsigned int * | g_right_count_mult, | ||
| NumericT * | s_left, | ||
| NumericT * | s_right, | ||
| unsigned short * | s_left_count, | ||
| unsigned short * | s_right_count, | ||
| unsigned int * | g_blocks_mult, | ||
| unsigned int * | g_blocks_mult_sum, | ||
| unsigned short * | s_compaction_list, | ||
| unsigned short * | s_cl_helper, | ||
| unsigned int | offset_mult_lambda | ||
| ) |
Write data to global memory.
Definition at line 53 of file bisect_kernel_large.hpp.
| __global__ void viennacl::linalg::cuda::zero2 | ( | NumericT * | input1, |
| NumericT * | input2, | ||
| unsigned int | size | ||
| ) |
Definition at line 601 of file fft_operations.hpp.