#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <cusolverDn.h>
#include <cusolver_common.h>
int main() {
int major=-1,minor=-1,patch=-1;
cusolverGetProperty(MAJOR_VERSION, &major);
cusolverGetProperty(MINOR_VERSION, &minor);
cusolverGetProperty(PATCH_LEVEL, &patch);
printf("CUSOLVER Version (Major,Minor,PatchLevel): %d.%d.%d\n", major,minor,patch);
cusolverDnHandle_t handle=NULL;
cudaStream_t stream=NULL;
cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
int N = 3;
double * cA = (double*)malloc(N * N * sizeof(double));
double * dA;
for (int i = 0; i < N * N; ++i) {
cA[i] = 1;
}
cudaMalloc((void**)&dA, N * N * sizeof(double) );
cudaMemcpy(dA, cA, N * N * sizeof(double), cudaMemcpyHostToDevice);
assert(cusolverDnCreate(&handle) == CUSOLVER_STATUS_SUCCESS);
assert(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking) ==
cudaSuccess);
assert(cusolverDnSetStream(handle, stream) == CUSOLVER_STATUS_SUCCESS);
int lwork;
double *dwork;
int *dipiv, *dinfo, *cinfo;
cinfo = (int*)malloc(sizeof(int) * 1);
cudaMalloc((void**)&dinfo, sizeof(int));
cudaMalloc((void**)&dipiv, N * sizeof(int));
status = cusolverDnDgetrf_bufferSize(handle,
N,
N,
dA,
N,
&lwork);
assert(CUSOLVER_STATUS_SUCCESS == status);
cudaMalloc((void**)&dwork, sizeof(double) * lwork);
cusolverDnDgetrf(handle,
N,
N,
dA,
N,
dwork,
NULL,
dinfo);
cudaMemcpy(cA, dA, N * N * sizeof(double), cudaMemcpyDeviceToHost);
cudaMemcpy(cinfo, dinfo, sizeof(int), cudaMemcpyDeviceToHost);
printf("dinfo: %d\n", *cinfo);
for (int i = 0; i < N; ++i) {
for (int j = 0; j < N; ++j) {
printf("%lf ", cA[i + j * N]);
}
printf("\n");
}
}
Linear algebra GPU library function bug tracking issue [magma/cusolver/cublas]
This issue is used to track known bugs in GPU library functions, e.g. MAGMA, cuSOLVER, cuBLAS. These known issues include crashes, large numerical mismatches, nan outputs, etc.
Affected functions (entries listed in their original order):
- torch.cholesky / cusolverDnXpotrfBatched: nan output for large ill-conditioned matrix.
- magma_Xpotrf_batched
- torch.eigh, torch.eigvalsh / cusolverDnXsyevjBatched
- cusolverDn<t>getrf
- torch.triangular_solve (sparse) / cusparseSpSM

Code for reproducing the nan issue of getrf
Compile with
nvcc -lcusolver

See also
cc @ngimel @jianyuh @nikitaved @pearu @mruberry @heitorschueroff @walterddr @IvanYashchuk @ptrblck