#pragma once

#include <cassert>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <torch/extension.h>

#define ASSERT(c) assert(c)

// round v up to the nearest multiple of n
#define ALIGN(v, n) ((((v) + (n) - 1) / (n)) * (n))

#define INF std::numeric_limits<float>::infinity()

// SOURCE_PATH_LENGTH is expected to be defined by the build system:
// the length of the source-tree prefix to strip from __FILE__
#define __FILENAME__ (__FILE__ + SOURCE_PATH_LENGTH)

#define GRL_ERROR(format, args...) \
    greedrl_error(__FILENAME__, __LINE__, format, ##args);

#define GRL_CHECK(flag, format, args...) \
    greedrl_check(__FILENAME__, __LINE__, flag, format, ##args);

#define MALLOC(ptr, T, size) \
    ptr = (T*) malloc(sizeof(T) * (size)); \
    GRL_CHECK(ptr != nullptr, "out of memory!");

// allocate and register an AllocGuard (defined elsewhere)
// that frees the buffer on scope exit
#define GALLOC(ptr, T, size) \
    GRL_CHECK((size) > 0, "malloc 0 bytes"); \
    T* const ptr = (T*) malloc(sizeof(T) * (size)); \
    GRL_CHECK(ptr != nullptr, "out of memory!"); \
    AllocGuard ptr##_##alloc##_##guard(ptr);

#define REALLOC(ptr, T, size) \
    GRL_CHECK((size) > 0, "realloc 0 bytes"); \
    ptr = (T*) realloc(ptr, sizeof(T) * (size)); \
    GRL_CHECK(ptr != nullptr, "out of memory!");

#define GRL_CHECK_TENSOR(tensor, device, allow_sub_contiguous, allow_null, ...) \
    greedrl_check_tensor(__FILENAME__, __LINE__, tensor, #tensor, device, \
                         allow_sub_contiguous, allow_null, {__VA_ARGS__});

const int GRL_WORKER_START = 0;
const int GRL_WORKER_END = 1;
const int GRL_TASK = 2;
const int GRL_FINISH = 3;

const int MAX_BATCH_SIZE = 100000;
const int MAX_TASK_COUNT = 5120;
const int MAX_SHARED_MEM = 48128;

using String = std::string;
using Device = torch::Device;
using Tensor = torch::Tensor;
using TensorMap = std::map<String, Tensor>;
using TensorList = std::vector<Tensor>;

inline void greedrl_error(
    const char* const file, const int64_t line, const char* const format, ...)
{
    const int N = 2048;
    static char buf[N];
    va_list args;
    va_start(args, format);
    int n = vsnprintf(buf, N, format, args);
    va_end(args);
    if(n >= 0 && n < N) {
        snprintf(buf + n, N - n, " at %s:%ld", file, (long)line);
    }
    throw std::runtime_error(buf);
}

inline void greedrl_check(
    const char* const file, const int64_t line,
    const bool flag, const char* const format, ...)
{
    if(flag) {
        return;
    }
    const int N = 2048;
    static char buf[N];
    va_list args;
    va_start(args, format);
    int n = vsnprintf(buf, N, format, args);
    va_end(args);
    if(n >= 0 && n < N) {
        snprintf(buf + n, N - n, " at %s:%ld", file, (long)line);
    }
    throw std::runtime_error(buf);
}

// contiguous except the 1st dimension
inline bool is_sub_contiguous(const Tensor& tensor)
{
    int dim = tensor.dim();
    if(dim == 1) return true;
    auto sizes = tensor.sizes();
    auto strides = tensor.strides();
    if(strides[dim-1] != 1) return false;
    int64_t s = 1;
    for(int i = dim - 2; i > 0; i--) {
        s *= sizes[i+1];
        if(strides[i] != s) return false;
    }
    return true;
}

inline void greedrl_check_tensor(
    const char* const file, const int line,
    const Tensor& tensor, const String& name, const Device& device,
    bool allow_sub_contiguous, bool allow_null,
    std::initializer_list<int64_t> sizes)
{
    greedrl_check(file, line, tensor.numel() < 1000 * 1000 * 1000,
                  "tensor size too large");

    auto device2 = tensor.device();
    greedrl_check(file, line, device2 == device,
                  "'%s' device is %s, but expect %s",
                  name.c_str(), device2.str().c_str(), device.str().c_str());

    bool is_contiguous = allow_sub_contiguous ?
        is_sub_contiguous(tensor) : tensor.is_contiguous();
    greedrl_check(file, line, is_contiguous,
                  "'%s' is not contiguous", name.c_str());

    if(allow_null && tensor.data_ptr() == nullptr) return;

    if(tensor.dim() != (int64_t)sizes.size()) {
        greedrl_error(file, line, "'%s' dim is %d, but expect %d",
                      name.c_str(), (int)tensor.dim(), (int)sizes.size());
    }

    int i = 0;
    for(auto s : sizes) {
        greedrl_check(file, line, tensor.size(i) == s,
                      "'%s' size(%d) is %d, but expect %d",
                      name.c_str(), i, (int)tensor.size(i), (int)s);
        i++;
    }
}

#ifdef CUDA_FOUND

#include <cuda_runtime.h>

#define GRL_CHECK_CUDA(error) \
    greedrl_check_cuda(error, __FILENAME__, __LINE__);

inline void greedrl_check_cuda(
    const cudaError_t& error, const char* file, const int64_t line)
{
    if(error == cudaSuccess) {
        return;
    }
    const int N = 2048;
    static char buf[N];
    snprintf(buf, N, "%s, at %s:%ld", cudaGetErrorString(error), file, (long)line);
    throw std::runtime_error(buf);
}

cudaDeviceProp& cuda_get_device_prop(int i);

#endif
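
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of this header's API): the
// function name `forward` and the shape (batch, 128) below are hypothetical.
// GRL_CHECK_TENSOR validates device, contiguity and shape in one call and
// reports the tensor's own variable name via the #tensor stringization;
// GALLOC pairs a malloc with an AllocGuard so the buffer is freed on scope
// exit, including when a later check throws; GRL_CHECK_CUDA turns a
// cudaError_t into a std::runtime_error with the call site appended.
//
//   void forward(const Tensor& input) {
//       int64_t batch = input.size(0);
//       GRL_CHECK(batch <= MAX_BATCH_SIZE, "batch size %d too large", (int)batch);
//       GRL_CHECK_TENSOR(input, torch::kCPU, false, false, batch, 128);
//
//       GALLOC(buf, float, batch * 128); // declares `float* const buf`
//       // ... fill and use buf; freed by buf_alloc_guard on return/throw ...
//
//   #ifdef CUDA_FOUND
//       GRL_CHECK_CUDA(cudaDeviceSynchronize());
//   #endif
//   }
// ---------------------------------------------------------------------------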