// File size: 1,218 Bytes
// Revision: 2fe3da0
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include <cuda.h>
#include <stdint.h>
#include "vec3f.h"
#include "vec4f.h"
#include "tensor.h"
// Launch-configuration helpers. They carry no __device__/__global__ qualifier
// and sit outside the __CUDACC__ guard, so they are ordinary host functions
// visible to every translation unit. Only the prototypes appear in this
// header; the definitions are not visible here.
//
// getLaunchBlockSize: returns a dim3 computed from maxWidth, maxHeight and dims.
// NOTE(review): presumably chooses a thread-block shape bounded by
// maxWidth x maxHeight for a problem of extent `dims` -- confirm against the
// definition.
dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims);
// getLaunchGridSize: returns a dim3 computed from blockSize and dims.
// NOTE(review): presumably the number of `blockSize` blocks needed to cover
// `dims` on each axis -- confirm against the definition.
dim3 getLaunchGridSize(dim3 blockSize, dim3 dims);
#ifdef __CUDACC__
// Provide M_PI when building with the Microsoft compiler.
// NOTE(review): presumably needed because MSVC's <math.h> only exposes M_PI
// when _USE_MATH_DEFINES is set -- confirm for the toolchains in use.
// The `f` suffix makes this a float literal. There is no `#ifndef M_PI` guard,
// so an M_PI that is already defined at this point gets redefined.
#ifdef _MSC_VER
#define M_PI 3.14159265358979323846f
#endif
// Computes a per-axis (x, y, z) extent from the block shape, using a
// hard-coded budget of 32 threads that is spent on x first, then y, then z:
//   x = min(blockSize.x, 32)
//   y = min(max(32 / blockSize.x, 1), min(32, blockSize.y))
//   z = min(max(32 / (blockSize.x * blockSize.y), 1), min(32, blockSize.z))
// Each quotient is floored at 1, so no returned component is smaller than 1
// for a block with non-zero extents.
//
// Precondition: blockSize.x and blockSize.y must be non-zero; both are used
// as integer divisors.
__host__ __device__ static inline dim3 getWarpSize(dim3 blockSize)
{
    const unsigned int laneBudget = 32u;

    // X axis: take as many threads as the block offers, up to the budget.
    const unsigned int extentX = min(blockSize.x, laneBudget);

    // Y axis: whatever the budget still allows after x (at least 1),
    // limited by the block's own y extent.
    const unsigned int roomY   = max(laneBudget / blockSize.x, 1u);
    const unsigned int extentY = min(roomY, min(laneBudget, blockSize.y));

    // Z axis: same rule, with the budget divided by the full x*y footprint.
    const unsigned int roomZ   = max(laneBudget / (blockSize.x * blockSize.y), 1u);
    const unsigned int extentZ = min(roomZ, min(laneBudget, blockSize.z));

    return dim3(extentX, extentY, extentZ);
}
// Device-only scalar clamp: raises `val` to at least `mn`, then caps the
// result at `mx`. The upper bound is applied last, so when mn > mx the
// result is mx.
__device__ static inline float clamp(float val, float mn, float mx)
{
    const float raised = max(val, mn);  // enforce the lower bound first
    return min(raised, mx);             // then enforce the upper bound
}
#else
// Translation units not compiled by nvcc (no __CUDACC__) see only this
// prototype; the inline __host__ __device__ definition in the other branch is
// compiled out for them, so a definition has to be supplied elsewhere if the
// function is called from such a unit.
dim3 getWarpSize(dim3 blockSize);
#endif