class Q4Matrix | |
{ | |
public: | |
int device; | |
int height; | |
int width; | |
int groups; | |
int groupsize; | |
uint32_t* cuda_qweight = NULL; | |
uint32_t* cuda_qzeros = NULL; | |
half* cuda_scales = NULL; | |
uint32_t* cuda_x_map = NULL; | |
Q4Matrix | |
( | |
const int _height, | |
const int _width, | |
const int _groups, | |
uint32_t* _qweight, | |
uint32_t* _qzeros, | |
half* _scales, | |
uint32_t* _g_idx, | |
const int _device | |
); | |
~Q4Matrix(); | |
void reconstruct(half* out); | |
private: | |
void make_sequential(const uint32_t* cpu_g_idx); | |
}; | |
void g_q4_keep_matrix(Q4Matrix* m); | |
void g_q4_free_matrices(); | |