#pragma once

#include "llama.h"

#include <string>
#include <vector>
#include <stdexcept>
#include <utility>

#ifdef __GNUC__
#ifdef __MINGW32__
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#else
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#endif
#else
#define LLAMA_ATTRIBUTE_FORMAT(...)
#endif
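// LLAMA_ATTRIBUTE_FORMAT(fmt_idx, args_idx) enables compile-time printf-format
// checking on GCC-compatible compilers: fmt_idx is the 1-based position of the
// format string argument and args_idx the position of the first variadic argument.
// MinGW uses the gnu_printf archetype so GNU-style specifiers (e.g. %zu) are
// checked correctly; on other compilers the macro expands to nothing.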
LLAMA_ATTRIBUTE_FORMAT(2, 3)
void llama_log_internal        (ggml_log_level level, const char * format, ...);
void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);

#define LLAMA_LOG(...)       llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
#define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
#define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LLAMA_LOG_CONT(...)  llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
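// Usage sketch for the logging macros (illustrative; `n_tensors` is a hypothetical
// variable). The format string follows printf semantics and is checked at compile
// time via LLAMA_ATTRIBUTE_FORMAT:
//
//     LLAMA_LOG_INFO("%s: loading %d tensors\n", __func__, n_tensors);
//     LLAMA_LOG_CONT(" done\n");   // GGML_LOG_LEVEL_CONT continues the previous message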
struct time_meas {
    time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}

    ~time_meas() {
        if (t_start_us >= 0) {
            t_acc += ggml_time_us() - t_start_us;
        }
    }

    const int64_t t_start_us;

    int64_t & t_acc;
};
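// time_meas is a scope-based (RAII) timer: it records ggml_time_us() on construction
// and adds the elapsed microseconds to the referenced accumulator on destruction.
// Illustrative sketch (t_decode_us is a hypothetical counter, not part of this header):
//
//     int64_t t_decode_us = 0;
//     {
//         time_meas tm(t_decode_us);   // timing starts here
//         // ... work to be measured ...
//     }                                // destructor adds the elapsed time to t_decode_us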
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string builder;
    builder.reserve(s.length());
    size_t pos = 0;
    size_t last_pos = 0;
    while ((pos = s.find(search, last_pos)) != std::string::npos) {
        builder.append(s, last_pos, pos - last_pos);
        builder.append(replace);
        last_pos = pos + search.length();
    }
    builder.append(s, last_pos, std::string::npos);
    s = std::move(builder);
}
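// replace_all substitutes every non-overlapping occurrence of `search` in `s` with
// `replace`, building the result in a single pass. Illustrative:
//
//     std::string s = "a.b.c";
//     replace_all(s, ".", "::");   // s == "a::b::c"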
const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
    struct llama_context * ctx
);
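// Exposes the (tensor name, ggml_tensor *) pairs of the model behind `ctx`.
// Intended for internal diagnostics and tooling that needs to walk the raw tensors
// (for example, per-tensor statistics); it is not part of the public llama.h API.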
template<typename T>
struct ring_buffer {
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // `pos` is the next write slot, so the most recently pushed element
        // sits one position behind it
        return data[(pos + capacity - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[(pos + capacity - 1) % capacity];
    }
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // buffer is full: advance `first` so the oldest element is overwritten
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }
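    // Invariant sketch: `first` indexes the oldest element, `pos` the next slot to be
    // written, and the live elements are data[(first + k) % capacity] for k in [0, sz).
    // E.g. with capacity 3, push_back(1), push_back(2), push_back(3), push_back(4)
    // leaves the buffer holding {2, 3, 4}, with 2 at front().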
    // "reverse at": rat(0) is the most recently pushed element, rat(sz - 1) the oldest
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    void clear() {
        // only resets the bookkeeping; the stored elements are left in place
        sz = 0;
        first = 0;
        pos = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0;
    size_t sz = 0;
    size_t first = 0;
    size_t pos = 0;
    std::vector<T> data;
};
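// Usage sketch (illustrative, not taken from the library): a fixed-size window over
// the most recent items, e.g. remembering the last N token ids.
//
//     ring_buffer<int> prev(4);                 // keep at most 4 entries
//     for (int id : {10, 11, 12, 13, 14}) {
//         prev.push_back(id);                   // the oldest entry (10) is overwritten
//     }
//     int newest = prev.rat(0);                 // 14
//     int oldest = prev.front();                // 11
//     std::vector<int> all = prev.to_vector();  // {11, 12, 13, 14}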