Spaces:

Mat17892
/

iris

Runtime error

App Files Files Community

iris / llama.cpp /src /llama-impl.h

Mat17892

llamacpp

b664585 verified 16 days ago

raw

history blame contribute delete

4.68 kB

	#pragma once

	#include "llama.h"

	#include <string>
	#include <vector>
	#include <stdexcept>

	// LLAMA_ATTRIBUTE_FORMAT(fmt_index, first_vararg_index)
	//
	// Expands to a GNU `format` function attribute so that compilers defining
	// __GNUC__ can type-check printf-style calls against their format string.
	// When __MINGW32__ is also defined the `gnu_printf` archetype is requested
	// instead of plain `printf`. For every other compiler the macro expands to
	// nothing, so annotated declarations still compile.
	#if defined(__GNUC__) && defined(__MINGW32__)
	#    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
	#elif defined(__GNUC__)
	#    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
	#else
	#    define LLAMA_ATTRIBUTE_FORMAT(...)
	#endif

	//
	// logging
	//

	// printf-style logging entry point (declaration only; the definition is not in
	// this header). The attribute below marks `format` (argument 2) as the format
	// string and the variadic arguments (starting at argument 3) as its operands,
	// so compilers that support the attribute can check call sites.
	LLAMA_ATTRIBUTE_FORMAT(2, 3)
	void llama_log_internal (ggml_log_level level, const char * format, ...);
	// Log callback taking a level, an already-formatted text and an opaque
	// user_data pointer (declaration only).
	// NOTE(review): presumably the sink used when no user callback is installed -
	// confirm against the implementation.
	void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);

	// Convenience wrappers: each macro forwards its arguments (a printf-style
	// format string followed by its operands) to llama_log_internal with a fixed
	// ggml_log_level, one macro per level (NONE, INFO, WARN, ERROR, DEBUG, CONT).
	// NOTE(review): the meaning of each level (e.g. CONT presumably continues the
	// previous message) is defined by ggml, not by this header - confirm there.
	#define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
	#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
	#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
	#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
	#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
	#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)

	//
	// helpers
	//

	// Scoped time accumulator.
	//
	// On construction the current value of ggml_time_us() is captured (or -1 when
	// `disable` is true). On destruction, unless the measurement was disabled, the
	// difference between a fresh ggml_time_us() reading and the captured one is
	// added to the referenced accumulator `t_acc`.
	// NOTE(review): the unit is presumably microseconds, going by the `_us`
	// naming - confirm against ggml.
	struct time_meas {
	    time_meas(int64_t & t_acc, bool disable = false)
	        : t_start_us(!disable ? ggml_time_us() : -1),
	          t_acc(t_acc) {}

	    ~time_meas() {
	        // a negative start time is the "disabled" sentinel: nothing to add
	        if (t_start_us < 0) {
	            return;
	        }
	        t_acc += ggml_time_us() - t_start_us;
	    }

	    // start timestamp, or -1 when the measurement is disabled
	    const int64_t t_start_us;

	    // caller-owned accumulator; it must outlive this object because it is
	    // written from the destructor
	    int64_t & t_acc;
	};

	// Replaces every non-overlapping occurrence of `search` in `s` with `replace`,
	// scanning left to right. Text inserted by a replacement is never rescanned.
	// An empty `search` leaves `s` untouched.
	//
	// The result is assembled in a separate string and moved into `s` at the end,
	// so `s` is only modified once the whole scan has completed.
	static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
	    const size_t needle_len = search.length();
	    if (needle_len == 0) {
	        return;
	    }

	    std::string result;
	    result.reserve(s.length());

	    // `cursor` marks the first character of `s` not yet copied into `result`
	    size_t cursor = 0;
	    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
	        // copy the untouched text preceding the match, then the substitute
	        result.append(s, cursor, hit - cursor);
	        result.append(replace);
	        cursor = hit + needle_len;
	    }

	    // copy whatever follows the last match (possibly the whole string)
	    result.append(s, cursor, std::string::npos);
	    s = std::move(result);
	}

	// Returns a const reference to a vector of (string, ggml_tensor pointer) pairs
	// associated with `ctx` (declaration only; the definition is not in this header).
	// NOTE(review): the string is presumably the tensor name, and the returned
	// reference presumably stays valid only as long as `ctx` does - confirm
	// against the implementation.
	const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
	struct llama_context * ctx
	);

	// Fixed-capacity FIFO ring buffer. It works similarly to std::deque, but once
	// `capacity` elements are stored every further push_back overwrites the oldest
	// element instead of growing.
	//
	// Storage is a std::vector<T> holding `capacity` value-initialized slots, so T
	// must be default-constructible and copy-assignable. Accessors throw
	// std::runtime_error on an empty buffer or an out-of-range index.
	//
	// Index bookkeeping:
	//   first - slot of the oldest element
	//   pos   - slot the next push_back will write (one past the newest element)
	//   sz    - number of live elements (<= capacity)
	template<typename T>
	struct ring_buffer {
	    // Allocates `cap` slots up front; a capacity of 0 is accepted here but
	    // makes push_back throw.
	    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

	    // Oldest element. Throws std::runtime_error if the buffer is empty.
	    T & front() {
	        if (sz == 0) {
	            throw std::runtime_error("ring buffer is empty");
	        }
	        return data[first];
	    }

	    const T & front() const {
	        if (sz == 0) {
	            throw std::runtime_error("ring buffer is empty");
	        }
	        return data[first];
	    }

	    // Most recently pushed element. Throws std::runtime_error if the buffer is
	    // empty.
	    T & back() {
	        if (sz == 0) {
	            throw std::runtime_error("ring buffer is empty");
	        }
	        // `pos` is the slot the *next* push_back will write, so the newest
	        // element lives one slot behind it; adding `capacity` before the
	        // subtraction keeps the unsigned arithmetic from wrapping at pos == 0
	        return data[(pos + capacity - 1) % capacity];
	    }

	    const T & back() const {
	        if (sz == 0) {
	            throw std::runtime_error("ring buffer is empty");
	        }
	        // see the non-const overload: the newest element is one slot before `pos`
	        return data[(pos + capacity - 1) % capacity];
	    }

	    // Appends `value`. When the buffer is already full the oldest element is
	    // dropped to make room. Throws std::runtime_error if capacity is zero.
	    void push_back(const T & value) {
	        if (capacity == 0) {
	            throw std::runtime_error("ring buffer: capacity is zero");
	        }

	        if (sz == capacity) {
	            // advance the start when buffer is full
	            first = (first + 1) % capacity;
	        } else {
	            sz++;
	        }
	        data[pos] = value;
	        pos = (pos + 1) % capacity;
	    }

	    // Removes the oldest element and returns a copy of it. Throws
	    // std::runtime_error if the buffer is empty.
	    T pop_front() {
	        if (sz == 0) {
	            throw std::runtime_error("ring buffer is empty");
	        }
	        T value = data[first];
	        first = (first + 1) % capacity;
	        sz--;
	        return value;
	    }

	    //T & operator[](size_t i) {
	    //    if (i >= sz) {
	    //        throw std::runtime_error("ring buffer: index out of bounds");
	    //    }
	    //    return data[(first + i) % capacity];
	    //}

	    //const T & at(size_t i) const {
	    //    if (i >= sz) {
	    //        throw std::runtime_error("ring buffer: index out of bounds");
	    //    }
	    //    return data[(first + i) % capacity];
	    //}

	    // Reverse access: rat(0) is the newest element, rat(size() - 1) the oldest.
	    // Throws std::runtime_error if i >= size().
	    const T & rat(size_t i) const {
	        if (i >= sz) {
	            throw std::runtime_error("ring buffer: index out of bounds");
	        }
	        return data[(first + sz - i - 1) % capacity];
	    }

	    // Copies the live elements into a vector, ordered oldest to newest.
	    std::vector<T> to_vector() const {
	        std::vector<T> result;
	        result.reserve(sz);
	        for (size_t i = 0; i < sz; i++) {
	            result.push_back(data[(first + i) % capacity]);
	        }
	        return result;
	    }

	    // Empties the buffer. Capacity is kept.
	    void clear() {
	        // here only reset the status of the buffer; the stored slots are left
	        // as they are and get overwritten by later pushes
	        sz = 0;
	        first = 0;
	        pos = 0;
	    }

	    bool empty() const {
	        return sz == 0;
	    }

	    size_t size() const {
	        return sz;
	    }

	    size_t capacity = 0; // maximum number of elements
	    size_t sz = 0;       // current number of elements
	    size_t first = 0;    // slot of the oldest element
	    size_t pos = 0;      // slot the next push_back writes to
	    std::vector<T> data; // backing storage, always `capacity` slots
	};