# self_instruct/src/interact_llama3_llamacpp.py
import fire
from llama_cpp import Llama
# System message placed at the start of every conversation. The Russian text
# reads: "You are Saiga, a Russian-language automatic assistant. You talk
# with people and help them."
SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
def interact(
    model_path,
    n_ctx=8192,
    top_k=30,
    top_p=0.9,
    temperature=0.6,
    repeat_penalty=1.1
):
    """Run an interactive console chat against a llama.cpp model.

    Loads the model once, then loops: reads a line from stdin, sends the
    whole conversation so far to the model, and streams the reply to stdout
    as it is generated. The session ends when stdin is closed (Ctrl-D) or
    the user presses Ctrl-C at the prompt.

    Args:
        model_path: Path to the model file, passed to ``Llama``.
        n_ctx: Context size passed to ``Llama``.
        top_k: Top-k sampling value forwarded to ``create_chat_completion``.
        top_p: Top-p sampling value forwarded to ``create_chat_completion``.
        temperature: Sampling temperature forwarded to
            ``create_chat_completion``.
        repeat_penalty: Repetition penalty forwarded to
            ``create_chat_completion``.
    """
    model = Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        # NOTE(review): n_parts is kept exactly as before; it looks like a
        # legacy option in recent llama-cpp-python releases -- confirm.
        n_parts=1,
        verbose=True,
    )

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    while True:
        try:
            user_message = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # Leave the loop cleanly instead of surfacing a traceback.
            print()
            return
        messages.append({"role": "user", "content": user_message})

        reply_parts = []
        for part in model.create_chat_completion(
            messages,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repeat_penalty=repeat_penalty,
            stream=True,
        ):
            delta = part["choices"][0]["delta"]
            # Some chunks carry no text (e.g. a role-only or empty delta);
            # skip those, and never print a literal "None".
            content = delta.get("content")
            if content:
                reply_parts.append(content)
                print(content, end="", flush=True)
        print()

        # Record the assistant's answer so the next request contains both
        # sides of the dialogue; without this the model would only ever see
        # a run of consecutive user messages.
        messages.append({"role": "assistant", "content": "".join(reply_parts)})
# Script entry point: python-fire exposes interact()'s parameters as
# command-line arguments.
if __name__ == "__main__":
    fire.Fire(interact)