Guide: NPC Dialogue
This guide shows how to build AI-driven NPC dialogue with personality, streaming text, multi-turn memory, and emotion tags for driving animations.
Giving an NPC a Personality
Use the system message to define who the NPC is. This sets the tone, vocabulary, and behavior for all responses:
- Python

from openai import OpenAI

client = OpenAI(base_url="http://localhost:11434/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    messages=[
        {
            "role": "system",
            "content": "You are Greta, a grumpy blacksmith in a medieval village. "
                       "You speak in short, blunt sentences. You secretly care about "
                       "the player but would never admit it. Keep responses under 3 sentences.",
        },
        {"role": "user", "content": "Can you forge me a legendary sword?"},
    ],
    temperature=0.8,
)

print(response.choices[0].message.content)
# "Legendary? Ha. You can barely hold a dagger without cutting yourself."
- Godot (GDScript)

var request = JSON.stringify({
    "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    "messages": [
        {
            "role": "system",
            "content": "You are Greta, a grumpy blacksmith in a medieval village. You speak in short, blunt sentences. You secretly care about the player but would never admit it. Keep responses under 3 sentences."
        },
        {"role": "user", "content": "Can you forge me a legendary sword?"}
    ],
    "temperature": 0.8
})
engine.llm_chat_stream(request)
- Unity (C#)

string requestJson = @"{
    ""model"": ""in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M"",
    ""messages"": [
        {""role"": ""system"", ""content"": ""You are Greta, a grumpy blacksmith. You speak in short, blunt sentences. Keep responses under 3 sentences.""},
        {""role"": ""user"", ""content"": ""Can you forge me a legendary sword?""}
    ],
    ""temperature"": 0.8
}";

// ExtractToken is a helper you write that pulls choices[0].delta.content out of the chunk JSON
AtelicoEngine.Instance.Llm.ChatCompletionStream(requestJson,
    onChunk: (chunk) => dialogueText.text += ExtractToken(chunk),
    onComplete: () => Debug.Log("Done"),
    onError: (err) => Debug.LogError(err));
- Unreal (C++)

FString RequestJson = TEXT(R"({
    "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    "messages": [
        {"role": "system", "content": "You are Greta, a grumpy blacksmith. Keep responses under 3 sentences."},
        {"role": "user", "content": "Can you forge me a legendary sword?"}
    ],
    "temperature": 0.8
})");

AI->ChatCompletionStream(RequestJson);
// Tokens arrive via OnTokenReceived delegate
Tips for system prompts:
- Keep them concise -- the system prompt is sent with every request and uses context tokens
- Include constraints like "Keep responses under 3 sentences" to prevent rambling
- Define the NPC's knowledge boundaries: "You only know about the village and nearby forest"
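Put together, those tips might look like the following system prompt (the exact wording is only an illustration, not a required format):

# A persona string that combines tone, a length constraint, and a knowledge boundary
GRETA_PERSONA = (
    "You are Greta, a grumpy blacksmith in a medieval village. "
    "You speak in short, blunt sentences. Keep responses under 3 sentences. "
    "You only know about the village, your forge, and the nearby forest; "
    "if asked about anything else, admit you don't know."
)

Pass this string as the system message content in any of the snippets above.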
Streaming for Typewriter Dialogue
Stream tokens one at a time to display text progressively, creating a natural typewriter effect in your dialogue UI:
- Python

stream = client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    messages=[
        {"role": "system", "content": "You are a mysterious oracle. Speak in riddles."},
        {"role": "user", "content": "What lies beyond the mountains?"},
    ],
    stream=True,
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="", flush=True)
- Godot (GDScript)

var dialogue_text := ""

func _ready() -> void:
    # Connect the streaming signals
    engine.inference_token_generated.connect(_on_token)
    engine.inference_completed.connect(_on_done)

func stream_dialogue(prompt: String) -> void:
    dialogue_text = ""
    label.text = ""
    var request = JSON.stringify({
        "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
        "messages": [
            {"role": "system", "content": "You are a mysterious oracle."},
            {"role": "user", "content": prompt}
        ]
    })
    engine.llm_chat_stream(request)

func _on_token(_job_id: int, chunk_json: String) -> void:
    var chunk = JSON.parse_string(chunk_json)
    var delta = chunk["choices"][0]["delta"]
    if delta.has("content") and delta["content"] != null:
        dialogue_text += delta["content"]
        label.text = dialogue_text
- Unity (C#)

AtelicoEngine.Instance.Llm.ChatCompletionStream(requestJson,
    onChunk: (chunkJson) =>
    {
        string token = ExtractToken(chunkJson);
        if (!string.IsNullOrEmpty(token))
            dialogueText.text += token;  // TextMeshPro text updates each frame
    },
    onComplete: () => { /* NPC finished speaking */ },
    onError: (error) => Debug.LogError(error));
- Unreal (C++)

// Bind delegate in BeginPlay
AI->OnTokenReceived.AddDynamic(this, &AMyNPC::OnToken);

void AMyNPC::OnToken(const FString& Token, const FString& Accumulated)
{
    // Accumulated contains the full text so far
    DialogueWidget->SetText(FText::FromString(Accumulated));
}
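Tokens usually arrive in bursts rather than at a steady reading pace. For a smoother typewriter feel you can pace the reveal yourself. Below is a minimal Python sketch that prints each streamed character at a fixed rate; the 30 characters-per-second rate and the helper name are illustrative, and in an engine you would spread the reveal across frames instead of sleeping:

import time

def typewriter_print(stream, chars_per_second: float = 30.0) -> str:
    """Reveal streamed tokens one character at a time at a steady pace."""
    delay = 1.0 / chars_per_second
    full_text = ""
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if not content:
            continue
        full_text += content
        for ch in content:
            print(ch, end="", flush=True)
            time.sleep(delay)  # pace the reveal instead of dumping whole tokens at once
    return full_text

reply = typewriter_print(stream)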
Multi-Turn Conversation
Include previous messages so the NPC remembers the conversation:
- Python

conversation = [
    {"role": "system", "content": "You are Boris, a tavern keeper. Friendly and gossipy."}
]

def talk(player_message: str) -> str:
    conversation.append({"role": "user", "content": player_message})
    response = client.chat.completions.create(
        model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
        messages=conversation,
    )
    reply = response.choices[0].message.content
    conversation.append({"role": "assistant", "content": reply})
    return reply

print(talk("What's on the menu?"))
# "Roasted boar and mushroom stew! The stew's my specialty."
print(talk("I'll have the stew. Any rumors?"))
# Boris now knows you ordered the stew
- Godot (GDScript)

var conversation: Array = [
    {"role": "system", "content": "You are Boris, a tavern keeper. Friendly and gossipy."}
]

func talk(player_input: String) -> void:
    conversation.append({"role": "user", "content": player_input})
    var request = JSON.stringify({
        "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
        "messages": conversation
    })
    engine.llm_chat_stream(request)

func _on_stream_done(_job_id: int, _success: bool) -> void:
    # Store the reply for future context
    conversation.append({"role": "assistant", "content": dialogue_text})
- Unity (C#)

private List<object> conversation = new()
{
    new { role = "system", content = "You are Boris, a tavern keeper." }
};

public void Talk(string playerMessage)
{
    conversation.Add(new { role = "user", content = playerMessage });
    var request = new { model = "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M", messages = conversation };
    var accumulated = new StringBuilder();

    AtelicoEngine.Instance.Llm.ChatCompletionStream(
        JsonSerializer.Serialize(request),
        onChunk: (chunk) => { accumulated.Append(ExtractToken(chunk)); },
        onComplete: () => { conversation.Add(new { role = "assistant", content = accumulated.ToString() }); },
        onError: (err) => Debug.LogError(err));
}
- Unreal (C++)

TArray<TSharedPtr<FJsonValue>> Conversation;

void AMyNPC::Talk(const FString& PlayerMessage)
{
    // Add user message to history
    TSharedPtr<FJsonObject> UserMsg = MakeShareable(new FJsonObject);
    UserMsg->SetStringField("role", "user");
    UserMsg->SetStringField("content", PlayerMessage);
    Conversation.Add(MakeShareable(new FJsonValueObject(UserMsg)));

    // Build request with full history
    TSharedPtr<FJsonObject> Body = MakeShareable(new FJsonObject);
    Body->SetStringField("model", "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M");
    Body->SetArrayField("messages", Conversation);
    // ... serialize and send ...
}

void AMyNPC::OnChatCompleted(const FString& FullResponse)
{
    // Store assistant reply for future turns
    TSharedPtr<FJsonObject> AssistantMsg = MakeShareable(new FJsonObject);
    AssistantMsg->SetStringField("role", "assistant");
    AssistantMsg->SetStringField("content", FullResponse);
    Conversation.Add(MakeShareable(new FJsonValueObject(AssistantMsg)));
}
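If several NPCs can be in conversation at once, each one needs its own history. One way to keep that tidy -- a Python sketch that reuses the client from the first example, not an engine feature -- is to wrap the persona and message list in a small class:

class NPC:
    """One NPC's persona plus its private conversation history."""

    def __init__(self, persona: str):
        self.conversation = [{"role": "system", "content": persona}]

    def talk(self, player_message: str) -> str:
        self.conversation.append({"role": "user", "content": player_message})
        response = client.chat.completions.create(
            model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
            messages=self.conversation,
        )
        reply = response.choices[0].message.content
        self.conversation.append({"role": "assistant", "content": reply})
        return reply

greta = NPC("You are Greta, a grumpy blacksmith. Keep responses under 3 sentences.")
boris = NPC("You are Boris, a tavern keeper. Friendly and gossipy.")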
Dialogue with Emotion Tags
Use structured generation to get both the NPC's text and metadata for driving animations. The engine automatically describes the JSON schema to the model, so you don't need to include "respond as JSON" instructions in your prompts:
- Python

import json

response = client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    messages=[
        {"role": "system", "content": "You are Greta, a grumpy blacksmith."},
        {"role": "user", "content": "I brought you flowers!"},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "NPCDialogue",
            "schema": {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "emotion": {"type": "string", "enum": ["happy", "sad", "angry", "surprised", "neutral", "embarrassed"]},
                    "gesture": {"type": "string", "enum": ["wave", "nod", "shrug", "point", "cross_arms", "none"]},
                },
                "required": ["text", "emotion", "gesture"],
            },
            "strict": True,
        },
    },
)

dialogue = json.loads(response.choices[0].message.content)
print(f"[{dialogue['emotion']}] {dialogue['text']}")
# [embarrassed] Flowers?! I... well, put them over there, I suppose.
# Use dialogue['gesture'] to trigger animation: "cross_arms"
- Godot (GDScript)

var response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "NPCDialogue",
        "schema": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
                "emotion": {"type": "string", "enum": ["happy", "sad", "angry", "surprised", "neutral", "embarrassed"]},
                "gesture": {"type": "string", "enum": ["wave", "nod", "shrug", "point", "cross_arms", "none"]}
            },
            "required": ["text", "emotion", "gesture"]
        },
        "strict": true
    }
}

# Use with llm_chat (blocking) to get the structured result
var response_json = engine.llm_chat(JSON.stringify({
    "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    "messages": messages,
    "response_format": response_format
}))
var response = JSON.parse_string(response_json)
var dialogue = JSON.parse_string(response["choices"][0]["message"]["content"])

# Drive the NPC's animation
npc_sprite.play(dialogue["emotion"])  # Set facial expression
if dialogue["gesture"] != "none":
    npc_sprite.play_gesture(dialogue["gesture"])
dialogue_label.text = dialogue["text"]
- Unity (C#)

[Serializable]
public class NPCDialogue
{
    public string text;
    public string emotion;
    public string gesture;
}

// After getting the response:
var content = response.choices[0].message.content;
var dialogue = JsonSerializer.Deserialize<NPCDialogue>(content);

// Drive animations
animator.SetTrigger(dialogue.emotion);
if (dialogue.gesture != "none")
    animator.SetTrigger(dialogue.gesture);
dialogueText.text = dialogue.text;
- Unreal (C++)

// Parse the structured JSON response
TSharedPtr<FJsonObject> Dialogue;
auto Reader = TJsonReaderFactory<>::Create(ContentJson);
FJsonSerializer::Deserialize(Reader, Dialogue);

FString Text = Dialogue->GetStringField("text");
FString Emotion = Dialogue->GetStringField("emotion");
FString Gesture = Dialogue->GetStringField("gesture");

// Drive the UE5 animation blueprint
NPCMesh->GetAnimInstance()->SetEmotion(Emotion);
if (Gesture != "none")
    NPCMesh->GetAnimInstance()->PlayGesture(Gesture);
DialogueWidget->SetText(FText::FromString(Text));
The output is guaranteed to match the schema -- emotion will always be one of the six defined values, and gesture will always be valid. Your game code can safely use these values without validation.
Tips for Better NPC Dialogue
- Temperature 0.6-0.8 gives natural variation without being too random
- Keep system prompts concise -- every token counts toward context length
- Limit response length with max_tokens (50-150 for dialogue) to prevent rambling
- Trim conversation history -- keep the system prompt and the last 10-15 messages, summarizing older ones to stay within context limits (see the sketch after this list)
- Use enums in structured generation for fields your game needs to branch on (emotions, actions, items)
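As a rough Python sketch of the max_tokens and history-trimming tips, using the conversation list from the multi-turn example (the cut-off of 12 recent messages and the token limit are illustrative values, not engine defaults):

MAX_RECENT = 12  # keep the system prompt plus roughly the last 10-15 messages

def trimmed(conversation: list[dict]) -> list[dict]:
    """Drop older turns, keeping the system prompt and the most recent messages."""
    system_prompt, rest = conversation[:1], conversation[1:]
    return system_prompt + rest[-MAX_RECENT:]

response = client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    messages=trimmed(conversation),
    temperature=0.7,
    max_tokens=120,  # enough for 2-3 short sentences of dialogue
)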