Getting Started
This guide assumes you have an Atelico server bundle containing:
- atelico-server -- the inference server binary
- atelico-asset-downloader -- model downloading tool
1. Download a Model
List available models:
./atelico-asset-downloader list --namespace models
Download a model. For a quick start, the 1B quantized model is small and fast:
./atelico-asset-downloader download meta-llama/Llama-3.2-1B-Instruct-Q4_K_M
For better quality responses, use the 3B model:
./atelico-asset-downloader download meta-llama/Llama-3.2-3B-Instruct-Q4_K_M
Or use interactive mode to browse and select:
./atelico-asset-downloader interactive
Models are cached locally and only need to be downloaded once:
- macOS: ~/Library/Caches/atelico/models/
- Linux: ~/.cache/atelico/models/
- Windows: %LOCALAPPDATA%\atelico\models\
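If you want to check what is already cached from a script, here is a minimal sketch in Python, assuming the default cache locations listed above (the layout inside models/ may vary):

from pathlib import Path
import os
import platform

def model_cache_dir() -> Path:
    # Default cache locations from the list above.
    system = platform.system()
    if system == "Darwin":
        return Path.home() / "Library" / "Caches" / "atelico" / "models"
    if system == "Windows":
        return Path(os.environ["LOCALAPPDATA"]) / "atelico" / "models"
    return Path.home() / ".cache" / "atelico" / "models"

# Print whatever has been downloaded so far.
for entry in sorted(model_cache_dir().glob("*")):
    print(entry.name)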
2. Start the Server
./atelico-server
The server starts on port 11434 and auto-detects your GPU:
- Mac: uses Metal (Apple Silicon GPU)
- NVIDIA: uses CUDA
- No GPU: falls back to CPU
To use a different port:
./atelico-server --port 8080
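If you change the port, remember to point clients at it too. One convenient pattern for your own scripts (the ATELICO_PORT variable here is our own convention for the example, not a server flag):

import os

# Build the base URL from an environment variable so the same
# script works against both the default and a custom port.
base_url = f"http://localhost:{os.environ.get('ATELICO_PORT', '11434')}/v1"
print(base_url)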
3. Verify It's Running
curl http://localhost:11434/v1/models
You should see a list of available models:
{
  "object": "list",
  "data": [
    {"id": "in-memory::meta-llama/Llama-3.2-1B-Instruct-Q4_K_M", "object": "model", "owned_by": "atelico"},
    ...
  ]
}
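You can run the same check from Python with the OpenAI client library (the api_key value is a placeholder; the library requires one but the server ignores it):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:11434/v1", api_key="not-needed")

# List every model the server currently knows about.
for model in client.models.list().data:
    print(model.id)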
4. Send Your First Request
curl http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "in-memory::meta-llama/Llama-3.2-1B-Instruct-Q4_K_M",
    "messages": [{"role": "user", "content": "Hello! What can you do?"}]
  }'
That's it. The first request takes a few seconds while the model loads into GPU memory. Subsequent requests are fast.
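Because of that one-time load cost, it can be worth sending a tiny warm-up request when your application starts. A minimal sketch with the OpenAI client (max_tokens=1 keeps the warm-up cheap; whether the server ever unloads idle models is not covered here):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:11434/v1", api_key="not-needed")

# A one-token request forces the model into GPU memory up front,
# so the first real request doesn't pay the load cost.
client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-1B-Instruct-Q4_K_M",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=1,
)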
5. Use from Your Engine or Language
The server speaks the OpenAI API protocol. Any client library that works with OpenAI works with Atelico -- just point it at http://localhost:11434/v1.
The examples below cover:
- Python
- Godot (GDScript)
- Unity (C#)
- Unreal (C++)
Python:

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="not-needed",  # required by the library but not used
)

response = client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    messages=[{"role": "user", "content": "Tell me a short joke"}],
    temperature=0.7,
)
print(response.choices[0].message.content)
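Streaming (covered in the Chat Completions API guide linked under Next Steps) works the same way with the standard OpenAI client; a sketch, continuing from the client above:

# Print tokens as they arrive instead of waiting for the full completion.
stream = client.chat.completions.create(
    model="in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
    messages=[{"role": "user", "content": "Tell me a short joke"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()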
Godot (GDScript):

# Using the AtelicoEngineNode (GDExtension, no HTTP needed)
@onready var engine = $AtelicoEngineNode

func _ready():
    engine.initialize_engine()

func ask_npc(prompt: String) -> void:
    var request = {
        "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7
    }
    var request_json = JSON.stringify(request)
    engine.async_chat_completions(request_json)

# Connect this handler to the engine node's completion signal
# (in the editor or in _ready()).
func _on_async_request_completed(job_id: int, response: String) -> void:
    var parsed = JSON.parse_string(response)
    var text = parsed["choices"][0]["message"]["content"]
    print(text)
Or using the HTTP server with HTTPRequest:
var http = HTTPRequest.new()

func _ready():
    add_child(http)
    http.request_completed.connect(_on_response)

func ask(prompt: String) -> void:
    var body = JSON.stringify({
        "model": "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
        "messages": [{"role": "user", "content": prompt}]
    })
    http.request("http://localhost:11434/v1/chat/completions",
        ["Content-Type: application/json"], HTTPClient.METHOD_POST, body)

func _on_response(_result, _code, _headers, body: PackedByteArray) -> void:
    var parsed = JSON.parse_string(body.get_string_from_utf8())
    print(parsed["choices"][0]["message"]["content"])
Unity (C#):

using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using UnityEngine;

public class AtelicoClient : MonoBehaviour
{
    private static readonly HttpClient client = new HttpClient();
    private const string BaseUrl = "http://localhost:11434/v1";

    public async Task<string> Ask(string prompt)
    {
        var request = new
        {
            model = "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
            messages = new[] { new { role = "user", content = prompt } },
            temperature = 0.7
        };
        var json = JsonSerializer.Serialize(request);
        var content = new StringContent(json, Encoding.UTF8, "application/json");

        var response = await client.PostAsync($"{BaseUrl}/chat/completions", content);
        var responseJson = await response.Content.ReadAsStringAsync();

        using var doc = JsonDocument.Parse(responseJson);
        return doc.RootElement
            .GetProperty("choices")[0]
            .GetProperty("message")
            .GetProperty("content")
            .GetString();
    }
}
#include "HttpModule.h"
#include "Interfaces/IHttpRequest.h"
#include "Interfaces/IHttpResponse.h"
#include "Dom/JsonObject.h"
#include "Serialization/JsonSerializer.h"
void UAtelicoClient::Ask(const FString& Prompt)
{
auto Request = FHttpModule::Get().CreateRequest();
Request->SetURL(TEXT("http://localhost:11434/v1/chat/completions"));
Request->SetVerb(TEXT("POST"));
Request->SetHeader(TEXT("Content-Type"), TEXT("application/json"));
TSharedPtr<FJsonObject> Body = MakeShareable(new FJsonObject);
Body->SetStringField("model", "in-memory::meta-llama/Llama-3.2-3B-Instruct-Q4_K_M");
TArray<TSharedPtr<FJsonValue>> Messages;
TSharedPtr<FJsonObject> Message = MakeShareable(new FJsonObject);
Message->SetStringField("role", "user");
Message->SetStringField("content", Prompt);
Messages.Add(MakeShareable(new FJsonValueObject(Message)));
Body->SetArrayField("messages", Messages);
FString BodyString;
TSharedRef<TJsonWriter<>> Writer = TJsonWriterFactory<>::Create(&BodyString);
FJsonSerializer::Serialize(Body.ToSharedRef(), Writer);
Request->SetContentAsString(BodyString);
Request->OnProcessRequestComplete().BindLambda(
[](FHttpRequestPtr Req, FHttpResponsePtr Resp, bool bSuccess)
{
if (bSuccess && Resp.IsValid())
{
TSharedPtr<FJsonObject> Json;
TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Resp->GetContentAsString());
FJsonSerializer::Deserialize(Reader, Json);
auto Choices = Json->GetArrayField("choices");
FString Content = Choices[0]->AsObject()
->GetObjectField("message")
->GetStringField("content");
UE_LOG(LogTemp, Log, TEXT("Response: %s"), *Content);
}
});
Request->ProcessRequest();
}
Next Steps
- Chat Completions API -- Streaming, system prompts, multi-turn conversations
- Structured Generation -- Force the model to output valid JSON
- Models -- Available models, formats, and how to manage them
- Server Configuration -- Ports, environment variables, proxy backends