Skip to main content
Version: 0.7

C FFI: Getting Started

The C FFI is the low-level interface for embedding the Atelico AI Engine in any language or engine that can call C functions. Unity and Unreal use this under the hood, but you can use it directly for custom engines, Swift/Kotlin mobile apps, or any other integration.

What You'll Build

A C/C++ program that initializes the engine, loads a model, streams a chat response token-by-token, and shuts down cleanly.

By the end, you'll understand:

  1. How to link the native library
  2. The engine lifecycle (create / use / destroy)
  3. How to make a blocking chat request
  4. How to stream tokens with the poll pattern
  5. Error handling and string ownership

Prerequisites

  • A C or C++ compiler
  • The Atelico native library (libatelico_ffi.a or atelico_ffi.dll)
  • The atelico_ffi.h header file
  • A downloaded model:
./atelico-asset-downloader download meta-llama/Llama-3.2-3B-Instruct-Q4_K_M
Step 1: Link the Library

Static Linking

# Compile with static library (macOS)
clang++ -std=c++17 main.cpp -L./lib -latelico_ffi -framework Metal -framework Foundation -o my_app

# Linux
g++ -std=c++17 main.cpp -L./lib -latelico_ffi -lpthread -ldl -lm -o my_app

Dynamic Linking

# macOS
clang++ -std=c++17 main.cpp -L./lib -latelico_ffi -o my_app
# Run with: DYLD_LIBRARY_PATH=./lib ./my_app

# Linux
g++ -std=c++17 main.cpp -L./lib -latelico_ffi -o my_app
# Run with: LD_LIBRARY_PATH=./lib ./my_app

# Windows (MSVC)
cl /EHsc main.cpp /link atelico_ffi.dll.lib
# atelico_ffi.dll must be next to the exe at runtime

Include the Header

#include "atelico_ffi.h"

Step 2: Engine Lifecycle

#include "atelico_ffi.h"
#include <cstdio>

int main()
{
// Create the engine. Passing NULL for the config asks the engine to
// auto-detect the GPU and use default settings.
AtelicoEngine* engine = nullptr;
int32_t rc = atelico_engine_create(nullptr, &engine);
// Check both the result code and the out-pointer before using the engine.
if (rc != ATELICO_OK || engine == nullptr)
{
fprintf(stderr, "Failed to create engine: %s\n", atelico_last_error());
return 1;
}
printf("Engine created successfully\n");

// ... use the engine ...

// Shut down and free every native resource owned by the engine.
atelico_engine_destroy(engine);
return 0;
}

Key rules:

  • atelico_engine_create must be called from the main thread
  • atelico_engine_destroy is safe to call on nullptr
  • Only one engine instance should exist at a time

Step 3: Load a Model

// Pre-load a model into memory. This call blocks, and downloads the
// weights first if they are not already cached on disk.
rc = atelico_model_load(engine, "meta-llama/Llama-3.2-3B-Instruct-Q4_K_M");
if (rc != ATELICO_OK)
{
// On failure, destroy the engine before exiting so native resources are freed.
fprintf(stderr, "Failed to load model: %s\n", atelico_last_error());
atelico_engine_destroy(engine);
return 1;
}
printf("Model loaded\n");

Step 4: Blocking Chat Request

// Chat request encoded as a JSON string (OpenAI-style messages array).
const char* request = R"({
"model": "meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
"messages": [
{"role": "system", "content": "You are a friendly tavern keeper. Keep responses under 2 sentences."},
{"role": "user", "content": "What's on the menu?"}
],
"max_tokens": 100,
"temperature": 0.7
})";

const char* response = nullptr;
// Blocking call: returns only once the full completion has been generated.
rc = atelico_llm_chat(engine, request, &response);
if (rc == ATELICO_OK && response != nullptr)
{
printf("Response: %s\n", response);
// IMPORTANT: response points at a thread-local buffer and is valid only
// until the next API call on this thread. Copy it if you need to keep it.
}
else
{
fprintf(stderr, "Chat failed: %s\n", atelico_last_error());
}

Step 5: Streaming with the Poll Pattern

Streaming is the recommended approach for real-time applications. It uses a poll loop that fits naturally into a game's frame update:

// Start a streaming request — returns immediately with a handle in stream_id.
uint64_t stream_id = 0;
rc = atelico_llm_chat_stream(engine, request, &stream_id);
if (rc != ATELICO_OK)
{
fprintf(stderr, "Failed to start stream: %s\n", atelico_last_error());
return 1;
}

// Poll for tokens (in a game, do this once per frame in your update loop).
bool done = false;
while (!done)
{
const char* chunk_json = nullptr;
int32_t poll_rc = atelico_stream_poll(engine, stream_id, &chunk_json);

switch (poll_rc)
{
case ATELICO_OK:
// Got a token chunk — print it. chunk_json lives in a thread-local
// buffer and is only valid until the next API call on this thread.
printf("%s", chunk_json);
fflush(stdout);
break;

case ATELICO_ERR_STREAM_EMPTY:
// No data yet — in a game loop, just continue to the next frame.
// In a console app, sleep briefly here to avoid busy-waiting.
break;

case ATELICO_ERR_STREAM_DONE:
// Stream finished normally (despite the ERR prefix, not an error).
done = true;
break;

default:
// Genuine error — report it and stop polling.
fprintf(stderr, "\nStream error: %s\n", atelico_last_error());
done = true;
break;
}
}

// Release the stream handle once polling is finished.
atelico_stream_destroy(engine, stream_id);
printf("\nDone.\n");

In a Game Loop

The poll pattern is designed for frame-driven engines:

// Called every frame by your engine
// Called every frame by your engine.
// NOTE(review): assumes `engine` and `active_stream_id` are file-scope
// variables owned by the host application, and that AppendToDialogue /
// OnDialogueComplete are host callbacks — confirm in your integration.
void OnUpdate()
{
// Signal frame timing to the AI scheduler.
atelico_engine_on_frame(engine);

// Poll the active stream for new tokens (0 = no stream in flight).
if (active_stream_id != 0)
{
const char* chunk = nullptr;
int32_t rc = atelico_stream_poll(engine, active_stream_id, &chunk);

if (rc == ATELICO_OK && chunk != nullptr)
{
// Append token to dialogue UI. chunk is only valid until the next
// API call on this thread — copy it if the UI stores it.
AppendToDialogue(chunk);
}
else if (rc == ATELICO_ERR_STREAM_DONE)
{
// Stream finished: release the handle and reset our bookkeeping.
atelico_stream_destroy(engine, active_stream_id);
active_stream_id = 0;
OnDialogueComplete();
}
// STREAM_EMPTY: no data this frame, try again next frame
}
}

Step 6: Error Handling

All functions return int32_t result codes. On failure, call atelico_last_error() for a human-readable message:

// Every API function reports failure through its int32_t result code.
int32_t rc = atelico_llm_chat(engine, bad_request, &response);
if (rc != ATELICO_OK)
{
const char* error = atelico_last_error();
// error points at thread-local storage and is valid only until the next
// API call on this thread — copy it if you need to keep it.
fprintf(stderr, "Error (code %d): %s\n", rc, error);
}
| Code | Constant                     | Meaning                              |
| ---- | ---------------------------- | ------------------------------------ |
| 0    | ATELICO_OK                   | Success                              |
| -1   | ATELICO_ERR_INVALID_HANDLE   | NULL or invalid engine pointer       |
| -2   | ATELICO_ERR_INVALID_ARG      | NULL required argument               |
| -3   | ATELICO_ERR_INIT_FAILED      | Engine initialization failed         |
| -4   | ATELICO_ERR_MODEL_NOT_FOUND  | Model ID not recognized              |
| -5   | ATELICO_ERR_INFERENCE_FAILED | Inference error                      |
| -6   | ATELICO_ERR_STREAM_DONE      | Stream completed (not an error)      |
| -7   | ATELICO_ERR_STREAM_EMPTY     | No data available yet (not an error) |
| -8   | ATELICO_ERR_JSON_PARSE       | Invalid JSON in request              |
| -9   | ATELICO_ERR_STORE_NOT_FOUND  | KV store not found                   |
| -10  | ATELICO_ERR_IO               | I/O error                            |
| -11  | ATELICO_ERR_BLOCKED          | Guardrail blocked content            |
| -99  | ATELICO_ERR_INTERNAL         | Internal error                       |

String Ownership

Strings you pass to the API: The API reads them during the call. You retain ownership and can free them after the call returns.

Strings returned by the API: Stored in a thread-local buffer. Valid only until the next API call on the same thread. Copy immediately if you need to keep them:

const char* response = nullptr;
atelico_llm_chat(engine, request, &response);

// Copy before making another API call — response targets a thread-local
// buffer that the next call on this thread will overwrite.
std::string saved_response = response; // C++ deep-copies the bytes here

// Now safe to make another call — the old response pointer is invalid.
atelico_llm_chat(engine, another_request, &response);

GPU Scheduling

Control GPU resource sharing at runtime:

// Scheduling modes — choose how the engine shares the GPU with rendering.
atelico_engine_set_scheduling_mode(engine, ATELICO_SCHEDULE_BALANCE); // Default
atelico_engine_set_scheduling_mode(engine, ATELICO_SCHEDULE_PRIORITIZE_COMPUTE); // Fast AI
atelico_engine_set_scheduling_mode(engine, ATELICO_SCHEDULE_PRIORITIZE_GRAPHICS); // Smooth FPS

// Resource limits
atelico_engine_set_vram_budget_mb(engine, 4096); // Cap VRAM use at 4096 MB
atelico_engine_set_target_tps(engine, 15); // Limit generation to 15 tokens/sec
atelico_engine_set_frame_time_ms(engine, 16); // Frame budget hint: ~16 ms (60 FPS)

Complete Example

#include "atelico_ffi.h"
#include <chrono>
#include <cstdio>
#include <cstring>
#include <string>
#include <thread>

int main()
{
    // Create the engine (NULL config = auto-detect GPU, use defaults).
    AtelicoEngine* engine = nullptr;
    if (atelico_engine_create(nullptr, &engine) != ATELICO_OK)
    {
        fprintf(stderr, "Init failed: %s\n", atelico_last_error());
        return 1;
    }

    // Load the model (blocking; downloads the weights if not cached).
    if (atelico_model_load(engine, "meta-llama/Llama-3.2-3B-Instruct-Q4_K_M") != ATELICO_OK)
    {
        fprintf(stderr, "Model load failed: %s\n", atelico_last_error());
        atelico_engine_destroy(engine);
        return 1;
    }

    // Stream a chat response. The request is an OpenAI-style JSON payload.
    const char* request = R"({
"model": "meta-llama/Llama-3.2-3B-Instruct-Q4_K_M",
"messages": [
{"role": "system", "content": "You are a narrator for a fantasy RPG."},
{"role": "user", "content": "Describe the entrance to the dungeon."}
],
"max_tokens": 200,
"temperature": 0.8
})";

    uint64_t stream = 0;
    if (atelico_llm_chat_stream(engine, request, &stream) != ATELICO_OK)
    {
        fprintf(stderr, "Stream failed: %s\n", atelico_last_error());
        atelico_engine_destroy(engine);
        return 1;
    }

    printf("NPC: ");
    bool done = false;
    while (!done)
    {
        const char* chunk = nullptr;
        int32_t rc = atelico_stream_poll(engine, stream, &chunk);
        if (rc == ATELICO_OK && chunk != nullptr)
        {
            // chunk is valid only until the next API call — print it now,
            // flushing so tokens appear as they arrive.
            printf("%s", chunk);
            fflush(stdout);
        }
        else if (rc == ATELICO_ERR_STREAM_EMPTY)
        {
            // No token yet: sleep briefly instead of busy-spinning a core.
            std::this_thread::sleep_for(std::chrono::milliseconds(5));
        }
        else if (rc == ATELICO_ERR_STREAM_DONE)
        {
            done = true;
        }
        else
        {
            fprintf(stderr, "\nError: %s\n", atelico_last_error());
            done = true;
        }
    }
    printf("\n");

    // Release the stream handle, then tear down the engine.
    atelico_stream_destroy(engine, stream);
    atelico_engine_destroy(engine);
    return 0;
}

Next Steps