Skip to main content

Examples

Explore practical code examples to get the most out of the Nebul inference-api.

Streaming Responses

Stream tokens as they're generated for real-time output:

python
from openai import OpenAI

client = OpenAI(
    api_key="sk-your-api-key-here",
    base_url="https://api.inference.nebul.io/v1",
)

# stream=True makes the API return an iterator of chunks instead of a
# single completed response, so tokens can be shown as they arrive.
stream = client.chat.completions.create(
    model="openai/gpt-oss-120b",
    messages=[{"role": "user", "content": "Write a haiku about AI."}],
    stream=True,
)

# Each chunk carries a delta; the content field may be empty/None on some
# chunks (e.g. the initial role-only chunk), so guard before printing.
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # finish with a newline after the streamed text

Text Embeddings

Generate vector embeddings for semantic search, similarity matching, and RAG applications.

python
from openai import OpenAI

client = OpenAI(
    api_key="sk-your-api-key-here",
    base_url="https://api.inference.nebul.io/v1",
)

# Embed one sentence; the resulting vector lives in response.data[0].
response = client.embeddings.create(
    model="sentence-transformers/all-MiniLM-L6-v2",
    input="Privacy is fundamental to maintaining trust in AI systems.",
)

print(response.data[0].embedding)

Batch Embeddings

Embed multiple texts in a single request for efficiency:

python
from openai import OpenAI

client = OpenAI(
    api_key="sk-your-api-key-here",
    base_url="https://api.inference.nebul.io/v1",
)

texts = [
    "First document to embed",
    "Second document to embed",
    "Third document to embed",
]

# Passing a list embeds every string in one request; response.data holds
# one entry per input, in the same order as `texts`.
response = client.embeddings.create(
    model="sentence-transformers/all-MiniLM-L6-v2",
    input=texts,
)

for i, data in enumerate(response.data):
    print(f"Text {i+1}: {len(data.embedding)} dimensions")

Vision - Image Analysis

Analyze images by providing them as base64-encoded strings or image URLs.

Using Image URL

python
from openai import OpenAI

client = OpenAI(
    api_key="sk-your-api-key-here",
    base_url="https://api.inference.nebul.io/v1",
)

# A vision request: the user message content is a list mixing a text part
# with an image_url part that points at a publicly reachable image.
response = client.chat.completions.create(
    model="Qwen/Qwen3-VL-235B-A22B-Thinking",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://fastly.picsum.photos/id/669/200/200.jpg?hmac=lAa_bMRK0BRBCTEvl1acVqTfEDrXQc0yNwi683-13cE"
                    },
                },
            ],
        }
    ],
)

print(response.choices[0].message.content)

Using Base64-Encoded Image

python
import base64

from openai import OpenAI

client = OpenAI(
    api_key="sk-your-api-key-here",
    base_url="https://api.inference.nebul.io/v1",
)

# Read the local image and base64-encode it so it can be sent inline
# as a data: URL instead of a publicly hosted link.
with open("image.jpg", "rb") as image_file:
    base64_image = base64.b64encode(image_file.read()).decode('utf-8')

response = client.chat.completions.create(
    model="Qwen/Qwen3-VL-235B-A22B-Thinking",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image in detail."},
                {
                    "type": "image_url",
                    "image_url": {
                        # data: URL embeds the encoded bytes directly in the request
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }
    ],
)

print(response.choices[0].message.content)

Function Calling & Tools

Enable models to call external functions and APIs. See Function Calling & Tools for the complete guide.

python
from openai import OpenAI

client = OpenAI(
    api_key="sk-your-api-key-here",
    base_url="https://api.inference.nebul.io/v1",
)

# Declare one callable tool using a JSON Schema description of its
# parameters; the model decides when (and with what arguments) to call it.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and country, e.g. London, UK",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# tool_choice="auto" lets the model pick between answering directly
# and emitting a tool call.
response = client.chat.completions.create(
    model="Qwen/Qwen3-VL-235B-A22B-Thinking",
    messages=[
        {"role": "user", "content": "What's the weather in Paris?"}
    ],
    tools=tools,
    tool_choice="auto",
)

print(response.choices[0].message.tool_calls)