Tools: Running Local GGUF Models with Ollama (GPU Enabled) (2026)
1. Install & Start Ollama
2. Verify GPU Detection
3. Set Up Model Directory
4. Create a Modelfile
5. Create & Run the Model
6. Verify GPU Usage
7. Ollama Command Reference
Model Management
Running Models
In-Chat Commands
API (REST)
8. Manage Ollama Service (systemctl)
Start / Stop / Restart
Status & Logs
Enable / Disable on Boot
9. Gollama — Chat TUI for Ollama
Install Go (Fedora)
Install Gollama
Launch
Keyboard Shortcuts

1. Install & Start Ollama

curl -fsSL https://ollama.com/install.sh | sh
sudo systemctl start ollama
ollama --version
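
Ollama runs a local server at http://localhost:11434. A quick way to confirm the service is actually answering (the /api/version endpoint returns the installed version):

curl http://localhost:11434/api/version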
3. Set Up Model Directory

mkdir -p ~/Documents/LLM
cd ~/Documents/LLM
# Copy your .gguf file here
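
If you still need a GGUF, quantized builds are published on Hugging Face. The repo path below is a placeholder, not a real link; substitute the repository and quantization you actually want:

# Hypothetical example — replace <user>/<repo> with a real GGUF repository
curl -L -o Phi-4-mini-instruct-Q4_K_M.gguf \
  "https://huggingface.co/<user>/<repo>/resolve/main/Phi-4-mini-instruct-Q4_K_M.gguf"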
4. Create a Modelfile

vim Modelfile
FROM ./Phi-4-mini-instruct-Q4_K_M.gguf

SYSTEM """
You are a helpful AI assistant.
"""

# Render the system prompt (when set), then the user turn, in Phi-4 chat format
TEMPLATE """{{ if .System }}<|system|>
{{ .System }}<|end|>
{{ end }}<|user|>
{{ .Prompt }}<|end|>
<|assistant|>
"""

PARAMETER stop "<|user|>"
PARAMETER stop "<|assistant|>"
PARAMETER stop "<|end|>"
PARAMETER temperature 0.7
PARAMETER num_ctx 8192

Note: Always include TEMPLATE for custom GGUFs. Use instruct/chat variants, not base models.
5. Create & Run the Model

ollama create mymodel -f Modelfile
ollama run mymodel
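
Two quick checks once the model exists: ollama show prints what was baked in, and ollama run accepts a prompt argument for a one-shot answer without the interactive session:

# Inspect the stored template and parameters
ollama show mymodel --modelfile

# Single prompt from the shell
ollama run mymodel "Explain Docker in simple terms"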
6. Verify GPU Usage

Open a second terminal and monitor VRAM; an increase confirms GPU acceleration.

# NVIDIA
watch -n 1 nvidia-smi

# AMD
watch -n 1 rocm-smi
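
ollama ps reports the same thing from Ollama's side; its PROCESSOR column shows how much of the loaded model sits on the GPU versus the CPU:

ollama ps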
journalctl -u ollama -f
# Look for: "using CUDA" or "offloading layers to GPU"
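
If the logs show only a partial offload, the layer count can be pinned per model. num_gpu is Ollama's layer-offload parameter; the value below is only an illustration — size it to your VRAM:

# Add to the Modelfile, then run `ollama create` again
PARAMETER num_gpu 28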
7. Ollama Command Reference

API (REST)

# Generate (single turn)
curl http://localhost:11434/api/generate -d '{
  "model": "mymodel",
  "prompt": "Explain Docker in simple terms",
  "stream": false
}'

# Chat (multi-turn)
curl http://localhost:11434/api/chat -d '{
  "model": "mymodel",
  "messages": [
    { "role": "user", "content": "Hello!" }
  ]
}'

# List models via API
curl http://localhost:11434/api/tags

# Check running models
curl http://localhost:11434/api/ps
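
With "stream": true (the default), /api/generate returns newline-delimited JSON chunks; jq can stitch the tokens back together. The -j flag suppresses newlines between chunks:

curl -s http://localhost:11434/api/generate -d '{
  "model": "mymodel",
  "prompt": "Explain Docker in simple terms"
}' | jq -rj '.response'

The Model Management, Running Models, and In-Chat Commands groups from the outline map onto the stock ollama CLI; a minimal reference, all standard commands in current Ollama releases:

# Model Management
ollama list                # installed models
ollama pull llama3.2       # download from the registry
ollama rm mymodel          # delete a model
ollama cp mymodel backup   # copy under a new name

# Running Models
ollama ps                  # models currently loaded in memory
ollama stop mymodel        # unload a running model

# In-Chat Commands (inside `ollama run`)
#   /set parameter temperature 0.5
#   /show info
#   /clear    — reset the context
#   /bye      — exit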
8. Manage Ollama Service (systemctl)

Start / Stop / Restart

# Start Ollama service
sudo systemctl start ollama

# Stop Ollama service
sudo systemctl stop ollama

# Restart Ollama service
sudo systemctl restart ollama
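
Service-level settings such as the bind address or model directory are passed as environment variables through a systemd drop-in. OLLAMA_HOST and OLLAMA_MODELS are documented Ollama variables; the values here are examples only:

sudo systemctl edit ollama
# In the editor that opens, add:
#   [Service]
#   Environment="OLLAMA_HOST=0.0.0.0:11434"
#   Environment="OLLAMA_MODELS=/var/lib/ollama/models"
# Then apply the change:
sudo systemctl daemon-reload
sudo systemctl restart ollama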
Status & Logs

# Check service status
systemctl status ollama

# View live logs
journalctl -u ollama -f

# View last 50 log lines
journalctl -u ollama -n 50
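
One way to pull only the GPU-related lines out of the journal (the grep pattern is just a suggestion):

journalctl -u ollama -b | grep -iE 'cuda|rocm|gpu'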
Enable / Disable on Boot

# Enable Ollama to start on boot
sudo systemctl enable ollama

# Disable autostart
sudo systemctl disable ollama

# Check if enabled
systemctl is-enabled ollama
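
systemctl's --now flag combines enable with an immediate start, which saves a step on a fresh install:

sudo systemctl enable --now ollama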
9. Gollama — Chat TUI for Ollama

Gollama is a terminal chat interface for Ollama with conversation history saved via SQLite.

Install Go (Fedora)

sudo dnf install golang -y
go version
Install Gollama

go install github.com/gaurav-gosain/gollama@latest

# Add Go binaries to PATH
echo 'export PATH=$PATH:~/go/bin' >> ~/.bashrc
source ~/.bashrc

Launch

gollama

Keyboard Shortcuts

- i — enter insert mode (start typing)
- Esc — exit insert mode
- :wq — save and quit
- :q! — quit without saving