Install Ollama:

```bash
# macOS / Linux
curl -fsSL https://ollama.com/install.sh | sh

# Windows
# Download the installer from https://ollama.com/download
```
Pull the model:

```bash
ollama pull gemma4
```
Test it from the command line:

```bash
ollama run gemma4 "Explain quantum computing in one paragraph"
```
The simplest possible Python wrapper:

```python
import ollama

def ask(question: str) -> str:
    response = ollama.generate(
        model="gemma4",
        prompt=question,
        options={"temperature": 0.3},
    )
    return response["response"]  # That's literally it

print(ask("What are the SOLID principles in software engineering?"))
```
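When responses run long, waiting for the whole thing feels slow. The same call can stream tokens as they arrive by passing `stream=True`; a minimal sketch, assuming the official `ollama` Python package:

```python
import ollama

# With stream=True, generate() yields chunks instead of a single response.
for chunk in ollama.generate(
    model="gemma4",
    prompt="Explain quantum computing in one paragraph",
    stream=True,
):
    print(chunk["response"], end="", flush=True)
print()
```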
For anything beyond a one-off script, a small wrapper class keeps the client, model, and options in one place:

```python
import ollama

class LocalLLMApp:
    def __init__(self, model: str = "gemma4"):
        self.client = ollama.Client()
        self.model = model

    def generate(
        self,
        prompt: str,
        temperature: float = 0.3,
        system: str | None = None,
    ) -> str:
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})
        response = self.client.chat(
            model=self.model,
            messages=messages,
            options={"temperature": temperature},
        )
        return response["message"]["content"]
```
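A quick usage check (the system prompt here is just an illustration):

```python
app = LocalLLMApp()
print(app.generate(
    "List the SOLID principles, one line each.",
    system="You are a concise technical assistant.",
))
```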
From here, a Streamlit front end is a few lines:

```python
import streamlit as st

app = LocalLLMApp()
st.title("My Local AI Tool")

user_input = st.text_area("Enter your text:")
if st.button("Analyze"):
    with st.spinner("Thinking..."):
        result = app.generate(user_input)
        st.write(result)
```
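Start it with `streamlit run app.py` (substitute your own file name) and the tool is live at `http://localhost:8501`, which is also the port the compose file below publishes.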
Prefer an HTTP API instead? The same class drops straight into FastAPI:

```python
from fastapi import FastAPI
from pydantic import BaseModel

api = FastAPI()
app = LocalLLMApp()

class Query(BaseModel):
    text: str
    temperature: float = 0.3

@api.post("/analyze")
async def analyze(query: Query):
    result = app.generate(query.text, temperature=query.temperature)
    return {"result": result}
```
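Serve it with something like `uvicorn main:api --port 8000` (the module name is yours), then exercise the endpoint; a sketch using `requests`:

```python
import requests

resp = requests.post(
    "http://localhost:8000/analyze",
    json={"text": "Explain quantum computing in one paragraph", "temperature": 0.3},
    timeout=120,  # local generation can take a while
)
print(resp.json()["result"])
```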
To ship the whole stack, run Ollama and the app as sibling services with docker-compose:

```yaml
services:
  ollama:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
  app:
    build: .
    ports:
      - "8501:8501"
      - "8000:8000"
    depends_on:
      - ollama
    environment:
      - OLLAMA_HOST=http://ollama:11434
volumes:
  ollama-data:
```
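Note the `OLLAMA_HOST` variable: inside the compose network the app has to reach the `ollama` service by name rather than `localhost`. A minimal sketch of honoring it in the wrapper's constructor, assuming the `host=` parameter of `ollama.Client`:

```python
import os
import ollama

class LocalLLMApp:
    def __init__(self, model: str = "gemma4"):
        # Fall back to the local daemon when OLLAMA_HOST isn't set.
        host = os.environ.get("OLLAMA_HOST", "http://localhost:11434")
        self.client = ollama.Client(host=host)
        self.model = model
```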
Rough response times to expect (a quick way to measure on your own machine is sketched after the list of use cases below):

- Simple Q&A: 0.5-1 second
- Paragraph generation: 2-5 seconds
- Document analysis (2-3 pages): 5-15 seconds
- Long-form generation (1000+ words): 15-30 seconds

Some domains where this setup fits well:

- Healthcare: Patient intake, lab results, EHR de-identification
- Legal: Contract analysis, brief generation, compliance checking
- Education: Study bots, exam generators, flashcard creators
- Creative: Story generators, poetry engines, mood journals
- Developer Tools: Code review, API docs, performance profiling
- Finance: Budget analyzers, financial report summarizers
- Security: Vulnerability scanners, alert summarizers
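To sanity-check the latency numbers above on your own hardware, a minimal timing harness (the prompts are placeholders; swap in your own workload):

```python
import time
import ollama

PROMPTS = {
    "simple Q&A": "What is the capital of France?",
    "paragraph generation": "Explain quantum computing in one paragraph",
}

for label, prompt in PROMPTS.items():
    start = time.perf_counter()
    ollama.generate(
        model="gemma4",
        prompt=prompt,
        options={"temperature": 0.3},
    )
    print(f"{label}: {time.perf_counter() - start:.2f}s")
```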