def test_sentiment_classifier():
    """Check that the LLM reply to a fixed sentiment prompt is an allowed label.

    The prompt is sent through ``call_llm``; the reply is trimmed of
    surrounding whitespace and lower-cased before being compared against
    the three accepted labels.
    """
    allowed_labels = ["positive", "negative", "neutral"]
    reply = call_llm("Classify: 'great product'. Return one word.")
    normalized = reply.strip().lower()
    assert normalized in allowed_labels
def test_sentiment_classifier():
    """Check that the LLM reply to a fixed sentiment prompt is an allowed label.

    The prompt is sent through ``call_llm``; the reply is trimmed of
    surrounding whitespace and lower-cased before being compared against
    the three accepted labels.
    """
    allowed_labels = ["positive", "negative", "neutral"]
    reply = call_llm("Classify: 'great product'. Return one word.")
    normalized = reply.strip().lower()
    assert normalized in allowed_labels
def test_sentiment_classifier():
    """Check that the LLM reply to a fixed sentiment prompt is an allowed label.

    The prompt is sent through ``call_llm``; the reply is trimmed of
    surrounding whitespace and lower-cased before being compared against
    the three accepted labels.
    """
    allowed_labels = ["positive", "negative", "neutral"]
    reply = call_llm("Classify: 'great product'. Return one word.")
    normalized = reply.strip().lower()
    assert normalized in allowed_labels
# Register
# POSTs an email/password pair as JSON to the /auth/register endpoint.
# Each trailing backslash must be the last character on its line so the
# shell treats the following line as part of the same curl command.
curl -X POST https://your-driftwatch-url/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email": "you@example.com", "password": "yourpassword"}'

# Save the api_key from the response
API_KEY="dw_your_api_key_here"
# Register
# POSTs an email/password pair as JSON to the /auth/register endpoint.
# Each trailing backslash must be the last character on its line so the
# shell treats the following line as part of the same curl command.
curl -X POST https://your-driftwatch-url/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email": "you@example.com", "password": "yourpassword"}'

# Save the api_key from the response
API_KEY="dw_your_api_key_here"
# Register
# POSTs an email/password pair as JSON to the /auth/register endpoint.
# Each trailing backslash must be the last character on its line so the
# shell treats the following line as part of the same curl command.
curl -X POST https://your-driftwatch-url/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email": "you@example.com", "password": "yourpassword"}'

# Save the api_key from the response
API_KEY="dw_your_api_key_here"
# POST the "sentiment-classifier" prompt definition (prompt text, model and
# validators) to the /prompts endpoint, authenticating with $API_KEY.
# The payload is single-quoted, so the \" sequences reach the server
# unchanged as JSON string escapes.
curl -X POST https://your-driftwatch-url/prompts \
  -H "Authorization: Bearer $API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "sentiment-classifier",
    "prompt_text": "Classify the sentiment as exactly one word: positive, negative, or neutral. Review: \"The product works fine but packaging was damaged.\"",
    "model": "gpt-4o",
    "validators": ["single_word", "word_in:positive,negative,neutral"]
  }'
# POST the "sentiment-classifier" prompt definition (prompt text, model and
# validators) to the /prompts endpoint, authenticating with $API_KEY.
# The payload is single-quoted, so the \" sequences reach the server
# unchanged as JSON string escapes.
curl -X POST https://your-driftwatch-url/prompts \
  -H "Authorization: Bearer $API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "sentiment-classifier",
    "prompt_text": "Classify the sentiment as exactly one word: positive, negative, or neutral. Review: \"The product works fine but packaging was damaged.\"",
    "model": "gpt-4o",
    "validators": ["single_word", "word_in:positive,negative,neutral"]
  }'
# POST the "sentiment-classifier" prompt definition (prompt text, model and
# validators) to the /prompts endpoint, authenticating with $API_KEY.
# The payload is single-quoted, so the \" sequences reach the server
# unchanged as JSON string escapes.
curl -X POST https://your-driftwatch-url/prompts \
  -H "Authorization: Bearer $API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "sentiment-classifier",
    "prompt_text": "Classify the sentiment as exactly one word: positive, negative, or neutral. Review: \"The product works fine but packaging was damaged.\"",
    "model": "gpt-4o",
    "validators": ["single_word", "word_in:positive,negative,neutral"]
  }'
# .github/workflows/llm-drift-check.yml
# Runs the drift monitor hourly and on every push to main, failing the job
# when the reported maximum drift exceeds 0.5 and warning above 0.3.
name: LLM Drift Check

on:
  schedule:
    - cron: '0 * * * *'  # hourly
  push:
    branches: [main]

jobs:
  drift-check:
    runs-on: ubuntu-latest
    steps:
      - name: Run drift check
        run: |
          RESULT=$(curl -s -X POST https://your-driftwatch-url/monitor/run \
            -H "Authorization: Bearer ${{ secrets.DRIFTWATCH_API_KEY }}")

          # `numbers` emits the value only when it is a JSON number, so an
          # error payload (missing or null max_drift) yields an empty string
          # instead of "null", which bc would silently evaluate as 0.
          MAX_DRIFT=$(echo "$RESULT" | jq '.summary.max_drift | numbers')
          echo "Max drift: $MAX_DRIFT"

          # Fail loudly rather than pass when no score could be read
          if [ -z "$MAX_DRIFT" ]; then
            echo "ERROR: no numeric .summary.max_drift in response"
            exit 1
          fi

          # Fail CI if drift exceeds threshold
          if (( $(echo "$MAX_DRIFT > 0.5" | bc -l) )); then
            echo "BREAKING CHANGE: drift score $MAX_DRIFT exceeds threshold"
            exit 1
          fi

          if (( $(echo "$MAX_DRIFT > 0.3" | bc -l) )); then
            echo "WARNING: drift score $MAX_DRIFT above alert threshold"
          fi
# .github/workflows/llm-drift-check.yml
# Runs the drift monitor hourly and on every push to main, failing the job
# when the reported maximum drift exceeds 0.5 and warning above 0.3.
name: LLM Drift Check

on:
  schedule:
    - cron: '0 * * * *'  # hourly
  push:
    branches: [main]

jobs:
  drift-check:
    runs-on: ubuntu-latest
    steps:
      - name: Run drift check
        run: |
          RESULT=$(curl -s -X POST https://your-driftwatch-url/monitor/run \
            -H "Authorization: Bearer ${{ secrets.DRIFTWATCH_API_KEY }}")

          # `numbers` emits the value only when it is a JSON number, so an
          # error payload (missing or null max_drift) yields an empty string
          # instead of "null", which bc would silently evaluate as 0.
          MAX_DRIFT=$(echo "$RESULT" | jq '.summary.max_drift | numbers')
          echo "Max drift: $MAX_DRIFT"

          # Fail loudly rather than pass when no score could be read
          if [ -z "$MAX_DRIFT" ]; then
            echo "ERROR: no numeric .summary.max_drift in response"
            exit 1
          fi

          # Fail CI if drift exceeds threshold
          if (( $(echo "$MAX_DRIFT > 0.5" | bc -l) )); then
            echo "BREAKING CHANGE: drift score $MAX_DRIFT exceeds threshold"
            exit 1
          fi

          if (( $(echo "$MAX_DRIFT > 0.3" | bc -l) )); then
            echo "WARNING: drift score $MAX_DRIFT above alert threshold"
          fi
# .github/workflows/llm-drift-check.yml
# Runs the drift monitor hourly and on every push to main, failing the job
# when the reported maximum drift exceeds 0.5 and warning above 0.3.
name: LLM Drift Check

on:
  schedule:
    - cron: '0 * * * *'  # hourly
  push:
    branches: [main]

jobs:
  drift-check:
    runs-on: ubuntu-latest
    steps:
      - name: Run drift check
        run: |
          RESULT=$(curl -s -X POST https://your-driftwatch-url/monitor/run \
            -H "Authorization: Bearer ${{ secrets.DRIFTWATCH_API_KEY }}")

          # `numbers` emits the value only when it is a JSON number, so an
          # error payload (missing or null max_drift) yields an empty string
          # instead of "null", which bc would silently evaluate as 0.
          MAX_DRIFT=$(echo "$RESULT" | jq '.summary.max_drift | numbers')
          echo "Max drift: $MAX_DRIFT"

          # Fail loudly rather than pass when no score could be read
          if [ -z "$MAX_DRIFT" ]; then
            echo "ERROR: no numeric .summary.max_drift in response"
            exit 1
          fi

          # Fail CI if drift exceeds threshold
          if (( $(echo "$MAX_DRIFT > 0.5" | bc -l) )); then
            echo "BREAKING CHANGE: drift score $MAX_DRIFT exceeds threshold"
            exit 1
          fi

          if (( $(echo "$MAX_DRIFT > 0.3" | bc -l) )); then
            echo "WARNING: drift score $MAX_DRIFT above alert threshold"
          fi

- Establish baseline — run your production prompts, save outputs
- Run on schedule (or in CI) — same prompts, same parameters
- Score the delta — format compliance + semantic similarity + output length
- Alert on threshold — 0.3 = investigate, 0.5 = page

- inst-01 (single-word classifier): drift score 0.575 — "Neutral." → "Neutral". Both pass the word_in:positive,negative,neutral validator. But response.strip() == "Neutral." is now false.
- json-01 (JSON extraction): drift score 0.316 — whitespace stripped, trailing period removed from value. json.loads() works. baseline == current does not.