{
  "id": "pytorch/optimizer-step-without-loss-backward",
  "signature": "RuntimeError: step() called before loss.backward(). Ensure you call loss.backward() before optimizer.step().",
  "signature_zh": "RuntimeError: 在 loss.backward() 之前调用了 step()。请确保在 optimizer.step() 之前调用 loss.backward()。",
  "regex": "step\\(\\) called before loss\\.backward\\(\\)",
  "domain": "pytorch",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "The optimizer's step() method is invoked without a preceding backward() call, meaning gradients are not computed, and the optimizer attempts to update parameters with stale or zero gradients.",
  "root_cause_type": "generic",
  "root_cause_zh": "优化器的 step() 方法在没有先调用 backward() 的情况下被调用，意味着梯度未计算，优化器尝试使用过时或零梯度更新参数。",
  "versions": [
    {
      "version": "PyTorch 1.12.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "PyTorch 2.0.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "PyTorch 2.1.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Call optimizer.zero_grad() before loss.backward() to reset gradients",
      "why_fails": "zero_grad() only clears gradients, it does not compute them. The core issue is missing backward() call, not gradient accumulation.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
      "action": "Set requires_grad=False on all model parameters",
      "why_fails": "This disables gradient computation entirely, making the optimizer step meaningless and preventing learning.",
      "fail_rate": 0.95,
      "condition": "",
      "sources": []
    },
    {
      "action": "Use a learning rate scheduler step before optimizer step",
      "why_fails": "Scheduler step does not trigger gradient computation; it only adjusts the learning rate. The error persists.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Ensure the training loop order is correct: for inputs, targets in dataloader: outputs = model(inputs); loss = criterion(outputs, targets); optimizer.zero_grad(); loss.backward(); optimizer.step()",
      "success_rate": 0.95,
      "how": "Ensure the training loop order is correct: for inputs, targets in dataloader: outputs = model(inputs); loss = criterion(outputs, targets); optimizer.zero_grad(); loss.backward(); optimizer.step()",
      "condition": "",
      "sources": []
    },
    {
      "action": "Add a conditional check before optimizer.step(): if loss.grad_fn is not None: optimizer.step() else: print('Skipping step: no gradient')",
      "success_rate": 0.85,
      "how": "Add a conditional check before optimizer.step(): if loss.grad_fn is not None: optimizer.step() else: print('Skipping step: no gradient')",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use torch.no_grad() context manager only around inference, not around the backward pass. Example: with torch.no_grad(): outputs = model(inputs) for validation only.",
      "success_rate": 0.9,
      "how": "Use torch.no_grad() context manager only around inference, not around the backward pass. Example: with torch.no_grad(): outputs = model(inputs) for validation only.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Ensure the training loop order is correct: for inputs, targets in dataloader: outputs = model(inputs); loss = criterion(outputs, targets); optimizer.zero_grad(); loss.backward(); optimizer.step()",
    "Add a conditional check before optimizer.step(): if loss.grad_fn is not None: optimizer.step() else: print('Skipping step: no gradient')",
    "Use torch.no_grad() context manager only around inference, not around the backward pass. Example: with torch.no_grad(): outputs = model(inputs) for validation only."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://pytorch.org/docs/stable/optim.html#taking-an-optimization-step",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.9,
  "fix_success_rate": 0.95,
  "resolvable": "true",
  "first_seen": "2023-04-20",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}