{
  "id": "communication/kafka-consumer-commit-failed-rebalance",
  "signature": "CommitFailedError: Commit cannot be completed since the group has already rebalanced and assigned the partitions to another member",
  "signature_zh": "CommitFailedError：由于消费者组已重新平衡并将分区分配给其他成员，无法完成提交",
  "regex": "CommitFailedError|commit cannot be completed since the group has already rebalanced",
  "domain": "communication",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "Kafka consumer attempted to commit offsets after a group rebalance had already occurred, often because processing time exceeded `max.poll.interval.ms`, causing the consumer to be removed from the group.",
  "root_cause_type": "generic",
  "root_cause_zh": "Kafka 消费者在消费者组重新平衡后尝试提交偏移量，通常是因为处理时间超过了 `max.poll.interval.ms`，导致消费者被从组中移除。",
  "versions": [
    {
      "version": "Apache Kafka 3.4",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Kafka 3.6",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "confluent-kafka-python 2.3",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "spring-kafka 3.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Increase `max.poll.records` to process more records per poll and reduce polling frequency",
      "why_fails": "Processing more records per poll increases processing time, exacerbating the rebalance issue.",
      "fail_rate": 0.75,
      "condition": "",
      "sources": []
    },
    {
      "action": "Disable auto-commit and commit offsets manually after every single record",
      "why_fails": "Frequent commits increase load and may still fail if a rebalance occurs between commits.",
      "fail_rate": 0.65,
      "condition": "",
      "sources": []
    },
    {
      "action": "Set `session.timeout.ms` to a very low value to detect failures faster",
      "why_fails": "This can cause unnecessary rebalances if consumers are healthy but take slightly longer to poll.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Increase `max.poll.interval.ms` to a value higher than the expected maximum processing time, e.g., `max.poll.interval.ms=600000` (10 minutes) in consumer config.",
      "success_rate": 0.9,
      "how": "Increase `max.poll.interval.ms` to a value higher than the expected maximum processing time, e.g., `max.poll.interval.ms=600000` (10 minutes) in consumer config.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Reduce processing time per poll by using asynchronous processing: fetch records, process in a separate thread pool, and commit offsets only after all processing completes, e.g., using `KafkaConsumer` with `enable.auto.commit=false` and manual async commits.",
      "success_rate": 0.85,
      "how": "Reduce processing time per poll by using asynchronous processing: fetch records, process in a separate thread pool, and commit offsets only after all processing completes, e.g., using `KafkaConsumer` with `enable.auto.commit=false` and manual async commits.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Implement cooperative rebalancing (incremental rebalance protocol) by setting `partition.assignment.strategy=CooperativeStickyAssignor`, which allows consumers to retain some partitions during rebalance.",
      "success_rate": 0.8,
      "how": "Implement cooperative rebalancing (incremental rebalance protocol) by setting `partition.assignment.strategy=CooperativeStickyAssignor`, which allows consumers to retain some partitions during rebalance.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "将 `max.poll.interval.ms` 增加到高于预期最大处理时间的值，例如在消费者配置中设置 `max.poll.interval.ms=600000`（10 分钟）。",
    "通过异步处理减少每次轮询的处理时间：获取记录，在单独线程池中处理，并在所有处理完成后提交偏移量，例如使用 `enable.auto.commit=false` 和手动异步提交。",
    "通过设置 `partition.assignment.strategy=CooperativeStickyAssignor` 实现协作式重新平衡（增量重新平衡协议），允许消费者在重新平衡期间保留部分分区。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://kafka.apache.org/documentation/#consumerconfigs_max.poll.interval.ms",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.88,
  "fix_success_rate": 0.85,
  "resolvable": "true",
  "first_seen": "2023-11-05",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}