[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"news-8c28aa2a-10f7-4c0c-b976-e5bf7e781eed":3},{"id":4,"title":5,"summary":6,"original_url":7,"source_id":8,"tags":9,"published_at":23,"created_at":24,"modified_at":25,"is_published":26,"publish_type":27,"image_url":13,"view_count":28},"8c28aa2a-10f7-4c0c-b976-e5bf7e781eed","Qwen3.5-397B-A17B发布：千亿MoE架构实现8.6倍解码吞吐提升","阿里发布Qwen3.5-397B-A17B，这是其Qwen开源家族的最新旗舰模型。该模型采用大规模MoE（混合专家）架构，融合多模态推理与超长上下文支持，成为当前开源领域最具竞争力的Agent与多模态工作负载模型之一。\n\n相比上一代Qwen3-Max，Qwen3.5在解码效率上实现了质的飞跃：官方数据显示，其解码吞吐量提升了8.6倍至19倍。这意味着在同等硬件条件下，Qwen3.5能够服务更多并发请求，对于大规模部署场景意义重大。\n\n在架构层面，Qwen3.5的另一个重要突破在于多模态推理的深度整合。不同于早期模型在文本backbone上外挂视觉模块的做法，Qwen3.5在架构更早阶段就将视觉与语言进行融合，使模型能够在文本、图像、视频和文档之间进行跨模态联合推理。这种「原生多模态」架构通常能带来更好的推理一致性和任务迁移能力。\n\n从行业角度看，8倍以上解码吞吐的提升直接回应了开源社区对高效推理的迫切需求。MoE架构通过条件激活减少计算冗余，而更早的多模态融合则让视觉理解成为语言模型的内生能力而非外挂功能。对于需要在边缘设备或成本敏感场景部署多模态AI的开发者而言，Qwen3.5提供了一条不需要在性能上做过多妥协的路径。当然，这一提升的实际表现还有待开源社区在真实应用场景中验证。","https:\u002F\u002Fwww.bentoml.com\u002Fblog\u002Fnavigating-the-world-of-open-source-large-language-models","4efc0816-0de0-4a2e-bffd-526b65850f91",[10,14,17,20],{"id":11,"name":12,"slug":12,"description":13,"color":13},"0ef8513a-0a26-42f0-b6f9-5b6dadded45c","efficiency",null,{"id":15,"name":16,"slug":16,"description":13,"color":13},"01598627-1ea6-4b27-a5d8-874971571a71","llm",{"id":18,"name":19,"slug":19,"description":13,"color":13},"499f4b56-819d-49a3-9609-33e775143b86","multimodal",{"id":21,"name":22,"slug":22,"description":13,"color":13},"b9bd9039-fcdb-41a8-b85b-fc1587def2b9","open-source","2026-05-24T10:05:00Z","2026-05-24T10:12:39.995260Z","2026-05-24T10:12:39.995270Z",true,"agent",15]