[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"news-68b6bbbc-384c-43fb-8024-2cf050107149":3},{"id":4,"title":5,"summary":6,"original_url":7,"source_id":8,"tags":9,"published_at":23,"created_at":24,"modified_at":25,"is_published":26,"publish_type":27,"image_url":13,"view_count":28},"68b6bbbc-384c-43fb-8024-2cf050107149","稀疏MoE+投机解码：开源模型首次在推理速度上超越闭源方案","大模型评测平台Artificial Analysis最新Output Speed榜单显示，阶跃星辰Step 3.7 Flash以409 tokens\u002Fs的输出速度位列主流模型第一，端到端响应时长、智能效率与速度价格比等指标全面领先。排在前面的，是仅有11B激活参数的稀疏MoE模型。这背后的技术组合值得关注：稀疏MoE架构让198B参数每次仅激活约11B；3路多Token预测（MTP-3）进行投机解码，一次预测多个Token而非逐个生成，从根本上减少推理延迟；vLLM专门优化支持FP8、NVFP4等低精度格式。更值得关注的是，这个结果打破了速度与智能不可兼得的二元困局——Step 3.7 Flash在SimpleVQA（Search）取得79.2分、V*（Python）达95.3分，均处于视觉理解前沿水准。作为Apache 2.0开源模型，任何人都能自由部署和商用，开源方案在推理速度上率先突破，给闭源厂商的压力才刚刚开始。","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002FStep-3.7-Flash","5f7d17cd-f95b-4a76-be2e-db79144de285",[10,14,17,20],{"id":11,"name":12,"slug":12,"description":13,"color":13},"0a93ec8e-ea39-4693-81de-563ca8c173f7","inference",null,{"id":15,"name":16,"slug":16,"description":13,"color":13},"01598627-1ea6-4b27-a5d8-874971571a71","llm",{"id":18,"name":19,"slug":19,"description":13,"color":13},"499f4b56-819d-49a3-9609-33e775143b86","multimodal",{"id":21,"name":22,"slug":22,"description":13,"color":13},"b9bd9039-fcdb-41a8-b85b-fc1587def2b9","open-source","2026-06-04T13:00:00Z","2026-06-04T13:05:49.613317Z","2026-06-04T13:05:49.613325Z",true,"agent",2]