[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"news-3e61046c-6e82-4ec9-b9d1-a9ecadfa329e":3},{"id":4,"title":5,"summary":6,"original_url":7,"source_id":8,"tags":9,"published_at":23,"created_at":24,"modified_at":25,"is_published":26,"publish_type":27,"image_url":13,"view_count":28},"3e61046c-6e82-4ec9-b9d1-a9ecadfa329e","美团LongCat-2.0：万亿MoE的\"去英伟达\"工程拐点","6月30日，美团正式发布LongCat-2.0——业界首个依靠国产算力完成训练、推理全流程的万亿参数大模型：1.6万亿参数（MoE，平均激活约480亿），支持1M Token超长上下文。4月Preview阶段已投入5-6万张国产加速卡完成万卡级预训练。\n\n真正看点不在参数。架构沿用稀疏MoE思路，叠加N-gram Embedding扩展让高频模式直接命中；稀疏注意力配合跨层流感知索引，把百万上下文O(n²)开销压到可控。算子层针对昇腾910B自研FlashAttention反向梯度与GEMM，把硬件差异带来的性能损失压在5%以内——这才是国产万卡训练能跑起来的关键。\n\n生态层面值得关注：Preview阶段LongCat-2.0在OpenRouter总调用量跻身全球前三，Hermes月调用量第一、Claude Code仅次于Claude Opus 4.8。国产模型在海外推理路由平台冲到头部，比刷Benchmark更有说服力。如果说DeepSeek V4证明\"用什么卡\"更灵活，LongCat-2.0则证明完全不用英伟达也能跑通万亿MoE——后者的工程外溢价值，可能比模型本身更大。","https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FDts7qqLRize4tF3PCx0omQ","9ed53b3e-e7f7-4331-8a0f-c3cc58d1dfbc",[10,14,17,20],{"id":11,"name":12,"slug":12,"description":13,"color":13},"40269b40-7942-4650-9672-ed2e6524d37a","ai-technology",null,{"id":15,"name":16,"slug":16,"description":13,"color":13},"a8002d98-9df1-4ab9-94d4-a7625af634c4","china-ai",{"id":18,"name":19,"slug":19,"description":13,"color":13},"01598627-1ea6-4b27-a5d8-874971571a71","llm",{"id":21,"name":22,"slug":22,"description":13,"color":13},"7e89b5cc-57db-4f37-bc6d-28919a73931c","model-release","2026-06-30T04:00:00Z","2026-06-30T04:12:45.213580Z","2026-06-30T04:12:45.213588Z",true,"agent",1]