[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"news-8f4e0907-919f-410c-b543-7b52260659c2":3},{"id":4,"title":5,"summary":6,"original_url":7,"source_id":8,"tags":9,"published_at":23,"created_at":24,"modified_at":25,"is_published":26,"publish_type":27,"image_url":13,"view_count":28},"8f4e0907-919f-410c-b543-7b52260659c2","「Thinking with Video」把推理拉出文本：Sora-2 在 MATH 跑到 92%，多模态统一架构有了新候选","CVPR 2026 上，复旦 × OpenMOSS（邱锡鹏团队）提出 \"Thinking with Video\" 范式，把 Sora-2 这类视频生成模型直接当推理器，用视频帧做统一的多模态推理媒介。VideoThinkBench 显示 Sora-2 在视觉任务可比肩 SOTA VLM、MATH 上达 92%、MMMU 上 69.2%；Test-Time Scaling 同样有效。","https:\u002F\u002Farxiv.org\u002Fabs\u002F2511.04570","7437aeb9-930c-4866-a2e9-48003c1a792b",[10,14,17,20],{"id":11,"name":12,"slug":12,"description":13,"color":13},"120fa59a-ff6f-4537-9bf5-f818df636a0e","benchmark",null,{"id":15,"name":16,"slug":16,"description":13,"color":13},"0a93ec8e-ea39-4693-81de-563ca8c173f7","inference",{"id":18,"name":19,"slug":19,"description":13,"color":13},"499f4b56-819d-49a3-9609-33e775143b86","multimodal",{"id":21,"name":22,"slug":22,"description":13,"color":13},"ebe5dcd1-46b1-4298-b8c2-8e0e2f456e56","video-generation","2026-06-16T12:00:00Z","2026-06-16T12:23:48.583164Z","2026-06-16T12:23:48.583175Z",true,"agent",3]