[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"news-c3d62e7b-95d3-44f0-a993-ed3d0654b1fc":3},{"id":4,"title":5,"summary":6,"original_url":7,"source_id":8,"tags":9,"published_at":23,"created_at":24,"modified_at":25,"is_published":26,"publish_type":27,"image_url":13,"view_count":28},"c3d62e7b-95d3-44f0-a993-ed3d0654b1fc","商汤开源SenseNova U1：NEO-unify架构迈向原生统一多模态时代","4月28日，商汤科技正式发布并开源日日新SenseNova U1系列模型，基于自主研发的NEO-unify架构，在单一模型内统一了多模态理解、推理与生成。这是多模态模型领域一次值得关注的技术路线探索。\n\n当前主流多模态方案采用拼接式架构：视觉编码器（VE）将图像转为离散token，VAE处理部分视觉信息，最终与语言token拼合输入语言模型。本质上仍是语言模型看见了视觉信息。\n\nNEO-unify彻底另起炉灶：去除独立的视觉编码器和VAE，从最底层重建统一表征空间，将语言与视觉信息作为统一复合体直接建模，深入融入每一层计算。这实现了从模态集成到原生统一的范式跨越——理解与生成不再由不同模块分工，而是同步增强。\n\n商汤宣称，SenseNova U1在业内首个实现连续性的图文创作输出，单次单模型调用即可生成一系列图文内容，而传统范式需要多次调用多个模型。效率提升的同时，在逻辑推理与空间智能等方向上，模型能深度理解物理世界的复杂布局与精细关系。商汤还透露该模型未来将为机器人提供具身大脑，在单一模型闭环内完成从环境感知、逻辑推演到精准执行的全过程。\n\nSenseNova U1已全面开源，有助于降低多模态应用开发门槛，让更多研究者参与到原生统一架构的验证与迭代中。\n\nNEO-unify的思路有技术洞见——原生统一确实是多模态模型的未来方向，而非在语言模型上外挂视觉模块。但架构激进转型能否带来实质性能力提升，仍需社区实测数据验证。多模态模型的架构之争，才刚刚开始。","https:\u002F\u002Ffinance.sina.com.cn\u002Ftob\u002F2026-04-28\u002Fdoc-inhwaicx1570329.shtml","fe03ae88-d255-41c8-9e16-4f6c49b4b64e",[10,14,17,20],{"id":11,"name":12,"slug":12,"description":13,"color":13},"a8002d98-9df1-4ab9-94d4-a7625af634c4","china-ai",null,{"id":15,"name":16,"slug":16,"description":13,"color":13},"01598627-1ea6-4b27-a5d8-874971571a71","llm",{"id":18,"name":19,"slug":19,"description":13,"color":13},"499f4b56-819d-49a3-9609-33e775143b86","multimodal",{"id":21,"name":22,"slug":22,"description":13,"color":13},"b9bd9039-fcdb-41a8-b85b-fc1587def2b9","open-source","2026-04-28T16:10:00Z","2026-04-28T16:06:19.856136Z","2026-04-28T16:06:19.856148Z",true,"agent",3]