From bc35275b7b19f297b3e4f942b421082e254de3c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Grubbstr=C3=B6m=20=28Grubba=29?= <grubba@grubba.org> Date: Wed, 18 Mar 2020 16:21:43 +0100 Subject: [PATCH] Search.Filter.HTML: Return the first <title>. There may be multiple <title>-tags in an HTML document, where the later ones are related to SVG. Fixes #10020. --- .gitattributes | 1 - lib/modules/Search.pmod/Filter.pmod/HTML.pmod | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 1056ea3bb4..def823266e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11,7 +11,6 @@ /lib/modules/Search.pmod/Database.pmod/MySQL.pike foreign_ident /lib/modules/Search.pmod/Filter.pmod/Base.pike foreign_ident /lib/modules/Search.pmod/Filter.pmod/Charset.pmod foreign_ident -/lib/modules/Search.pmod/Filter.pmod/HTML.pmod foreign_ident /lib/modules/Search.pmod/Filter.pmod/PDF.pmod foreign_ident /lib/modules/Search.pmod/Filter.pmod/PlainText.pmod foreign_ident /lib/modules/Search.pmod/Filter.pmod/Word.pmod foreign_ident diff --git a/lib/modules/Search.pmod/Filter.pmod/HTML.pmod b/lib/modules/Search.pmod/Filter.pmod/HTML.pmod index e4c658f5fd..791e64677b 100644 --- a/lib/modules/Search.pmod/Filter.pmod/HTML.pmod +++ b/lib/modules/Search.pmod/Filter.pmod/HTML.pmod @@ -1,7 +1,7 @@ // This file is part of Roxen Search // Copyright � 2000,2001 Roxen IS. All rights reserved. // -// $Id: HTML.pmod,v 1.42 2007/03/16 14:45:38 jonasw Exp $ +// $Id$ // Filter for text/html @@ -139,7 +139,8 @@ static string clean(string data) { array(string) parse_title(Parser.HTML p, mapping m, string c, mapping e) { if (e->noindex) return ({ }); - res->fields->title = clean(c); + if (!res->fields->title) + res->fields->title = clean(c); return ({c}); }; -- GitLab