From bc35275b7b19f297b3e4f942b421082e254de3c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Grubbstr=C3=B6m=20=28Grubba=29?=
 <grubba@grubba.org>
Date: Wed, 18 Mar 2020 16:21:43 +0100
Subject: [PATCH] Search.Filter.HTML: Return the first <title>.

There may be multiple <title> tags in an HTML document, where the
later ones are related to SVG.

Fixes #10020.
---
 .gitattributes                                | 1 -
 lib/modules/Search.pmod/Filter.pmod/HTML.pmod | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitattributes b/.gitattributes
index 1056ea3bb4..def823266e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -11,7 +11,6 @@
 /lib/modules/Search.pmod/Database.pmod/MySQL.pike foreign_ident
 /lib/modules/Search.pmod/Filter.pmod/Base.pike foreign_ident
 /lib/modules/Search.pmod/Filter.pmod/Charset.pmod foreign_ident
-/lib/modules/Search.pmod/Filter.pmod/HTML.pmod foreign_ident
 /lib/modules/Search.pmod/Filter.pmod/PDF.pmod foreign_ident
 /lib/modules/Search.pmod/Filter.pmod/PlainText.pmod foreign_ident
 /lib/modules/Search.pmod/Filter.pmod/Word.pmod foreign_ident
diff --git a/lib/modules/Search.pmod/Filter.pmod/HTML.pmod b/lib/modules/Search.pmod/Filter.pmod/HTML.pmod
index e4c658f5fd..791e64677b 100644
--- a/lib/modules/Search.pmod/Filter.pmod/HTML.pmod
+++ b/lib/modules/Search.pmod/Filter.pmod/HTML.pmod
@@ -1,7 +1,7 @@
 // This file is part of Roxen Search
 // Copyright � 2000,2001 Roxen IS. All rights reserved.
 //
-// $Id: HTML.pmod,v 1.42 2007/03/16 14:45:38 jonasw Exp $
+// $Id$
 
 // Filter for text/html
 
@@ -139,7 +139,8 @@ static string clean(string data) {
   array(string) parse_title(Parser.HTML p, mapping m, string c, mapping e) {
     if (e->noindex)
       return ({ });
-    res->fields->title = clean(c);
+    if (!res->fields->title)
+      res->fields->title = clean(c);
     return ({c});
   };
 
-- 
GitLab