From 878f210cd92e86307905288ed841c3b63f62d1ee Mon Sep 17 00:00:00 2001 From: Andreas Lange <andreas@lange.cx> Date: Mon, 10 Jul 2000 19:47:08 +0200 Subject: [PATCH] Added some warnings. More handling of charsets and encoding of entities Rev: bin/extract.pike:1.2 --- bin/extract.pike | 81 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/bin/extract.pike b/bin/extract.pike index 56a9e61d84..0c60ed3543 100755 --- a/bin/extract.pike +++ b/bin/extract.pike @@ -2,7 +2,7 @@ // Copyright � 2000, Roxen IS. // By Martin Nilsson and Andreas Lange // -// $Id: extract.pike,v 1.1 2000/07/09 16:14:56 nilsson Exp $ +// $Id: extract.pike,v 1.2 2000/07/10 17:47:08 lange Exp $ // @@ -121,8 +121,14 @@ string quotemeta(string in) { function get_decoder(string encoding) { // If needed, returns a function which decodes a string + if(!encoding || encoding=="") + return 0; switch(lower_case(encoding)) { + case "iso-8859-1": + // The normal, no decode needed + return 0; + case "utf-8": case "utf8": return lambda(string s) { return utf8_to_string(s); @@ -134,13 +140,16 @@ function get_decoder(string encoding) { return unicode_to_string(s); }; - case "iso-8859-1": - // Default, no decode needed - return 0; - + default: + object dec; + if(catch(dec = Locale.Charset.decoder( encoding ))) { + werror("\n* Unknown encoding %O!\n", encoding); + exit(1); + } + return lambda(string s) { + return dec->clear()->feed(s)->drain(); + }; } - werror("\n* Unknown encoding %O!\n", encoding); - exit(1); } @@ -265,14 +274,14 @@ string parse_xml_file(string filename, void|mixed wipe_pass) { array hits = RE->split(c); if(hits) c = get_first_string(sprintf("%O",hits[0])); - if(decode) { - mixed err = catch{ c = decode(c); }; - if(err) { + // Replace encoded entities + c = replace(c,({"<",">","&"}),({"<",">","&"})); + if(decode) + if(catch( c = decode(c) )) { werror("\n* Warning: Decoding from %s failed for "+ "comment with id %s\n", args->encoding,id); return "\b"; } - } if(id!="" && c!="") // Save text for use in the t_tag function c_ids[id]=c; @@ -317,14 +326,21 @@ void write_xml_file(string out_name, string outdata) { write("Writing %s...",out_name); // Default nilencoding - function encode = lambda(string s) { return s; }; + function encode=0; + object _enc; if(args->encoding) { // Set encoder function if encoding known. switch(lower_case(args->encoding)) { default: - werror("\n* Unknown encoding %O, using default", args->encoding); - args->encoding=0; + if(catch(_enc = Locale.Charset.encoder( args->encoding ))) { + werror("\n* Unknown encoding %O, using default", args->encoding); + args->encoding=0; + break; + } + encode = lambda(string s) { + return _enc->clear()->feed(s)->drain(); + }; break; case "utf-8": case "utf8": @@ -340,8 +356,11 @@ void write_xml_file(string out_name, string outdata) { }; break; + case "": + args->encoding = 0; + case "iso-8859-1": - // Default + // No encoding needed } } @@ -377,10 +396,12 @@ void write_xml_file(string out_name, string outdata) { if(i==sizeof(id_xml_order)) { // Shrinking file? outdata=replace(outdata,marker,""); - break; + continue; } string id=id_xml_order[i]; - string str=encode(ids[id_xml_order[i]]); + string str=ids[id]; + if(encode) str=encode(str); // Encode and make parser-safe + str = replace(str, ({"<",">","&"}), ({"<",">","&"})); outdata = (outdata[0..n-1] + sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>", id, info, str, tag, id, tag) + @@ -393,7 +414,9 @@ void write_xml_file(string out_name, string outdata) { // Dump new strings while(i<sizeof(id_xml_order)) { string id=id_xml_order[i]; - string str=encode(ids[id_xml_order[i]]); + string str=ids[id]; + if(encode) str=encode(str); // Encode and make parser-safe + str = replace(str, ({"<",">","&"}), ({"<",">","&"})); out->write("\n<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>\n", id, info, str, tag, id, tag); i++; @@ -476,7 +499,7 @@ void update_pike_sourcefiles(array filelist) { array hits; array id_pike_order=({}); foreach(tokens, string token) { - RE = Regexp("^#define[ \t\n]*"+token); + RE = Regexp("^#[ \t]*define[ \t\n]*"+token); string newdata = ""; foreach(indata/"\n", string line) { if(RE->match(line)) @@ -521,6 +544,9 @@ void update_pike_sourcefiles(array filelist) { exit(1); } } + if(r_ids[fstr] && r_ids[fstr]!=id && id_origin[r_ids[fstr]]) + werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s", + fstr, r_ids[fstr], id_origin[r_ids[fstr]], id, filename); } if(!has_value(id_xml_order,id)) // Id not in xml-structure, add to list @@ -653,6 +679,11 @@ void update_xml_sourcefiles(array filelist) { exit(1); } } + if(r_ids[fstr] && r_ids[fstr]!=id && + id_origin[r_ids[fstr]]) + werror("\n* Warning: %O has id %O in%{ %s%}, " + "id %O in %s", fstr, r_ids[fstr], + id_origin[r_ids[fstr]], id, filename); } if(!has_value(id_xml_order,id)) // Id not in xml-structure, add to list @@ -661,14 +692,10 @@ void update_xml_sourcefiles(array filelist) { ids[id] = fstr; // Store id:text r_ids[fstr] = id; // Store text:id if(updated) { - // Returning this will actually make the Parser - // parse the tag twice - unnecessary perhaps, but - // good for detecting if there are errors in the - // decoding/encoding --> "inconsistant use of id" string ret="<translate id=\""+id+"\""; - if(m->project) - ret+=" project=\""+m->project+"\""; - return ret+">"+c+"</translate>"; + foreach(indices(m)-({"id"}), string param) + ret+=" "+param+"=\""+m[param]+"\""; + return ({ ret+">"+c+"</translate>" }); } // Not updated, do not change return 0; @@ -871,7 +898,7 @@ int main(int argc, array(string) argv) { xml_name = filename; if(!sizeof(files) || args->help) { - sscanf("$Revision: 1.1 $", "$"+"Revision: %s $", string v); + sscanf("$Revision: 1.2 $", "$"+"Revision: %s $", string v); werror("\n Locale Extractor Utility "+v+"\n\n"); werror(" Syntax: extract.pike [arguments] infile(s)\n\n"); werror(" Arguments: --project=name default: first found in infile\n"); -- GitLab