Skip to content
Snippets Groups Projects
Commit 878f210c authored by Andreas Lange's avatar Andreas Lange
Browse files

Added some warnings. More handling of charsets and encoding of entities

Rev: bin/extract.pike:1.2
parent 31fb1d98
Branches
Tags
No related merge requests found
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Copyright © 2000, Roxen IS. // Copyright © 2000, Roxen IS.
// By Martin Nilsson and Andreas Lange // By Martin Nilsson and Andreas Lange
// //
// $Id: extract.pike,v 1.1 2000/07/09 16:14:56 nilsson Exp $ // $Id: extract.pike,v 1.2 2000/07/10 17:47:08 lange Exp $
// //
...@@ -121,8 +121,14 @@ string quotemeta(string in) { ...@@ -121,8 +121,14 @@ string quotemeta(string in) {
function get_decoder(string encoding) { function get_decoder(string encoding) {
// If needed, returns a function which decodes a string // If needed, returns a function which decodes a string
if(!encoding || encoding=="")
return 0;
switch(lower_case(encoding)) switch(lower_case(encoding))
{ {
case "iso-8859-1":
// The normal, no decode needed
return 0;
case "utf-8": case "utf8": case "utf-8": case "utf8":
return lambda(string s) { return lambda(string s) {
return utf8_to_string(s); return utf8_to_string(s);
...@@ -134,14 +140,17 @@ function get_decoder(string encoding) { ...@@ -134,14 +140,17 @@ function get_decoder(string encoding) {
return unicode_to_string(s); return unicode_to_string(s);
}; };
case "iso-8859-1": default:
// Default, no decode needed object dec;
return 0; if(catch(dec = Locale.Charset.decoder( encoding ))) {
}
werror("\n* Unknown encoding %O!\n", encoding); werror("\n* Unknown encoding %O!\n", encoding);
exit(1); exit(1);
} }
return lambda(string s) {
return dec->clear()->feed(s)->drain();
};
}
}
string parse_xml_file(string filename, void|mixed wipe_pass) { string parse_xml_file(string filename, void|mixed wipe_pass) {
...@@ -265,14 +274,14 @@ string parse_xml_file(string filename, void|mixed wipe_pass) { ...@@ -265,14 +274,14 @@ string parse_xml_file(string filename, void|mixed wipe_pass) {
array hits = RE->split(c); array hits = RE->split(c);
if(hits) if(hits)
c = get_first_string(sprintf("%O",hits[0])); c = get_first_string(sprintf("%O",hits[0]));
if(decode) { // Replace encoded entities
mixed err = catch{ c = decode(c); }; c = replace(c,({"&lt;","&gt;","&amp;"}),({"<",">","&"}));
if(err) { if(decode)
if(catch( c = decode(c) )) {
werror("\n* Warning: Decoding from %s failed for "+ werror("\n* Warning: Decoding from %s failed for "+
"comment with id %s\n", args->encoding,id); "comment with id %s\n", args->encoding,id);
return "\b"; return "\b";
} }
}
if(id!="" && c!="") if(id!="" && c!="")
// Save text for use in the t_tag function // Save text for use in the t_tag function
c_ids[id]=c; c_ids[id]=c;
...@@ -317,15 +326,22 @@ void write_xml_file(string out_name, string outdata) { ...@@ -317,15 +326,22 @@ void write_xml_file(string out_name, string outdata) {
write("Writing %s...",out_name); write("Writing %s...",out_name);
// Default nilencoding // Default nilencoding
function encode = lambda(string s) { return s; }; function encode=0;
object _enc;
if(args->encoding) { if(args->encoding) {
// Set encoder function if encoding known. // Set encoder function if encoding known.
switch(lower_case(args->encoding)) switch(lower_case(args->encoding))
{ {
default: default:
if(catch(_enc = Locale.Charset.encoder( args->encoding ))) {
werror("\n* Unknown encoding %O, using default", args->encoding); werror("\n* Unknown encoding %O, using default", args->encoding);
args->encoding=0; args->encoding=0;
break; break;
}
encode = lambda(string s) {
return _enc->clear()->feed(s)->drain();
};
break;
case "utf-8": case "utf8": case "utf-8": case "utf8":
encode = lambda(string s) { encode = lambda(string s) {
...@@ -340,8 +356,11 @@ void write_xml_file(string out_name, string outdata) { ...@@ -340,8 +356,11 @@ void write_xml_file(string out_name, string outdata) {
}; };
break; break;
case "":
args->encoding = 0;
case "iso-8859-1": case "iso-8859-1":
// Default // No encoding needed
} }
} }
...@@ -377,10 +396,12 @@ void write_xml_file(string out_name, string outdata) { ...@@ -377,10 +396,12 @@ void write_xml_file(string out_name, string outdata) {
if(i==sizeof(id_xml_order)) { if(i==sizeof(id_xml_order)) {
// Shrinking file? // Shrinking file?
outdata=replace(outdata,marker,""); outdata=replace(outdata,marker,"");
break; continue;
} }
string id=id_xml_order[i]; string id=id_xml_order[i];
string str=encode(ids[id_xml_order[i]]); string str=ids[id];
if(encode) str=encode(str); // Encode and make parser-safe
str = replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));
outdata = (outdata[0..n-1] + outdata = (outdata[0..n-1] +
sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>", sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>",
id, info, str, tag, id, tag) + id, info, str, tag, id, tag) +
...@@ -393,7 +414,9 @@ void write_xml_file(string out_name, string outdata) { ...@@ -393,7 +414,9 @@ void write_xml_file(string out_name, string outdata) {
// Dump new strings // Dump new strings
while(i<sizeof(id_xml_order)) { while(i<sizeof(id_xml_order)) {
string id=id_xml_order[i]; string id=id_xml_order[i];
string str=encode(ids[id_xml_order[i]]); string str=ids[id];
if(encode) str=encode(str); // Encode and make parser-safe
str = replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));
out->write("\n<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>\n", out->write("\n<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>\n",
id, info, str, tag, id, tag); id, info, str, tag, id, tag);
i++; i++;
...@@ -476,7 +499,7 @@ void update_pike_sourcefiles(array filelist) { ...@@ -476,7 +499,7 @@ void update_pike_sourcefiles(array filelist) {
array hits; array hits;
array id_pike_order=({}); array id_pike_order=({});
foreach(tokens, string token) { foreach(tokens, string token) {
RE = Regexp("^#define[ \t\n]*"+token); RE = Regexp("^#[ \t]*define[ \t\n]*"+token);
string newdata = ""; string newdata = "";
foreach(indata/"\n", string line) { foreach(indata/"\n", string line) {
if(RE->match(line)) if(RE->match(line))
...@@ -521,6 +544,9 @@ void update_pike_sourcefiles(array filelist) { ...@@ -521,6 +544,9 @@ void update_pike_sourcefiles(array filelist) {
exit(1); exit(1);
} }
} }
if(r_ids[fstr] && r_ids[fstr]!=id && id_origin[r_ids[fstr]])
werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s",
fstr, r_ids[fstr], id_origin[r_ids[fstr]], id, filename);
} }
if(!has_value(id_xml_order,id)) if(!has_value(id_xml_order,id))
// Id not in xml-structure, add to list // Id not in xml-structure, add to list
...@@ -653,6 +679,11 @@ void update_xml_sourcefiles(array filelist) { ...@@ -653,6 +679,11 @@ void update_xml_sourcefiles(array filelist) {
exit(1); exit(1);
} }
} }
if(r_ids[fstr] && r_ids[fstr]!=id &&
id_origin[r_ids[fstr]])
werror("\n* Warning: %O has id %O in%{ %s%}, "
"id %O in %s", fstr, r_ids[fstr],
id_origin[r_ids[fstr]], id, filename);
} }
if(!has_value(id_xml_order,id)) if(!has_value(id_xml_order,id))
// Id not in xml-structure, add to list // Id not in xml-structure, add to list
...@@ -661,14 +692,10 @@ void update_xml_sourcefiles(array filelist) { ...@@ -661,14 +692,10 @@ void update_xml_sourcefiles(array filelist) {
ids[id] = fstr; // Store id:text ids[id] = fstr; // Store id:text
r_ids[fstr] = id; // Store text:id r_ids[fstr] = id; // Store text:id
if(updated) { if(updated) {
// Returning this will actually make the Parser
// parse the tag twice - unnecessary perhaps, but
// good for detecting if there are errors in the
// decoding/encoding --> "inconsistant use of id"
string ret="<translate id=\""+id+"\""; string ret="<translate id=\""+id+"\"";
if(m->project) foreach(indices(m)-({"id"}), string param)
ret+=" project=\""+m->project+"\""; ret+=" "+param+"=\""+m[param]+"\"";
return ret+">"+c+"</translate>"; return ({ ret+">"+c+"</translate>" });
} }
// Not updated, do not change // Not updated, do not change
return 0; return 0;
...@@ -871,7 +898,7 @@ int main(int argc, array(string) argv) { ...@@ -871,7 +898,7 @@ int main(int argc, array(string) argv) {
xml_name = filename; xml_name = filename;
if(!sizeof(files) || args->help) { if(!sizeof(files) || args->help) {
sscanf("$Revision: 1.1 $", "$"+"Revision: %s $", string v); sscanf("$Revision: 1.2 $", "$"+"Revision: %s $", string v);
werror("\n Locale Extractor Utility "+v+"\n\n"); werror("\n Locale Extractor Utility "+v+"\n\n");
werror(" Syntax: extract.pike [arguments] infile(s)\n\n"); werror(" Syntax: extract.pike [arguments] infile(s)\n\n");
werror(" Arguments: --project=name default: first found in infile\n"); werror(" Arguments: --project=name default: first found in infile\n");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment