#!/usr/local/bin/pike 
// Copyright (c) 2000, Roxen IS.
// By Martin Nilsson and Andreas Lange
//
// $Id: extract.pike,v 1.8 2000/11/27 10:28:07 nilsson Exp $
//


// Program options. Filled from the command line in main() and
// completed from the config file in parse_config(); the parsers also
// store the detected project name in args->project.
mapping args = ([]);
// Source files to gather strings from (command line and <file> tags
// in the config).
array(string) files = ({});
// All ids used, id:mapping(info). The info mapping carries keys such
// as "original", "text", "origin" and "changetag".
mapping ids = ([]);
// Reversed id mapping, original text:id
mapping(string:string|int) r_ids = ([]);
// Order of the ids in the xml outdata file
array(string|int) id_xml_order = ({});
// Code blocks from <add> tags in the config that should be appended
// to the xml outfile, id:code
mapping(string:string) add = ([]);
// Ids of <add> blocks already present in the xml outfile (seen as
// <added> tags by parse_xml_file())
multiset(string) added = (<>);
// Last integer id handed out by make_id(); it searches upwards from
// this value for an id that is not in id_xml_order.
int high_int_id = 0;


int make_id() {
  // Hands out the next unused integer id: high_int_id is advanced
  // until it reaches a value that is not present in id_xml_order,
  // and that value is returned.
  do {
    high_int_id++;
  } while( has_value(id_xml_order, high_int_id) );
  return high_int_id;
}


string get_first_string(string in) {
  // Merges the parts of the first (possibly concatenated) string
  // literal in a line of source text and returns its compiled value,
  // ie '"a\\n"  "b: " "%s!", string' --> "a\nb: %s!"
  //
  // Scanning stops at the first ',' found outside a string literal.
  // Inside a literal a backslash and the character following it are
  // copied verbatim as a pair, so an escaped quote (\") never ends the
  // literal while a quote after an escaped backslash (\\") does.
  // Empty literals ("") contribute nothing.
  string ret = "";
  int instr = 0;
  for(int i = 0; i<sizeof(in); i++) {
    int c = in[i];
    if(instr) {
      if(c=='\\') {
        // Keep the whole escape sequence for compile_string() below.
        ret += in[i..i+1];
        i++;
      }
      else if(c=='\"')
        instr = 0;
      else
        ret += in[i..i];
    }
    else if(c=='\"')
      instr = 1;
    else if(c==',')
      break;
  }
  // Let the compiler resolve the escapes. #"..." is used so that
  // literal newlines in the collected text are accepted.
  return compile_string("constant q=#\""+ret+"\";")->q;
}


string quotemeta(string in) {
  // Turns a string expression taken from the source into a regexp
  // fragment that matches the same expression in the source file.
  // Only text inside string literals is kept; regexp meta characters
  // there are backslash-quoted, and every literal after the first one
  // is preceded by ".*" to bridge whatever separates the parts.
  multiset(int) meta = (< '\\', '.', '+', '*', '^', '(', ')',
                          '$', '[', ']', '|' >);
  string out = "";
  int inside = 0;
  for(int pos = 0; pos<sizeof(in); pos++) {
    int c = in[pos];

    if(c=='\"') {
      int escaped = (pos>0 && in[pos-1]=='\\');
      if(!escaped) {
        inside = !inside;
        if(inside && pos>0)
          out += ".*";
      }
      out += "\"";
      continue;
    }

    if(c=='\\' && (pos+1)<sizeof(in) && in[pos+1]=='n') {
      if(inside) {
        out += "[\n|\\\\]n*"; // Must handle both "\\n" and '\n'
        pos++;                // The 'n' has been dealt with
      }
      continue;
    }

    // Everything outside string literals is dropped.
    if(!inside)
      continue;
    if(meta[c])
      out += "\\";
    out += in[pos..pos];
  }
  return out;
}


function get_encoder(string encoding) {
  // Returns a function that encodes a string into the named encoding,
  // or 0 when no encoding name was given. An encoding that
  // Locale.Charset does not know makes the program exit with an error.
  if(!encoding || encoding=="")
    return 0;

  string name = lower_case(encoding);

  if( (< "utf-8", "utf8" >)[name] )
    return lambda(string s) {
             return string_to_utf8(s);
           };

  if( (< "utf-16", "utf16", "unicode" >)[name] )
    return lambda(string s) {
             return string_to_unicode(s);
           };

  // Anything else is handed over to the charset module.
  object enc;
  mixed err = catch( enc = Locale.Charset.encoder( encoding ) );
  if(err) {
    werror("\n* Error: Unknown encoding %O!\n", encoding);
    exit(1);
  }
  return lambda(string s) {
           return enc->clear()->feed(s)->drain();
         };
}


function get_decoder(string encoding) {
  // Returns a function that decodes a string from the named encoding,
  // or 0 when no decoding is needed (no name given, or iso-8859-1).
  // An encoding that Locale.Charset does not know makes the program
  // exit with an error.
  if(!encoding || encoding=="")
    return 0;

  string name = lower_case(encoding);

  // The normal case, no decode needed
  if(name=="iso-8859-1")
    return 0;

  if( (< "utf-8", "utf8" >)[name] )
    return lambda(string s) {
             return utf8_to_string(s);
           };

  if( (< "utf-16", "utf16", "unicode" >)[name] )
    return lambda(string s) {
             return unicode_to_string(s);
           };

  // Anything else is handed over to the charset module.
  object dec;
  mixed err = catch( dec = Locale.Charset.decoder( encoding ) );
  if(err) {
    werror("\n* Error: Unknown encoding %O!\n", encoding);
    exit(1);
  }
  return lambda(string s) {
           return dec->clear()->feed(s)->drain();
         };
}


array(mapping) languagefiles(string searchpath, void|string skiplang) {
  // Expands a search path containing the language marker %L into the
  // list of existing language files.
  //
  //   searchpath  Path pattern, e.g. "translations/%L/project.xml".
  //               "%%" stands for a literal "%".
  //   skiplang    Optional language code to leave out of the result.
  //
  // Returns an array of mappings ([ "name":file, "lang":language ]),
  // one per directory entry that matches the pattern and for which
  // the resulting file exists. Returns ({}) when no search path is
  // given or the directory cannot be read.
  if(!searchpath || searchpath=="")
    return ({});
  string pattern = replace(searchpath, "%%", "%");

  // dirbase is the directory part (including the trailing "/") of
  // whatever precedes the first %L. It is "" when the pattern has no
  // directory part, in which case the current directory is scanned.
  array parts = (pattern/"%L")[0]/"/";
  string dirbase = parts[..sizeof(parts)-2]*"/";
  if(sizeof(parts)>1)
    dirbase += "/";

  // s_patt is the path component that holds %L, turned into a
  // sscanf pattern which picks out a three character language code.
  int next_slash = search(pattern, "/", sizeof(dirbase));
  string s_patt;
  if(next_slash==-1)
    s_patt = pattern[sizeof(dirbase)..];
  else
    s_patt = pattern[sizeof(dirbase)..next_slash-1];
  s_patt = replace(s_patt, "%L", "%3s");

  array dirlist = get_dir(dirbase=="" ? "." : dirbase);
  if(!dirlist)
    return ({});
  array list = ({});
  foreach(dirlist, string path) {
    string lang;
    if(!sscanf(path, s_patt, lang)) continue;
    if(lang==skiplang) continue;
    string file = replace(pattern, "%L", lang);
    if(!file_stat(file)) continue;
    list += ({ (["name":file, "lang":lang]) });
  }
  return list;
}


mapping parse_xml_file(string filename, string language) {
  // Reads a language xml file (like project_eng.xml).
  //
  //   filename  File to read. 0, "" or an unreadable file gives ([]).
  //   language  Language code, only used in the progress message.
  //
  // Side effects: resets the globals added and id_xml_order, then
  // fills ids (id:info), r_ids (original text:id), id_xml_order and
  // added from the file; <added> tags also remove the matching entry
  // from add. args->project is set from <project> when not yet set.
  // The program exits on a duplicate id, an unknown <locale> version,
  // a project mismatch or an undecodable file.
  //
  // Returns a mapping,
  //   'encoding' = file encoding,
  //   'data'     = file with markers instead of <str>-blocks
  // write_xml_file uses the returned data+id_xml_order to build a new one
  added = (<>);
  id_xml_order = ({});

  if(!filename || filename=="")
    return ([]);
  Stdio.File in=Stdio.FILE();
  if(!in->open(filename, "r"))
    return ([]);
  write("Reading %s%s",
        language ? "["+language+"] " : "",
        (filename/"/")[-1]);
  string line = in->gets();
  string indata = in->read();
  in->close();
  if(!indata) {
    write("\n");
    return ([]);
  }

  // Check encoding
  string encoding;
  if(!line)
    line = indata;
  sscanf(line, "%*sencoding=\"%s\"", encoding);
  if(encoding && encoding!="") {
    // The first line holds the xml declaration, which is always
    // rewritten by write_xml_file(), so only the rest is decoded.
    function decode = get_decoder(encoding);
    if(decode && catch( indata = decode(indata) )) {
      werror("\n* Error: unable to decode from %O in %O\n",
             encoding, filename);
      exit(1);
    }
  }
  else if(line!=indata)
    // No encoding declaration: the first line is ordinary data and is
    // put back in front of the rest. (Assigning, not appending, so the
    // file content is not parsed twice.)
    indata = line+"\n"+indata;

  write(" - parsing xml...");

  // Parse... First the <str>-parser
  // current collects the information about the <str> being parsed.
  mapping current = ([]);
  Parser.HTML str_parser = Parser.HTML();
  str_parser->case_insensitive_tag(1);

  str_parser->
    add_tag("changed",
            lambda(object foo, mapping m) {
              // Keep the tag verbatim so it can be written out again.
              current->changetag = str_parser->current()+"\n";
              return 0;
            });

  // <t>/<translate>: the translated text. Also registers the id in
  // id_xml_order, which is what makes the <str> count as "present".
  function t_container =
    lambda(object foo, mapping m, string c) {
      if((int)m->id) m->id = (int)m->id;
      if(!current->id) {
        if(!m->id || m->id=="") {
          werror("\n* Warning: String %O has no id.", c||current->original);
          return 0;
        }
        current->id = m->id;
      }
      if(m->id && (m->id != current->id)) {
        werror("\n* Warning: Ignoring string %O. "
               "Contained in id %O but marked with id %O.",
               c, current->id, m->id);
        return 0;
      }
      if(has_value(id_xml_order, current->id)) {
        werror("\n* Error: Id %O used more than once.\n", current->id);
        exit(1);
      }
      id_xml_order += ({ current->id });
      c = replace(c, ({"&lt;","&gt;","&amp;"}), ({"<",">","&"}));
      current->text = c;
      current->textargs = m-({"id"});
      return 0;
    };
  str_parser->add_containers( ([ "t"         : t_container,
                                 "translate" : t_container ]) );

  // <o>/<original>: the original text, ignored when only whitespace.
  function o_container =
    lambda(object foo, mapping m, string c) {
      if(String.trim_whites(c)!="") {
        // Replace encoded entities
        c = replace(c, ({"&lt;","&gt;","&amp;"}), ({"<",">","&"}));
        current->original = c;
        current->originalargs = m-({"id"});
      }
      return 0;
    };
  str_parser->add_containers( ([ "o"        : o_container,
                                 "original" : o_container ]) );

  // Main xml file parser
  // "\b" is used as a marker for lines to remove from returned data
  Parser.HTML xml_parser = Parser.HTML();
  xml_parser->case_insensitive_tag(1);
  xml_parser->add_quote_tag("!--", lambda() {return 0;}, "--");
  xml_parser->
    add_container("str",
                  lambda(object foo, mapping m, string c) {
                    current = ([]);  // New <str>, clear slate
                    if(m->id && m->id!="") {
                      if((int)m->id) m->id = (int)m->id;
                      current->id = m->id;
                    }
                    str_parser->feed( c )->finish();
                    if(current->id) {
                      ids[current->id] = current;
                      // A <str> without <o> has no original text;
                      // there is then nothing to index in r_ids.
                      if(current->original &&
                         String.trim_whites(current->original)!="")
                        r_ids[current->original] = current->id;
                    }
                    if(has_value(id_xml_order, current->id))
                      // Return marker for write_xml_file()
                      // - where to re-insert <str> again.
                      // This is done to make sure the file
                      // really is updated.
                      return "\7\7\7\7";  // Should be unique enough
                    return "\b";
                  });
  xml_parser->
    add_tag("locale",
            // Verify the <locale>-xml version
            lambda(object foo, mapping m) {
              // A missing version attribute is reported as unknown.
              array n = (m->version||"")/".";
              if(n[0]!="1") {
                werror("\n* Unknown locale version %O!\n", m->version);
                exit(1);
              }
              return "\b";
            });
  xml_parser->
    add_container("project",
                  // Verify that the file is for this project
                  lambda(object foo, mapping m, string c) {
                    c = String.trim_whites(c);
                    if(args->project && args->project!=c) {
                      werror("\n* xml data is for project %O, not %O!\n",
                             c, args->project);
                      exit(1);
                    } else
                      args->project = c;
                    return "\b";
                  });
  xml_parser->add_tag("added",
                      // Make sure <add>-tags don't get added more than once
                      lambda(object foo, mapping m) {
                        m_delete(add, m->id);
                        added[m->id] = 1;
                        return "\b";
                      });
  // These tags will always be rewritten anyway, so remove them.
  xml_parser->add_quote_tag("?xml", "\b", "?");
  xml_parser->add_containers( (["file"     : "\b",
                                "dumped"   : "\b",
                                "language" : "\b"]) );
  xml_parser->feed(indata)->finish();

  // Remove markers and lines from removed tags
  string ret = "";
  object RE = Regexp("^[\b \t\n]+$");
  foreach(xml_parser->read()/"\n", string row) {
    if(!RE->match(row))
      ret += row+"\n";
  }
  // Remove silly lines in end of data
  RE = Regexp("^(.*[^\n \t]\n)[ \n\t]*$");
  array hits = RE->split( ret );
  if(hits) ret = hits[0];
  ret = replace(ret, "\n\n\n\n", "\n\n");

  write("\n");
  return ([ "encoding":encoding, "data":ret ]);
}


void write_xml_file(string filename, string language, string encoding,
                    string outdata, void|mapping old_ids)
  // Updates/creates a language xml-file with id:text-info
  // Reuses a present structure if fed with it in outdata
  // Some headers are always rewritten.
  //
  //   filename  File to write (created if missing, truncated after
  //             the new content).
  //   language  Language code for the <language> header and the
  //             progress message.
  //   encoding  Output encoding; when 0 or "" it is chosen from the
  //             width of the generated text.
  //   outdata   Old file content from parse_xml_file(), where each
  //             "\7\7\7\7" marker is replaced by the <str> block of
  //             the next id in id_xml_order. May be 0.
  //   old_ids   Supplied when the file is updated in comparison with
  //             a base xml file; holds the ids read from the file
  //             being updated (translations and change tags).
  //
  // Nothing is written when id_xml_order is empty. The program exits
  // if the file cannot be opened or the text cannot be encoded.
{
  if(!sizeof(id_xml_order))
    // No ids changed or read with parse_xml_file()
    return;
  Stdio.File out=Stdio.File();
  if(!out->open(filename, "cw")) {
    werror("* Error: Could not open %s for writing\n", filename);
    exit(1);
  }

  write("Writing %s%s... (%d ids) ",
        language ? "["+language+"] " : "",
        (filename/"/")[-1], sizeof(id_xml_order));

  // Dump some headers
  string newfile = "";
  newfile += "<locale version=\"1.0\"/>\n";
  newfile += "<project>"+args->project+"</project>\n";
  // Use the full language name when the ISO 639-2 table is available
  // and knows the code, otherwise the code itself.
  string langname = language;
#if constant(Standards.ISO639_2)
  if(language)
    langname = Standards.ISO639_2.get_language(language) || language;
#endif
  newfile += "<language>" + langname + "</language>\n";

  if(!args->notime)
    newfile += "<dumped>"+time()+"</dumped>\n";

  // List files included in the project
  foreach(sort(files), string inname)
    newfile += "<file>"+inname+"</file>\n";

  // List blocks added from the config
  foreach(sort(indices(added)+indices(add)), string blockname)
    newfile += "<added id=\""+blockname+"\"/>\n";

  string o_tag = "o";
  string t_tag = "t";
  if(args->verbose) {
    o_tag = "original";
    t_tag = "translate";
  }

  // Counters for the summary: written, ok, new, changed.
  mapping stats = ([]);
  // Builds the <str> block for one id and updates stats.
  function gen_tag =
    lambda(mixed id) {
      stats->written++;
      // An existing <changed> tag in the old file is carried over.
      string diff = ((old_ids && old_ids[id] && old_ids[id]->changetag) ?
                     old_ids[id]->changetag : "");
      if(old_ids) {
        if(diff!="")
          stats->changed++;
        else if(!old_ids[id] || !old_ids[id]->text ||
                String.trim_whites(old_ids[id]->text)=="" ) {
          // No translation yet
          diff = "<new/>\n";
          stats->new++;
        }
        else if(old_ids[id] && old_ids[id]->original != ids[id]->original) {
          // Translated, but the original text has changed since
          diff = replace(old_ids[id]->original||"",
                         ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));
          diff = "<changed from=\""+ diff +"\"/>\n";
          stats->changed++;
        }
        else
          stats->ok++;
      }
      // Make parser-safe
      string original =
        replace(ids[id]->original||"",
                ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));
      string text =
        replace( ( (old_ids && old_ids[id] && old_ids[id]->text) ?
                   old_ids[id]->text : ""),
                 ({"<",">","&"}),({"&lt;","&gt;","&amp;"}));
      return sprintf("<str id=\"%s\">\n"
                     "%s<%s>%s</%[2]s>\n"
                     "<%s>%s</%[4]s>\n"
                     "</str>",
                     (string)id, diff, o_tag, original, t_tag, text);
    };

  // Reuse structure of old xml
  int i = 0;
  if(outdata) {
    string marker = "\7\7\7\7";    // Marker from parse_xml_file()
    string newstr;
    int n;
    // search() gives -1 when no marker is left; 0 is a valid position.
    while( (n = search(outdata, marker)) >= 0 ) {
      if(i==sizeof(id_xml_order)) {
        // Shrinking file? More markers than ids, drop the rest.
        outdata = replace(outdata, marker, "");
        break;
      }
      if(args->wipe && !ids[id_xml_order[i]]->origin)
        newstr = "";  // Wipe this old string
      else
        newstr = gen_tag(id_xml_order[i]);
      outdata = (outdata[0..n-1] + newstr +
                 outdata[n+sizeof(marker)..sizeof(outdata)-1]);
      i++;
    }
    newfile += outdata;
  }

  // Dump new strings
  for(; i<sizeof(id_xml_order); i++) {
    if(!(args->wipe && !ids[id_xml_order[i]]->origin))
      newfile += "\n" + gen_tag(id_xml_order[i]) + "\n";
  }

  // If any, add missing <add>-blocks from config
  foreach(indices(add), string blockname)
    newfile += "\n"+add[blockname];

  // Determine encoding
  if(!encoding || encoding=="") {
    int width = String.width( newfile );
    if(width==16)
      encoding = "utf-8";
    else if(width==32)
      encoding = "utf-16";
    else
      encoding = "iso-8859-1";
  }
  function encode = get_encoder( encoding );
  if(encode && catch( newfile = encode(newfile) )) {
    // Report the encoding that was actually tried.
    werror("\n* Error: unable to encode file %O in %O\n",
           filename, encoding);
    exit(1);
  }
  newfile = "<?xml version=\"1.0\" encoding=\""+ encoding +"\"?>\n"+ newfile;

  out->write( newfile );
  out->truncate( out->tell() );
  out->close();

  // Dump some statistics
  if(args->wipe && stats->written!=sizeof(id_xml_order))
    write("(wiped to %d) ", stats->written);
  if(old_ids) {
    if(stats->written==stats->ok)
      write("all translated");
    else {
      array ret= ({});
      if(stats->ok) ret += ({ sprintf("%d translated", stats->ok) });
      if(stats->new) ret += ({ sprintf("%d new", stats->new) });
      if(stats->changed) ret += ({ sprintf("%d changed", stats->changed) });
      write(String.implode_nicely( ret ));
    }
  }
  write("\n");
}


array(string) get_tokens(string in, mapping args, string filename) {
  // Collects the tokens declared with <locale-token> tags in the
  // comments of a pike source.
  //
  //   in        The source text.
  //   args      Options; args->project restricts which tags count and
  //             is set from the tags that are accepted.
  //   filename  Only used in warnings.
  //
  // Returns the tokens found; warns when there are none.

  // Gather comment text. The order between // blocks and /* */ blocks
  // is not important for our purposes.
  string comments = "";
  foreach(in/"//", string piece) {
    sscanf(piece, "%s\n", piece);     // Keep the rest of the line only
    comments += piece+"\n";
  }
  foreach(in/"/\052", string piece) {
    string inner = "";
    sscanf(piece, "%s\052/", inner);  // Keep up to the comment end
    comments += inner+"\n";
  }

  array(string) found = ({});
  function on_token =
    lambda(object foo, mapping m, string c) {
      if(args->project && m->project!=args->project)
        return 0;
      c = String.trim_whites(c);
      if(has_value(found, c))
        werror("\n* Warning: Token %O already found\n", c);
      found += ({c});
      args->project = m->project ? m->project : "";
      return 0;
    };
  Parser.HTML token_parser = Parser.HTML();
  token_parser->add_container("locale-token", on_token);
  token_parser->feed( comments )->finish();

  if(sizeof(found))
    return found;

  if(args->project)
    werror("\n* Warning: No token for project %O in %s\n",
           args->project, filename);
  else
    werror("\n* Warning: No token found in file %s\n", filename);
  return found;
}

void update_pike_sourcefiles(array filelist) {
  // Extracts strings from pike sourcefiles in filelist
  // Updates ids, r_ids and  id_xml_order with ids and strings
  // If new ids, updates the sourcefile or a copy
  //
  // For every file the locale tokens are looked up with get_tokens(),
  // the source is tokenized and each TOKEN(id, string) call is
  // examined. Calls without id get an id (reused from r_ids when the
  // same text is known, else from make_id()) and the id is written
  // into the source, either in place (args->nocopy) or in
  // <filename>.new. The program exits if a file cannot be opened or
  // an id is used for two different strings.
  foreach(filelist, string filename) {
    Stdio.File file = Stdio.File();
    if(!file->open(filename, "r")) {
      werror("* Error: Could not open sourcefile %s.\n", filename);
      exit(1);
    }
    write("Reading %s", filename);
    string indata = file->read();
    file->close();

    // Get locale tokens, tokenize pike file
    write(", parsing...");
    array tokens = get_tokens(indata, args, filename);
    if(!sizeof(tokens))
      continue;
    mixed pdata = Parser.Pike.split(indata);
    pdata = Parser.Pike.tokenize(pdata);
    pdata = Parser.Pike.hide_whitespaces(pdata);

    // Calls that need an id written back: ({ id, token, regexp })
    array id_pike_order = ({});
    int no_of_ids = 0;
    string|int id;
    string fstr, token;
    for(int i=0; i<sizeof(pdata); i++) {
      //// Search for tokens
      foreach(tokens, token)
        if(token==pdata[i]) break;  // Loop tokens
      if(token!=pdata[i]) continue; // Verify token
      // A call needs at least "(" and an id after the token; stop if
      // the file ends before that.
      if(i+2>=sizeof(pdata)) break;
      if(pdata[++i]!="(") continue; // Verify "("

      //// Get id
      id = (string)pdata[++i];
      if(id=="\"\"")
        id = "";
      else if((int)id)
        id = (int)id;
      else
        id = get_first_string(id);

      //// Get string
      string instr = "";
      i++;      // Skip ","
      while( ++i<sizeof(pdata) && pdata[i]!=")" )
        instr += (string)pdata[i];
      if(instr=="\"\"")
        fstr = "";
      else
        fstr = get_first_string(instr);
      if(fstr=="" && id=="")
        continue;  // Neither string nor id, skip!

      //// Check and store id and string
      no_of_ids++;
      if(!id || id=="") {
        if (r_ids[fstr])
          id = r_ids[fstr];   // Re-use old id with identical string
        else
          id = make_id();     // New string --> Get new id
        // New id for string --> file needs update, save info.
        id_pike_order += ({ ({id, token, quotemeta(instr)}) });
      } else {
        // Verify old id
        if(!ids[id] || !ids[id]->origin) {
          // Id only known from the xml file (or not at all). Remove
          // the preread string from the r_ids lookup, the text for
          // this id might have been updated in the source. Only drop
          // the entry if it really points at this id.
          if(ids[id] && ids[id]->original &&
             r_ids[ids[id]->original]==id)
            m_delete(r_ids, ids[id]->original);
        } else {
          // Id already seen in a source file; the text must agree.
          if(ids[id]->original!=fstr) {
            werror("\n* Error: inconsistant use of id.\n");
            werror("    In file:%{ %s%}\n", ids[id]->origin);
            werror("     id %O -> string %O\n", id, ids[id]->original);
            werror("    In file: %s\n", filename);
            werror("     id %O -> string %O\n", id, fstr);
            exit(1);
          }
        }
        if(r_ids[fstr] && r_ids[fstr]!=id && ids[r_ids[fstr]]->origin)
          werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s",
                 fstr, r_ids[fstr], ids[r_ids[fstr]]->origin, id, filename);
      }
      if(!has_value(id_xml_order, id))
        // Id not in xml-structure, add to list
        id_xml_order += ({ id });
      if(!ids[id])
        ids[id] = ([]);
      ids[id]->original = fstr;         // Store id:text
      ids[id]->origin += ({filename});  // Add  origin
      if(String.trim_whites(fstr)!="")
        r_ids[fstr] = id;               // Store text:id
    }

    // Done parsing, rebuild sourcefile if needed
    write(" (%d localization%s)\n", no_of_ids, (no_of_ids==1?"":"s"));
    if(!sizeof(id_pike_order)) {
      continue;
    }
    if(!args->nocopy)
      filename += ".new"; // Create new file instead of overwriting
    write("-> Writing %s (%d new)", filename, sizeof(id_pike_order));
    if(!file->open(filename, "cw")) {
      werror("\n* Error: Could not open %s for writing\n", filename);
      exit(1);
    }

    foreach(id_pike_order, array entry) {
      // Insert ids based on tokens and the now regexp-safe string
      object(Regexp) RE;
      // RE = ^(.*TOKEN\( ")(", string \).*)$
      RE = Regexp("^(.*" + entry[1] + "\\([ \n\t]*)[\"0]*" +
                  "([ ,\n\t]*"+entry[2]+"[ \t\n]*\\).*)$");
      array hits = RE->split(indata);
      if(hits)
        indata = hits[0] +
          (intp(entry[0])?entry[0]:"\""+entry[0]+"\"") + hits[1];
      else {
        werror("\n* Warning: Failed to set id %O for string %O in %s",
               entry[0], ids[entry[0]]->original, filename);
        // Forget the id again unless some other place uses it too.
        if(sizeof(ids[entry[0]]->origin)<2)
          id_xml_order -= ({ entry[0] });
      }
    }
    write("\n");

    file->write( indata );
    file->truncate( file->tell() );
    file->close();
  }
}


void update_xml_sourcefiles(array filelist) {
  // Extracts strings from html/xml files in filelist
  // Updates ids, r_ids, id_xml_order with ids and strings
  // If new ids, updates the sourcefile or a copy
  //
  // <trans-reg project="..."> selects which project the following
  // <translate> containers belong to; a <translate> may also name its
  // project itself. Containers without id get an id (reused from
  // r_ids when the same text is known, else from make_id()) and the
  // file is rewritten, in place (args->nocopy) or as <filename>.new.
  // The program exits if a file cannot be opened, decoded or encoded,
  // if <trans-reg> lacks a project, or if an id is used for two
  // different strings.
  foreach(filelist, string filename) {
    Stdio.File file = Stdio.FILE();
    if(!file->open(filename, "r")) {
      werror("* Error: Could not open sourcefile %s.\n", filename);
      exit(1);
    }
    write("Reading %s", filename);
    string line = file->gets();
    string data = file->read();
    file->close();
    if(!data && !line)
      continue;

    // Check encoding
    if(!line)
      line = data;
    string encoding;
    sscanf(line, "%*sencoding=\"%s\"", encoding);
    // The first line is part of the source file and must survive a
    // rewrite, so it is put back before decoding.
    if(line!=data)
      data = line+"\n"+(data||"");
    if(encoding && encoding!="") {
      function decode = get_decoder(encoding);
      if(decode && catch( data = decode(data) )) {
        werror("\n* Error: unable to decode from %O in %O\n",
               encoding, filename);
        exit(1);
      }
    }

    write(", parsing...");
    int new = 0;          // Number of ids added to this file
    int ignoretag = 0;    // Set while inside another project's tags
    int no_of_ids = 0;
    Parser.HTML xml_parser = Parser.HTML();
    xml_parser->case_insensitive_tag(1);
    xml_parser->add_quote_tag("!--", lambda() {return 0;}, "--");
    xml_parser->
      add_tag("trans-reg",
              // Check the registertag for the right project
              lambda(object foo, mapping m) {
                if(!m->project || m->project=="") {
                  werror("\n * Error: Missing project in %s\n", filename);
                  exit(1);
                }
                if(args->project && m->project!=args->project)
                  ignoretag = 1; // Tags might be from another project
                else
                  ignoretag = 0;
                if(!args->project)
                  args->project = m->project;
                return 0;
              });
    xml_parser->
      add_container("translate",
                    // This is the string container
                    lambda(object foo, mapping m, string c) {
                      if(m->project && m->project!="") {
                        if(m->project!=args->project)
                          return 0; // Tag belongs to another project
                        // else correct project, proceed
                      } else // No proj specified
                        if(ignoretag)
                          return 0; // Check if last proj was another
                      string|int id = m->id;
                      if((int)id) id = (int)id;
                      string fstr = c;
                      int updated = 0;
                      if (String.trim_whites(fstr)=="")
                        return 0;         // No need to store empty strings
                      no_of_ids++;
                      if(!id || id=="") {
                        if (r_ids[fstr])
                          id = r_ids[fstr];   // Re-use old id with same string
                        else
                          id = make_id();     // New string --> Get new id
                        // Mark that we have a new id here
                        updated = ++new;
                      } else {
                        // Verify old id
                        if(!ids[id] || !ids[id]->origin) {
                          // Id only known from the xml file (or not
                          // at all). Remove the preread string from
                          // r_ids, the text might have been updated.
                          // Only drop the entry if it points here.
                          if(ids[id] && ids[id]->original &&
                             r_ids[ids[id]->original]==id)
                            m_delete(r_ids, ids[id]->original);
                        } else {
                          // Id already seen in a source file; the
                          // text must agree.
                          if(ids[id]->original!=fstr) {
                            werror("\n* Error: inconsistant use of id.\n");
                            werror("    In file:%{ %s%}\n", ids[id]->origin);
                            werror("     id %O -> string %O\n",
                                   id, ids[id]->original);
                            werror("    In file: %s\n", filename);
                            werror("     id %O -> string %O\n", id, fstr);
                            exit(1);
                          }
                        }
                        if(r_ids[fstr] && r_ids[fstr]!=id &&
                           ids[r_ids[fstr]]->origin)
                          werror("\n* Warning: %O has id %O in%{ %s%}, "
                                 "id %O in %s", fstr, r_ids[fstr],
                                 ids[r_ids[fstr]]->origin, id, filename);
                      }
                      if(!has_value(id_xml_order, id))
                        // Id not in xml-structure, add to list
                        id_xml_order += ({ id });
                      if(!ids[id])
                        ids[id] = ([]);
                      ids[id]->original = fstr;         // Store id:text
                      ids[id]->origin += ({filename});  // Add  origin
                      if(String.trim_whites(fstr)!="")
                        r_ids[fstr] = id;               // Store text:id
                      if(updated) {
                        // Rebuild the tag with the id in place. The
                        // array return keeps it from being reparsed.
                        string ret="<translate id=\""+id+"\"";
                        foreach(indices(m)-({"id"}), string param)
                          ret+=" "+param+"=\""+m[param]+"\"";
                        return ({ ret+">"+c+"</translate>" });
                      }
                      // Not updated, do not change
                      return 0;
                    });
    xml_parser->feed(data)->finish();

    // Done parsing, rebuild sourcefile if needed
    write(" (%d localization%s)\n", no_of_ids, no_of_ids==1?"":"s");
    if(!new) {
      continue;
    }
    data = xml_parser->read();
    if(encoding && encoding!="") {
      function encode = get_encoder(encoding);
      if(encode && catch( data = encode(data) )) {
        werror("\n* Error: unable to encode data in %O\n", encoding);
        exit(1);
      }
    }

    if(!args->nocopy)
      filename += ".new"; // Create new file instead of overwriting
    write("-> Writing %s (%d new)", filename, new);
    if(!file->open(filename, "cw")) {
      werror("\n* Error: Could not open %s for writing\n", filename);
      exit(1);
    }

    file->write( data );
    file->truncate( file->tell() );
    file->close();
    write("\n");
  }
}


string parse_config(string filename) {
  // Reads the project config (xml format) and completes args([]),
  // files({}) and add([]) from it. Values already present in args,
  // i.e. given on the command line, have precedence.
  // Returns the name of the outfile (ie project_eng.xml), or "" when
  // there is no readable config and no name can be derived.
  if(!filename || filename=="")
    return "";
  Stdio.File in=Stdio.FILE();
  if(!in->open(filename, "r"))
    return "";
  string first = in->gets();
  string body = in->read();
  in->close();
  if(!body)
    return "";

  // Check encoding
  if(!first)
    first = body;
  string encoding;
  sscanf(first, "%*sencoding=\"%s\"", encoding);
  if(!encoding || encoding=="") {
    if(first!=body)
      body = first+"\n"+body;
  }
  else {
    function decode = get_decoder(encoding);
    if(decode && catch( body = decode(body) )) {
      werror("\n* Error: unable to decode from %O in %O\n",
             encoding, filename);
      exit(1);
    }
  }

  // Handler factories. Each call gives a handler bound to one key.

  // Container whose trimmed content is stored in args[key] unless
  // that option is already set.
  function store_once =
    lambda(string key) {
      return lambda(object foo, mapping m, string c) {
               if(!args[key])
                 args[key] = String.trim_whites(c);
               return 0;
             };
    };
  // Tag that simply switches the option args[key] on.
  function switch_on =
    lambda(string key) {
      return lambda(object foo, mapping m) {
               args[key] = 1;
               return 0;
             };
    };

  string xml_name="";
  Parser.HTML cfg = Parser.HTML();
  cfg->case_insensitive_tag(1);
  cfg->add_quote_tag("!--", lambda() {return 0;}, "--");

  // <project name="...">: only read config for the right project, or
  // the first found if unspecified. Returning the content makes the
  // parser go through it with the handlers below.
  cfg->add_container("project",
                     lambda(object foo, mapping m, string c) {
                       if(!m->name || m->name=="") {
                         werror("\n* Projectname missing in %s!\n", filename);
                         exit(1);
                       }
                       int wanted = !(args->project && args->project!="" &&
                                      args->project!=m->name);
                       if(!wanted)
                         return "";  // Skip this project-tag
                       args->project = m->name;
                       write("Reading config for project %O in %s\n",
                             args->project, filename);
                       return c;
                     });

  // <out>: set outname (default: project_eng.xml)
  cfg->add_container("out",
                     lambda(object foo, mapping m, string c) {
                       string name = String.trim_whites(c);
                       if(name && name!="")
                         xml_name = name;
                       return 0;
                     });

  // <file>: add a file to be parsed
  cfg->add_container("file",
                     lambda(object foo, mapping m, string c) {
                       string name = String.trim_whites(c);
                       if(name && name!="")
                         files += ({ name });
                       return 0;
                     });

  // <encoding>: set default encoding
  cfg->add_container("encoding",
                     lambda(object foo, mapping m, string c) {
                       if(args->encoding=="")
                         args->encoding = 0;
                       string name = String.trim_whites(c);
                       if(name && name!="" && !args->encoding) {
                         args->encoding = name;
                         get_encoder( name );   // Check if known
                       }
                       return 0;
                     });

  // <xmlpath>: project file path, <baselang>: base language
  cfg->add_container("xmlpath", store_once("xmlpath"));
  cfg->add_container("baselang", store_once("baselang"));

  // <add id="...">: block to add to project-xml-files
  cfg->add_container("add",
                     lambda(object foo, mapping m, string c) {
                       if(!m->id || m->id=="") {
                         werror("\n* Missing id in <add> in %s!\n", filename);
                         exit(1);
                       }
                       add[m->id] = c;
                       return 0;
                     });

  // <nocopy>:  update the infile instead of creating infile.new
  // <verbose>: more informative text in xml
  // <wipe>:    remove all id:strings not used in xml anymore
  cfg->add_tag("nocopy", switch_on("nocopy"));
  cfg->add_tag("verbose", switch_on("verbose"));
  cfg->add_tag("wipe", switch_on("wipe"));

  cfg->feed(body)->finish();

  // Try to create a name for the outfile if the config gave none
  if(xml_name=="") {
    if(args->xmlpath && args->baselang)
      xml_name = replace(args->xmlpath, "%L", args->baselang);
    else if( args->project)
      xml_name = args->project+"_eng.xml";
  }
  return xml_name;
}


// ------------------------ The main program --------------------------

int main(int argc, array(string) argv) {
  // Program entry point.
  //  1. Parses the command line into args and files.
  //  2. Reads the config file and works out the name of the xml
  //     outfile.
  //  3. Reads the old outfile, extracts strings from all source files
  //     and writes the outfile.
  //  4. With --sync, updates the xml files of all other languages
  //     found through args->xmlpath.
  // Returns 1 after printing the help text, otherwise 0.

  // Parse arguments
  foreach(argv[1..], string arg) {
    if(arg=="")
      continue;            // Nothing to do with an empty argument
    if(arg[0]!='-') {
      files += ({arg});
      continue;
    }
    string key, val = "";
    if(sscanf(arg, "--%s", key)) {
      // --key or --key=value; val stays "" when there is no value
      sscanf(key, "%s=%s", key, val);
      args[key] = val;
      continue;
    }
    args[arg[1..]] = 1;
  }

  // Get name of outfile (something like project_eng.xml)
  string xml_name=args->out;

  // Read configfile
  string configname = args->config;
  if(!configname && args->project)
    configname = args->project+".xml";
  string filename = parse_config(configname);
  if(!xml_name || xml_name=="") {
    if(filename!="")
      xml_name = filename;
    else if(args->xmlpath && args->baselang)
      xml_name = replace(args->xmlpath, "%L", args->baselang);
  }

  if( (!(xml_name && args->sync && args->xmlpath && args->baselang)) &&
      (!sizeof(files) || args->help) ) {
    sscanf("$Revision: 1.8 $", "$"+"Revision: %s $", string v);
    werror("\n  Locale Extractor Utility "+v+"\n\n");
    werror("  Syntax: extract.pike [arguments] infile(s)\n\n");
    werror("  Arguments: --project=name  default: first found in infile\n");
    werror("             --config=file   default: [project].xml\n");
    werror("             --out=file      default: [project]_eng.xml\n");
    werror("             --nocopy        update infile instead of infile.new\n");
    werror("             --notime        don't include dump time in xml files\n");
    werror("             --wipe          remove unused ids from xml\n");
    werror("             --sync          synchronize all locale projects\n");
    werror("             --encoding=enc  default: ISO-8859-1\n");
    werror("             --verbose       more informative text in xml\n");
    werror("\n");
    return 1;
  }

  // Try to read and parse xml-file
  mapping xml_data;
  xml_data = parse_xml_file(xml_name, args->baselang);
  write("\n");

  // Read, parse and (if necessary) update the sourcefiles
  // The dots are quoted so that only real .pike/.pmod suffixes match.
  object R = Regexp("(\\.pike|\\.pmod)$");
  foreach(files, string srcfile)
    if(R->match(srcfile))
      update_pike_sourcefiles( ({ srcfile }) );
    else
      update_xml_sourcefiles( ({ srcfile }) );

  // Save all strings to outfile xml
  if(!xml_name || xml_name=="") {
    // No name from arguments or config; derive one.
    if(args->project && args->project!="")
      xml_name = args->project+"_eng.xml";
    else {
      xml_name = files[0];
      sscanf(xml_name, "%s.pike", xml_name);
      xml_name += "_eng.xml";
    }
  }
  write("\n");
  write_xml_file( xml_name, args->baselang,
                  args->encoding || xml_data->encoding, xml_data->data);

  // Synchronize xmls in other languages
  if (args->sync) {
    write("\n");
    mapping base_ids = ids;
    array base_order = id_xml_order;
    foreach(languagefiles(args->xmlpath, args->baselang), mapping file) {
      // Read the language file into a fresh ids mapping, then write
      // it back with the structure and order of the base file and the
      // translations just read (old_ids).
      ids = ([]);
      string enc = parse_xml_file(file->name, file->lang)->encoding;
      id_xml_order = base_order;
      mapping old_ids = ids;
      ids = base_ids;
      write_xml_file(file->name, file->lang,
                     args->encoding || enc, xml_data->data, old_ids);
    }
  }

  write("\n");
  return 0;
}