Skip to content
Snippets Groups Projects
Commit bd8e70b3 authored by Per Hedbor's avatar Per Hedbor
Browse files

Use the existing low-level tokenizer for Parser.C.split

This is significantly faster, and the code was already there anyway.
parent 36e0823e
No related branches found
No related tags found
No related merge requests found
...@@ -5,248 +5,70 @@ ...@@ -5,248 +5,70 @@
// #pike __REAL_VERSION__ // #pike __REAL_VERSION__
// //
//! Splits the @[data] string into an array of tokens. An additional protected constant splitter = Parser._parser._Pike.tokenize;
//! element with a newline will be added to the resulting array of
//! tokens. If the optional argument @[state] is provided the split
//! function is able to pause and resume splitting inside /**/ tokens.
//! The @[state] argument should be an initially empty mapping, in
//! which split will store its state between successive calls.
array(string) split(string data, void|mapping state)
{
int line=1;
array(string) ret=({});
int pos;
if(data=="") return ({"\n"});
data += "\n\0"; /* End sentinel. */
if(state && state->in_token) {
switch(state->remains[0..1]) {
case "/*":
if(sizeof(state->remains)>2 && state->remains[-1]=='*'
&& data[0]=='/') {
ret += ({ state->remains + "/" });
pos++;
m_delete(state, "remains");
break;
}
pos = search(data, "*/");
if(pos==-1) {
state->in_token = 1;
state->remains += data[..<1];
return ret;
}
ret += ({ state->remains + data[..pos+1] });
m_delete(state, "remains");
pos+=2;
break;
}
state->in_token = 0;
}
while(1) class UnterminatedStringError
//! Error thrown when an unterminated string token is encountered.
{ {
int start=pos; inherit Error.Generic;
constant error_type = "unterminated_string";
constant is_unterminated_string_error = 1;
switch(data[pos]) string err_str;
{ //! The string that failed to be tokenized
case '\0':
return ret;
case '#':
{
pos=search(data,"\n",pos);
if(pos==-1)
error("Failed to find end of preprocessor statement.\n");
while(data[pos-1]=='\\' || (data[pos-1]=='\r' && data[pos-2]=='\\'))
pos=search(data,"\n",pos+1);
break;
case 'a'..'z': protected void create(string pre, string post)
case 'A'..'Z':
case 128..65536: // Lets simplify things for now...
case '_':
while(1)
{ {
switch(data[pos]) int line = String.count(pre, "\n")+1;
{ err_str = pre+post;
case '$': // allowed in some C (notably digital) if( sizeof(post) > 100 )
case 'a'..'z': ::create(sprintf("Unterminated string %O[%d] at line %d\n",
case 'A'..'Z': post[..100], sizeof(post)-100, line));
case '0'..'9': else
case 128..65536: // Lets simplify things for now... ::create(sprintf("Unterminated string %O at line %d\n",
case '_': post, line));
pos++;
continue;
} }
break;
} }
break;
case '.': private array(string) low_split(string data, void|mapping(string:string) state)
if(data[start..start+2]=="...")
{ {
pos+=3; if(state && state->remains)
break; data = (string)m_delete(state, "remains") + data;
} // Cast to string above to work around old Pike 7.0 bug.
if(data[start..start+1]=="..")
{
pos+=3;
break;
}
case '0'..'9': array(string) ret;
if(data[pos]=='0' && (data[pos+1]=='x' || data[pos+1]=='X')) string rem;
{ [ret, rem] = splitter(data);
pos+=2; if(sizeof(rem)) {
while(1) if(rem[0]=='"')
{ throw(UnterminatedStringError(ret*"", rem));
switch(data[pos]) if(state) state->remains=rem;
{
case '0'..'9':
case 'a'..'f':
case 'A'..'F':
pos++;
continue;
}
break;
}
break;
}
while(data[pos]>='0' && data[pos]<='9') pos++;
if(data[pos]=='.')
{
pos++;
while(data[pos]>='0' && data[pos]<='9') pos++;
if(data[pos]=='e' || data[pos]=='E')
{
pos++;
if(data[pos]=='-') pos++;
while(data[pos]>='0' && data[pos]<='9') pos++;
}
break;
}
if(data[pos]=='e' || data[pos]=='E')
{
pos++;
while(data[pos]>='0' && data[pos]<='9') pos++;
} }
break;
default:
error("Unknown token %O\n",data[pos..pos+20]);
case '`':
while(data[pos]=='`') data[pos]++;
case '\\': pos++; continue; /* IGNORED */
case '/':
case '{': case '}':
case '[': case ']':
case '(': case ')':
case ';':
case ',':
case '*': case '%':
case '?': case ':':
case '&': case '|': case '^':
case '!': case '~':
case '=':
case '@':
case '+':
case '-':
case '<': case '>':
switch(data[pos..pos+1])
{
case "//":
pos=search(data,"\n",pos);
break;
case "/*":
pos=search(data,"*/",pos);
if(pos==-1) {
if(state) {
state->remains = data[start..<2];
state->in_token = 1;
return ret; return ret;
} }
error("Failed to find end of comment.\n");
}
pos+=2;
break;
case "<<": case ">>":
if(data[pos+2]=='=') pos++;
case "==": case "!=": case "<=": case ">=":
case "*=": case "/=": case "%=":
case "&=": case "|=": case "^=":
case "+=": case "-=":
case "++": case "--":
case "&&": case "||":
case "->":
pos++;
default:
pos++;
}
break;
case ' ':
case '\n':
case '\r':
case '\t':
case '\14':
while(1)
{
switch(data[pos])
{
case ' ':
case '\n':
case '\r':
case '\t':
case '\14':
pos++;
continue;
}
break;
}
break;
case '\'':
pos++;
if(data[pos]=='\\') pos+=2;
int end=search(data, "'", pos)+1;
if(!end)
throw( ({sprintf("Unknown token: %O\n",data[pos-1..pos+19]) }) );
pos=end;
break;
case '"':
{
int q,s;
while(1)
{
q=search(data,"\"",pos+1);
s=search(data,"\\",pos+1);
if(q==-1) q=sizeof(data)-1;
if(s==-1) s=sizeof(data)-1;
if(q<s)
{
pos=q+1;
break;
}else{
pos=s+1;
}
}
break;
}
}
}
ret+=({ data[start..pos-1] }); //! Splits the @[data] string into an array of tokens. An additional
} //! element with a newline will be added to the resulting array of
//! tokens. If the optional argument @[state] is provided the split
//! function is able to pause and resume splitting inside #"" and
//! /**/ tokens. The @[state] argument should be an initially empty
//! mapping, in which split will store its state between successive
//! calls.
array(string) split(string data, void|mapping(string:string) state) {
array r = low_split(data, state);
array new = ({});
for(int i; i<sizeof(r); i++)
if(r[i][..1]=="//" && r[i][-1]=='\n')
new += ({ r[i][..<1], "\n" });
else
new += ({ r[i] });
if(sizeof(new) && (< "\n", " " >)[new[-1]])
new[-1] += "\n";
else
new += ({ "\n" });
return new;
} }
//! Represents a C token, along with a selection of associated data and //! Represents a C token, along with a selection of associated data and
......
...@@ -7,68 +7,5 @@ ...@@ -7,68 +7,5 @@
//! This module parses and tokenizes Pike source code. //! This module parses and tokenizes Pike source code.
protected constant splitter = Parser._parser._Pike.tokenize;
inherit "C.pmod"; inherit "C.pmod";
array(string) low_split(string data, void|mapping(string:string) state)
{
if(state && state->remains)
data = (string)m_delete(state, "remains") + data;
// Cast to string above to work around old Pike 7.0 bug.
array ret;
string rem;
[ret, rem] = Parser._parser._Pike.tokenize(data);
if(sizeof(rem)) {
if(rem[0]=='"')
throw(UnterminatedStringError(ret*"", rem));
if(state) state->remains=rem;
}
return ret;
}
//! Splits the @[data] string into an array of tokens. An additional
//! element with a newline will be added to the resulting array of
//! tokens. If the optional argument @[state] is provided the split
//! function is able to pause and resume splitting inside #"" and
//! /**/ tokens. The @[state] argument should be an initially empty
//! mapping, in which split will store its state between successive
//! calls.
array(string) split(string data, void|mapping(string:string) state) {
array r = low_split(data, state);
array new = ({});
for(int i; i<sizeof(r); i++)
if(r[i][..1]=="//" && r[i][-1]=='\n')
new += ({ r[i][..<1], "\n" });
else
new += ({ r[i] });
if(sizeof(new) && (< "\n", " " >)[new[-1]])
new[-1] += "\n";
else
new += ({ "\n" });
return new;
}
class UnterminatedStringError
//! Error thrown when an unterminated string token is encountered.
{
inherit Error.Generic;
constant error_type = "unterminated_string";
constant is_unterminated_string_error = 1;
string err_str;
//! The string that failed to be tokenized
protected void create(string pre, string post)
{
int line = String.count(pre, "\n")+1;
err_str = pre+post;
if( sizeof(post) > 100 )
::create(sprintf("Unterminated string %O[%d] at line %d\n",
post[..100], sizeof(post)-100, line));
else
::create(sprintf("Unterminated string %O at line %d\n",
post, line));
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment