class TokenizeError(Exception):
    """Error raised by u_error(); its message carries the annotated source line."""


class Token:
    """A single token: position, token type, value and optional child items."""

    def __init__(self, pos=(0, 0), type='symbol', val=None, items=None):
        # `type` shadows the builtin, but it is a keyword callers may pass,
        # so the parameter name is kept as-is.
        self.pos, self.type, self.val, self.items = pos, type, val, items


def u_error(ctx, s, i):
    """Raise TokenizeError describing a failure at position `i` of source `s`.

    ctx -- short label for the failing phase (tokenize() passes 'tokenize')
    s   -- full source text; it is split on '\\n' to find the offending line
    i   -- (line, column) pair; do_tokenize() produces both 1-based

    The message is 'error: <ctx>', then the line prefixed with its line
    number, then a caret line pointing at the column.
    """
    y, x = i
    line = s.split('\n')[y-1]
    # Pad the line number to a width of three characters.
    p = ''
    if y < 10: p += ' '
    if y < 100: p += ' '
    gutter = p + str(y) + ": "
    r = gutter + line + "\n"
    # Derive the caret offset from the gutter width so the caret lands under
    # column x (the previous fixed pad did not match the gutter).
    r += " " * (len(gutter) + x - 1) + "^" + '\n'
    # A plain string cannot be raised; wrap the message in an exception.
    raise TokenizeError('error: ' + ctx + '\n' + r)


# Characters that make do_tokenize() dispatch to do_symbol().
# NOTE(review): '|' appears in SYMBOLS but not here, so do_tokenize() sends a
# '|' character to its error branch -- confirm whether that is intended.
ISYMBOLS = '`-=[];,./~!@$%^&*()+{}:<>?'

# Keywords and operators that are emitted as 'symbol' tokens.
SYMBOLS = [
    'def','class','yield','return','pass','and','or','not','in','import',
    'is','while','break','for','continue','if','else','elif','try',
    'except','raise','True','False','None','global','del','from',
    '-','+','*','**','/','%','<<','>>',
    '-=','+=','*=','/=','=','==','!=','<','>',
    '<=','>=','[',']','{','}','(',')','.',':',',',';','&','|','!',
    ]

# Opening and closing bracket symbols.
B_BEGIN, B_END = ['[', '(', '{'], [']', ')', '}']


class TData:
    """Mutable tokenizer state; do_tokenize() publishes it as the global `T`."""

    def __init__(self):
        # y: current 1-based line number; yi: index in the source where the
        # current line starts; nl: True when the next character starts a line.
        self.y, self.yi, self.nl = 1, 0, True
        # res: tokens produced so far; indent: stack of open indentation
        # widths; braces: counter consulted before emitting nl/indent tokens.
        self.res, self.indent, self.braces = [], [0], 0
        # f: position stamped on tokens by add(); do_tokenize() refreshes it
        # on every loop iteration. Initialised here so it always exists.
        self.f = (1, 1)

    def add(self, t, v):
        """Append a Token of type `t` and value `v` at the current position."""
        self.res.append(Token(self.f, t, v))


def clean(s):
    """Return `s` with '\\r\\n' and bare '\\r' line endings replaced by '\\n'."""
    s = s.replace('\r\n', '\n')
    s = s.replace('\r', '\n')
    return s


def tokenize(s):
    """Tokenize source text `s` and return the list of Token objects.

    Any failure is reported as a TokenizeError built by u_error() from the
    position of the token that was being processed.
    """
    s = clean(s)
    try:
        return do_tokenize(s)
    except TokenizeError:
        # Already formatted by u_error(); do not wrap it a second time.
        raise
    except Exception:
        # Unexpected failure inside a helper: report it at the current token.
        u_error('tokenize', s, T.f)


def do_tokenize(s):
    """Scan `s` character by character, dispatching to the do_* helpers.

    Sets the module-level `T` to a fresh TData for the duration of the scan
    and resets it to None after a successful run. Returns the token list.
    """
    global T
    T, i, l = TData(), 0, len(s)
    T.f = (T.y, i-T.yi+1)
    while i < l:
        c = s[i]; T.f = (T.y, i-T.yi+1)
        if T.nl: T.nl = False; i = do_indent(s, i, l)
        elif c == '\n': i = do_nl(s, i, l)
        elif c in ISYMBOLS: i = do_symbol(s, i, l)
        elif c >= '0' and c <= '9': i = do_number(s, i, l)
        elif (c >= 'a' and c <= 'z') or \
            (c >= 'A' and c <= 'Z') or c == '_': i = do_name(s, i, l)
        elif c == '"' or c == "'": i = do_string(s, i, l)
        elif c == '#': i = do_comment(s, i, l)
        # Backslash-newline joins lines; the bounds check keeps a trailing
        # lone backslash from indexing past the end of the source.
        elif c == '\\' and i+1 < l and s[i+1] == '\n':
            i += 2; T.y, T.yi = T.y+1, i
        elif c == ' ' or c == '\t': i += 1
        else: u_error('tokenize', s, T.f)
    # Close every indentation level that is still open.
    indent(0)
    r = T.res; T = None
    return r


def do_nl(s, i, l):
    """Consume the newline at s[i]; emit an 'nl' token unless T.braces is set.

    Returns the index of the first character of the next line.
    """
    if not T.braces:
        T.add('nl', None)
    i, T.nl = i+1, True
    T.y, T.yi = T.y+1, i
    return i


def do_indent(s, i, l):
    """Measure the leading whitespace starting at s[i] and update indentation.

    Returns the index of the first non-whitespace character.

    NOTE(review): in the collapsed SOURCE the text between `while i` and
    `T.indent[-1]:` is missing. The loop body, the guard before indent() and
    the return below are reconstructed from the visible fragments and from
    the call `i = do_indent(s,i,l)` -- confirm against the upstream file,
    in particular the blank-line / comment / brace guard.
    """
    v = 0
    while i < l:
        c = s[i]
        if c != ' ' and c != '\t': break
        i, v = i+1, v+1
    # do_tokenize() only calls this with i < l, so `c` is always bound here.
    if c != '\n' and c != '#' and not T.braces: indent(v)
    return i


def indent(v):
    """Emit 'indent' / 'dedent' tokens to move the indent stack to width `v`.

    NOTE(review): the header and first branch of this function are missing
    from the collapsed SOURCE and are reconstructed; the `>` and `<`
    branches are as visible in SOURCE.
    """
    if v == T.indent[-1]: pass
    elif v > T.indent[-1]:
        T.indent.append(v)
        T.add('indent', v)
    elif v < T.indent[-1]:
        # index() raises ValueError when `v` matches no open level;
        # tokenize() turns that into a TokenizeError.
        n = T.indent.index(v)
        while len(T.indent) > n+1:
            v = T.indent.pop()
            T.add('dedent', v)
T.add('dedent',v) def do_symbol(s,i,l): symbols = [] v,f,i = s[i],i,i+1 if v in SYMBOLS: symbols.append(v) while i '9') and (c < 'a' or c > 'f') and c != 'x': break v,i = v+c,i+1 if c == '.': v,i = v+c,i+1 while i '9': break v,i = v+c,i+1 T.add('number',v) return i def do_name(s,i,l): v,i =s[i],i+1 while i 'z') and (c < 'A' or c > 'Z') and (c < '0' or c > '9') and c != '_': break v,i = v+c,i+1 if v in SYMBOLS: T.add('symbol',v) else: T.add('name',v) return i def do_string(s,i,l): v,q,i = '',s[i],i+1 if (l-i) >= 5 and s[i] == q and s[i+1] == q: # """ i += 2 while i