Back to home page

sPHENIX code displayed by LXR

 
 

    


File indexing completed on 2025-08-06 08:19:53

0001 #!/usr/bin/env python
0002 #
0003 # Copyright 2007 Neal Norwitz
0004 # Portions Copyright 2007 Google Inc.
0005 #
0006 # Licensed under the Apache License, Version 2.0 (the "License");
0007 # you may not use this file except in compliance with the License.
0008 # You may obtain a copy of the License at
0009 #
0010 #      http://www.apache.org/licenses/LICENSE-2.0
0011 #
0012 # Unless required by applicable law or agreed to in writing, software
0013 # distributed under the License is distributed on an "AS IS" BASIS,
0014 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0015 # See the License for the specific language governing permissions and
0016 # limitations under the License.
0017 
0018 """Generate an Abstract Syntax Tree (AST) for C++."""
0019 
0020 __author__ = 'nnorwitz@google.com (Neal Norwitz)'
0021 
0022 
0023 # TODO:
0024 #  * Tokens should never be exported, need to convert to Nodes
0025 #    (return types, parameters, etc.)
0026 #  * Handle static class data for templatized classes
0027 #  * Handle casts (both C++ and C-style)
0028 #  * Handle conditions and loops (if/else, switch, for, while/do)
0029 #
0030 # TODO much, much later:
0031 #  * Handle #define
0032 #  * exceptions
0033 
0034 
0035 try:
0036     # Python 3.x
0037     import builtins
0038 except ImportError:
0039     # Python 2.x
0040     import __builtin__ as builtins
0041 
0042 import sys
0043 import traceback
0044 
0045 from cpp import keywords
0046 from cpp import tokenize
0047 from cpp import utils
0048 
0049 
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        # Yield seq's elements from the last index down to 0.
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        # Delegate to the pre-2.6 iterator protocol method.
        return obj.next()
0060 
0061 
# C++ member visibility levels (used by AstBuilder.visibility).
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing function/method properties; OR-ed together into
# Function.modifiers (printed as a hex mask in Function.__str__).
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80
FUNCTION_OVERRIDE = 0x100

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Marker values for internal bookkeeping tokens injected into the token
# queue (see AstBuilder.Generate): _NAMESPACE_POP closes a namespace.
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
0099 
0100 
0101 # TODO(nnorwitz): use this as a singleton for templated_types, etc
0102 # where we don't want to create a new empty dict each time.  It is also const.
0103 class _NullDict(object):
0104     __contains__ = lambda self: False
0105     keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
0106 
0107 
0108 # TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node."""

    def __init__(self, start, end):
        # Token positions delimiting this node in the source stream.
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node is exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        # Intentionally not named __str__; subclasses provide their own.
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # Include start/end positions only in debug mode.
        if utils.DEBUG:
            return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
        return '%s(%s)' % (name, suffix)

    def __repr__(self):
        return str(self)
0143 
class Define(Node):
    """A `#define name definition` preprocessor directive."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name              # Macro name.
        self.definition = definition  # Replacement text (may be empty).

    def __str__(self):
        return self._StringHelper(self.__class__.__name__,
                                  '%s %s' % (self.name, self.definition))
0153 
0154 
class Include(Node):
    """An `#include` directive."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename  # Header path without the delimiters.
        self.system = system      # True for <...>, False for "...".

    def __str__(self):
        # System headers print as <name>, local headers as "name".
        if self.system:
            fmt = '<%s>'
        else:
            fmt = '"%s"'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)
0166 
0167 
class Goto(Node):
    """A `goto label;` statement."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label  # Target label name.

    def __str__(self):
        suffix = str(self.label)
        return self._StringHelper(self.__class__.__name__, suffix)
0175 
0176 
class Expr(Node):
    """An arbitrary expression held as its raw token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        suffix = str(self.expr)
        return self._StringHelper(self.__class__.__name__, suffix)
0188 
0189 
class Return(Expr):
    """A `return expr;` statement; the expression is held by Expr."""
    pass
0192 
0193 
class Delete(Expr):
    """A `delete expr;` statement; the expression is held by Expr."""
    pass
0196 
0197 
class Friend(Expr):
    """A `friend` declaration inside a class."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Copy so later mutation of the namespace stack can't affect us.
        self.namespace = list(namespace)
0202 
0203 
class Using(Node):
    """A `using` declaration/directive; `names` holds its tokens."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        suffix = str(self.names)
        return self._StringHelper(self.__class__.__name__, suffix)
0211 
0212 
class Parameter(Node):
    """A single function parameter: name, Type, and default-value tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        suffix = '%s %s' % (self.type, self.name)
        if self.default:
            default_names = [token.name for token in self.default]
            suffix += ' = ' + ''.join(default_names)
        return self._StringHelper(self.__class__.__name__, suffix)
0230 
0231 
class _GenericDeclaration(Node):
    """Common base for declarations that carry a name and a namespace."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy to decouple from the (mutable) namespace stack.
        self.namespace = list(namespace)

    def FullName(self):
        """Return the name qualified with its namespace, if any."""
        if self.namespace and self.namespace[-1]:
            return '::'.join(self.namespace) + '::' + self.name
        return self.name

    def _TypeStringHelper(self, suffix):
        # Append the namespace (anonymous ones shown as <anonymous>).
        if self.namespace:
            names = []
            for n in self.namespace:
                names.append(n or '<anonymous>')
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
0249 
0250 
0251 # TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable declaration, optionally with an initial value."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        decl = '%s %s' % (self.type, self.name)
        if self.initial_value:
            decl = decl + ' = ' + self.initial_value
        return decl

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
0271 
0272 
class Typedef(_GenericDeclaration):
    """A typedef mapping a new name onto a sequence of alias tokens."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        for token in self.alias:
            if token is not None and token.name == node.name:
                return True
        return False

    def __str__(self):
        return self._TypeStringHelper('%s, %s' % (self.name, self.alias))
0295 
0296 
class _NestedType(_GenericDeclaration):
    """Base for field-carrying nested types (unions and enums)."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        return self._TypeStringHelper('%s, {%s}' % (self.name, self.fields))
0311 
0312 
class Union(_NestedType):
    """A `union` definition; fields are held by _NestedType."""
    pass
0315 
0316 
class Enum(_NestedType):
    """An `enum` definition; enumerators are held by _NestedType."""
    pass
0319 
0320 
class Class(_GenericDeclaration):
    """A class declaration or definition.

    bases and body are None for a forward declaration.
    """

    def __init__(self, start, end, name, bases, templated_types, body,
                 namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # A forward declaration has neither base list nor body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        for token_list in self.bases or ():
            # TODO(nnorwitz): bases are tokens, do name comparison.
            for token in token_list:
                if token.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name = name + ('<%s>' % self.templated_types)
        return self._TypeStringHelper(
            '%s, %s, %s' % (name, self.bases, self.body))
0354 
0355 
class Struct(Class):
    """A `struct`; identical to Class except for the keyword used."""
    pass
0358 
0359 
class Function(_GenericDeclaration):
    """A free function: return type, parameters, modifier flags, body."""

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # Normalize the raw token sequences into Type/Parameter nodes.
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers  # FUNCTION_* bit flags.
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # Static functions aren't visible outside their translation unit.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        # Nor is anything inside an anonymous namespace.
        return None not in self.namespace

    def Requires(self, node):
        # TODO(nnorwitz): parameters are tokens, do name comparison.
        for p in self.parameters or ():
            if p.name == node.name:
                return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)
0397 
0398 
class Method(Function):
    """A member function; `in_class` names the class it belongs to."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
0407 
0408 
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        # If only modifier keywords were seen, promote the last one to
        # serve as the type name.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        # Assemble: [modifiers ]name[<templates>][&][*][[]]
        parts = []
        if self.modifiers:
            parts.append(' '.join(self.modifiers))
            parts.append(' ')
        parts.append(str(self.name))
        if self.templated_types:
            parts.append('<%s>' % self.templated_types)
        if self.reference:
            parts.append('&')
        if self.pointer:
            parts.append('*')
        if self.array:
            parts.append('[]')
        return self._TypeStringHelper(''.join(parts))

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
0456 
0457 
class TypeConverter(object):
    """Converts raw token sequences into Type/Parameter AST nodes.

    The namespace_stack is shared with the AstBuilder that creates this
    converter.
    """

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' matching an already-consumed '<'.

        Args:
          tokens: [Token,...]
          start: index just past the opening '<'

        Returns:
          (tokens strictly inside the <>, index just past the closing '>')
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                # Track nesting for templates-within-templates.
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            if name_tokens:
                result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                                   name, templated_types, modifiers,
                                   reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                # Recurse for the template arguments, then emit the
                # templated type itself.
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
               # NOTE(review): array syntax is treated as a pointer here.
               pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their components.

        Args:
          parts: [Token,...] tokens of one declaration
          needs_name_removed: bool, True when `parts` still contains the
            declared name (e.g. a parameter) that must be stripped out

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    if name == ']' and parts[i-2].name == '[':
                        # The name precedes an array suffix: `T name[] = ...`.
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found; a trailing NAME token is the declared name.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter(end):
            # Emit one Parameter from the state accumulated so far.
            if default:
                del default[0]  # Remove flag.
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                # Inside template arguments: collect everything verbatim
                # (a ',' here separates template args, not parameters).
                type_modifiers.append(s)
                continue

            if s.name == ',':
                # End of one parameter; emit it and reset per-param state.
                AddParameter(s.start)
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter(tokens[-1].end)
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from return-type tokens, or None if empty."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
           self.DeclarationToParts(return_type_seq, False)
        # Reference/pointer/array-ness comes from the leftover tokens.
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice bounds spanning '<'...'>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
0675 
0676 class AstBuilder(object):
0677     def __init__(self, token_stream, filename, in_class='', visibility=None,
0678                  namespace_stack=[]):
0679         self.tokens = token_stream
0680         self.filename = filename
0681         # TODO(nnorwitz): use a better data structure (deque) for the queue.
0682         # Switching directions of the "queue" improved perf by about 25%.
0683         # Using a deque should be even better since we access from both sides.
0684         self.token_queue = []
0685         self.namespace_stack = namespace_stack[:]
0686         self.in_class = in_class
0687         if in_class is None:
0688             self.in_class_name_only = None
0689         else:
0690             self.in_class_name_only = in_class.split('::')[-1]
0691         self.visibility = visibility
0692         self.in_function = False
0693         self.current_token = None
0694         # Keep the state whether we are currently handling a typedef or not.
0695         self._handling_typedef = False
0696 
0697         self.converter = TypeConverter(self.namespace_stack)
0698 
0699     def HandleError(self, msg, token):
0700         printable_queue = list(reversed(self.token_queue[-20:]))
0701         sys.stderr.write('Got %s in %s @ %s %s\n' %
0702                          (msg, self.filename, token, printable_queue))
0703 
    def Generate(self):
        """Yield top-level AST nodes until the token stream is exhausted.

        Errors raised while handling a token are logged with context via
        HandleError and then re-raised unchanged.
        """
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                # Internal bookkeeping tokens never become AST nodes; a
                # ns-pop token closes the innermost namespace.
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # Bare except is deliberate: log context for any error
                # (including errors thrown in at the yield), then re-raise.
                self.HandleError('exception', token)
                raise
0726 
0727     def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
0728                         ref_pointer_name_seq, templated_types, value=None):
0729         reference = '&' in ref_pointer_name_seq
0730         pointer = '*' in ref_pointer_name_seq
0731         array = '[' in ref_pointer_name_seq
0732         var_type = Type(pos_token.start, pos_token.end, type_name,
0733                         templated_types, type_modifiers,
0734                         reference, pointer, array)
0735         return VariableDeclaration(pos_token.start, pos_token.end,
0736                                    name, var_type, value, self.namespace_stack)
0737 
0738     def _GenerateOne(self, token):
0739         if token.token_type == tokenize.NAME:
0740             if (keywords.IsKeyword(token.name) and
0741                 not keywords.IsBuiltinType(token.name)):
0742                 method = getattr(self, 'handle_' + token.name)
0743                 return method()
0744             elif token.name == self.in_class_name_only:
0745                 # The token name is the same as the class, must be a ctor if
0746                 # there is a paren.  Otherwise, it's the return type.
0747                 # Peek ahead to get the next token to figure out which.
0748                 next = self._GetNextToken()
0749                 self._AddBackToken(next)
0750                 if next.token_type == tokenize.SYNTAX and next.name == '(':
0751                     return self._GetMethod([token], FUNCTION_CTOR, None, True)
0752                 # Fall through--handle like any other method.
0753 
0754             # Handle data or function declaration/definition.
0755             syntax = tokenize.SYNTAX
0756             temp_tokens, last_token = \
0757                 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
0758             temp_tokens.insert(0, token)
0759             if last_token.name == '(':
0760                 # If there is an assignment before the paren,
0761                 # this is an expression, not a method.
0762                 expr = bool([e for e in temp_tokens if e.name == '='])
0763                 if expr:
0764                     new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
0765                     temp_tokens.append(last_token)
0766                     temp_tokens.extend(new_temp)
0767                     last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)
0768 
0769             if last_token.name == '[':
0770                 # Handle array, this isn't a method, unless it's an operator.
0771                 # TODO(nnorwitz): keep the size somewhere.
0772                 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
0773                 temp_tokens.append(last_token)
0774                 if temp_tokens[-2].name == 'operator':
0775                     temp_tokens.append(self._GetNextToken())
0776                 else:
0777                     temp_tokens2, last_token = \
0778                         self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
0779                     temp_tokens.extend(temp_tokens2)
0780 
0781             if last_token.name == ';':
0782                 # Handle data, this isn't a method.
0783                 parts = self.converter.DeclarationToParts(temp_tokens, True)
0784                 (name, type_name, templated_types, modifiers, default,
0785                  unused_other_tokens) = parts
0786 
0787                 t0 = temp_tokens[0]
0788                 names = [t.name for t in temp_tokens]
0789                 if templated_types:
0790                     start, end = self.converter.GetTemplateIndices(names)
0791                     names = names[:start] + names[end:]
0792                 default = ''.join([t.name for t in default])
0793                 return self._CreateVariable(t0, name, type_name, modifiers,
0794                                             names, templated_types, default)
0795             if last_token.name == '{':
0796                 self._AddBackTokens(temp_tokens[1:])
0797                 self._AddBackToken(last_token)
0798                 method_name = temp_tokens[0].name
0799                 method = getattr(self, 'handle_' + method_name, None)
0800                 if not method:
0801                     # Must be declaring a variable.
0802                     # TODO(nnorwitz): handle the declaration.
0803                     return None
0804                 return method()
0805             return self._GetMethod(temp_tokens, 0, None, False)
0806         elif token.token_type == tokenize.SYNTAX:
0807             if token.name == '~' and self.in_class:
0808                 # Must be a dtor (probably not in method body).
0809                 token = self._GetNextToken()
0810                 # self.in_class can contain A::Name, but the dtor will only
0811                 # be Name.  Make sure to compare against the right value.
0812                 if (token.token_type == tokenize.NAME and
0813                     token.name == self.in_class_name_only):
0814                     return self._GetMethod([token], FUNCTION_DTOR, None, True)
0815             # TODO(nnorwitz): handle a lot more syntax.
0816         elif token.token_type == tokenize.PREPROCESSOR:
0817             # TODO(nnorwitz): handle more preprocessor directives.
0818             # token starts with a #, so remove it and strip whitespace.
0819             name = token.name[1:].lstrip()
0820             if name.startswith('include'):
0821                 # Remove "include".
0822                 name = name[7:].strip()
0823                 assert name
0824                 # Handle #include \<newline> "header-on-second-line.h".
0825                 if name.startswith('\\'):
0826                     name = name[1:].strip()
0827                 assert name[0] in '<"', token
0828                 assert name[-1] in '>"', token
0829                 system = name[0] == '<'
0830                 filename = name[1:-1]
0831                 return Include(token.start, token.end, filename, system)
0832             if name.startswith('define'):
0833                 # Remove "define".
0834                 name = name[6:].strip()
0835                 assert name
0836                 value = ''
0837                 for i, c in enumerate(name):
0838                     if c.isspace():
0839                         value = name[i:].lstrip()
0840                         name = name[:i]
0841                         break
0842                 return Define(token.start, token.end, name, value)
0843             if name.startswith('if') and name[2:3].isspace():
0844                 condition = name[3:].strip()
0845                 if condition.startswith('0') or condition.startswith('(0)'):
0846                     self._SkipIf0Blocks()
0847         return None
0848 
0849     def _GetTokensUpTo(self, expected_token_type, expected_token):
0850         return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
0851 
0852     def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
0853         last_token = self._GetNextToken()
0854         tokens = []
0855         while (last_token.token_type != expected_token_type or
0856                last_token.name not in expected_tokens):
0857             tokens.append(last_token)
0858             last_token = self._GetNextToken()
0859         return tokens, last_token
0860 
0861     # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
0862     def _IgnoreUpTo(self, token_type, token):
0863         unused_tokens = self._GetTokensUpTo(token_type, token)
0864 
0865     def _SkipIf0Blocks(self):
0866         count = 1
0867         while 1:
0868             token = self._GetNextToken()
0869             if token.token_type != tokenize.PREPROCESSOR:
0870                 continue
0871 
0872             name = token.name[1:].lstrip()
0873             if name.startswith('endif'):
0874                 count -= 1
0875                 if count == 0:
0876                     break
0877             elif name.startswith('if'):
0878                 count += 1
0879 
0880     def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
0881         if GetNextToken is None:
0882             GetNextToken = self._GetNextToken
0883         # Assumes the current token is open_paren and we will consume
0884         # and return up to the close_paren.
0885         count = 1
0886         token = GetNextToken()
0887         while 1:
0888             if token.token_type == tokenize.SYNTAX:
0889                 if token.name == open_paren:
0890                     count += 1
0891                 elif token.name == close_paren:
0892                     count -= 1
0893                     if count == 0:
0894                         break
0895             yield token
0896             token = GetNextToken()
0897         yield token
0898 
    def _GetParameters(self):
        """Yield the tokens of a parameter list, up to and including ')'.

        Assumes the opening '(' has already been consumed
        (see _GetMatchingChar).
        """
        return self._GetMatchingChar('(', ')')

    def GetScope(self):
        """Yield the tokens of a braced scope, up to and including '}'.

        Assumes the opening '{' has already been consumed
        (see _GetMatchingChar).
        """
        return self._GetMatchingChar('{', '}')
0904 
0905     def _GetNextToken(self):
0906         if self.token_queue:
0907             return self.token_queue.pop()
0908         return next(self.tokens)
0909 
0910     def _AddBackToken(self, token):
0911         if token.whence == tokenize.WHENCE_STREAM:
0912             token.whence = tokenize.WHENCE_QUEUE
0913             self.token_queue.insert(0, token)
0914         else:
0915             assert token.whence == tokenize.WHENCE_QUEUE, token
0916             self.token_queue.append(token)
0917 
0918     def _AddBackTokens(self, tokens):
0919         if tokens:
0920             if tokens[-1].whence == tokenize.WHENCE_STREAM:
0921                 for token in tokens:
0922                     token.whence = tokenize.WHENCE_QUEUE
0923                 self.token_queue[:0] = reversed(tokens)
0924             else:
0925                 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
0926                 self.token_queue.extend(reversed(tokens))
0927 
    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info).

        Collects the tokens forming one (possibly qualified and/or
        templated) name such as ns::Foo<Bar>.  Reads from *seq* when
        given, otherwise from the builder's token stream.  The returned
        next_token_info is the first token that is NOT part of the name.
        """
        GetNextToken = self._GetNextToken
        if seq is not None:
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names: swallow everything up to the
            # matching '>' as part of the name.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
0952 
0953     def GetMethod(self, modifiers, templated_types):
0954         return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
0955         assert len(return_type_and_name) >= 1
0956         return self._GetMethod(return_type_and_name, modifiers, templated_types,
0957                                False)
0958 
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Core parser for a function/method declaration or definition.

        Args:
          return_type_and_name: [Token] covering the return type followed
              by the function name (name last).
          modifiers: int bitmask of FUNCTION_* flags accumulated so far.
          templated_types: template parameter info or None.
          get_paren: when True, consume tokens up to the opening '('
              first (used for dtors, where the caller stopped at '~').

        Returns:
          A Function or Method node, or a variable node for
          function-pointer data members that merely look like method
          declarations.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors: the popped token is the closing '>',
        # so scan back for the matching '<' and split off the template args.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # operator[]: rebuild a single synthetic NAME token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # 'indices' is the token whose start/end positions the resulting
        # node is attributed to.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume trailing modifiers (const, throw(), __attribute__, ...).
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'override':
                modifiers |= FUNCTION_OVERRIDE
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers: skip everything up to the body/semicolon.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
1115 
1116     def _GetReturnTypeAndClassName(self, token_seq):
1117         # Splitting the return type from the class name in a method
1118         # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
1119         # Where is the return type and where is the class name?
1120         # The heuristic used is to pull the last name as the class name.
1121         # This includes all the templated type info.
1122         # TODO(nnorwitz): if there is only One name like in the
1123         # example above, punt and assume the last bit is the class name.
1124 
1125         # Ignore a :: prefix, if exists so we can find the first real name.
1126         i = 0
1127         if token_seq[0].name == '::':
1128             i = 1
1129         # Ignore a :: suffix, if exists.
1130         end = len(token_seq) - 1
1131         if token_seq[end-1].name == '::':
1132             end -= 1
1133 
1134         # Make a copy of the sequence so we can append a sentinel
1135         # value. This is required for GetName will has to have some
1136         # terminating condition beyond the last name.
1137         seq_copy = token_seq[i:end]
1138         seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
1139         names = []
1140         while i < end:
1141             # Iterate through the sequence parsing out each name.
1142             new_name, next = self.GetName(seq_copy[i:])
1143             assert new_name, 'Got empty new_name, next=%s' % next
1144             # We got a pointer or ref.  Add it to the name.
1145             if next and next.token_type == tokenize.SYNTAX:
1146                 new_name.append(next)
1147             names.append(new_name)
1148             i += len(new_name)
1149 
1150         # Now that we have the names, it's time to undo what we did.
1151 
1152         # Remove the sentinel value.
1153         names[-1].pop()
1154         # Flatten the token sequence for the return type.
1155         return_type = [e for seq in names[:-1] for e in seq]
1156         # The class name is the last name.
1157         class_name = names[-1]
1158         return return_type, class_name
1159 
    # Handlers for primitive type keywords.  Each is a deliberate no-op
    # (returns None): the keyword token needs no special action here.
    def handle_bool(self):
        """No-op handler for the 'bool' keyword."""
        pass

    def handle_char(self):
        """No-op handler for the 'char' keyword."""
        pass

    def handle_int(self):
        """No-op handler for the 'int' keyword."""
        pass

    def handle_long(self):
        """No-op handler for the 'long' keyword."""
        pass

    def handle_short(self):
        """No-op handler for the 'short' keyword."""
        pass

    def handle_double(self):
        """No-op handler for the 'double' keyword."""
        pass

    def handle_float(self):
        """No-op handler for the 'float' keyword."""
        pass

    def handle_void(self):
        """No-op handler for the 'void' keyword."""
        pass

    def handle_wchar_t(self):
        """No-op handler for the 'wchar_t' keyword."""
        pass

    def handle_unsigned(self):
        """No-op handler for the 'unsigned' keyword."""
        pass

    def handle_signed(self):
        """No-op handler for the 'signed' keyword."""
        pass
1192 
1193     def _GetNestedType(self, ctor):
1194         name = None
1195         name_tokens, token = self.GetName()
1196         if name_tokens:
1197             name = ''.join([t.name for t in name_tokens])
1198 
1199         # Handle forward declarations.
1200         if token.token_type == tokenize.SYNTAX and token.name == ';':
1201             return ctor(token.start, token.end, name, None,
1202                         self.namespace_stack)
1203 
1204         if token.token_type == tokenize.NAME and self._handling_typedef:
1205             self._AddBackToken(token)
1206             return ctor(token.start, token.end, name, None,
1207                         self.namespace_stack)
1208 
1209         # Must be the type declaration.
1210         fields = list(self._GetMatchingChar('{', '}'))
1211         del fields[-1]                  # Remove trailing '}'.
1212         if token.token_type == tokenize.SYNTAX and token.name == '{':
1213             next = self._GetNextToken()
1214             new_type = ctor(token.start, token.end, name, fields,
1215                             self.namespace_stack)
1216             # A name means this is an anonymous type and the name
1217             # is the variable declaration.
1218             if next.token_type != tokenize.NAME:
1219                 return new_type
1220             name = new_type
1221             token = next
1222 
1223         # Must be variable declaration using the type prefixed with keyword.
1224         assert token.token_type == tokenize.NAME, token
1225         return self._CreateVariable(token, token.name, name, [], '', None)
1226 
    def handle_struct(self):
        """Parse a 'struct' declaration.

        Distinguishes struct-typed variables, methods returning a struct,
        and full struct definitions (delegated to _GetClass).
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    # Synthesize a 'struct' token just before the name.
                    t0 = name_tokens[0]
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable/method: push everything back and parse a
            # full struct definition below.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1262 
    def handle_union(self):
        """Parse a 'union' declaration via the shared nested-type logic."""
        return self._GetNestedType(Union)

    def handle_enum(self):
        """Parse an 'enum' declaration via the shared nested-type logic."""
        return self._GetNestedType(Enum)
1268 
    def handle_auto(self):
        """No-op handler for the 'auto' keyword."""
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        """No-op handler for the 'register' storage class."""
        pass

    def handle_const(self):
        """No-op handler for the 'const' qualifier."""
        pass

    def handle_inline(self):
        """No-op handler for the 'inline' specifier."""
        pass

    def handle_extern(self):
        """No-op handler for the 'extern' storage class."""
        pass

    def handle_static(self):
        """No-op handler for the 'static' storage class."""
        pass
1288 
    def handle_virtual(self):
        """Parse a declaration introduced by 'virtual'.

        Dtors (optionally prefixed by 'inline') are dispatched via
        GetMethod; everything else is parsed as a method with the
        FUNCTION_VIRTUAL flag set.
        """
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
        # Re-prepend the token(s) we consumed while peeking.
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)
1304 
    def handle_volatile(self):
        """No-op handler for the 'volatile' qualifier."""
        pass

    def handle_mutable(self):
        """No-op handler for the 'mutable' specifier."""
        pass
1310 
    def handle_public(self):
        """Switch the current member visibility to public (class scope only)."""
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        """Switch the current member visibility to protected (class scope only)."""
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        """Switch the current member visibility to private (class scope only)."""
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
1322 
1323     def handle_friend(self):
1324         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1325         assert tokens
1326         t0 = tokens[0]
1327         return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1328 
    def handle_static_cast(self):
        """No-op handler for 'static_cast'."""
        pass

    def handle_const_cast(self):
        """No-op handler for 'const_cast'."""
        pass

    def handle_dynamic_cast(self):
        """No-op handler for 'dynamic_cast'."""
        pass

    def handle_reinterpret_cast(self):
        """No-op handler for 'reinterpret_cast'."""
        pass

    def handle_new(self):
        """No-op handler for the 'new' keyword."""
        pass
1343 
1344     def handle_delete(self):
1345         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1346         assert tokens
1347         return Delete(tokens[0].start, tokens[0].end, tokens)
1348 
    def handle_typedef(self):
        """Parse a typedef declaration and return a Typedef node.

        Typedefs of struct/enum/union/class bodies are delegated to the
        corresponding handle_* method with _handling_typedef set.
        """
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        # The last token is normally the new type's name; 'indices' is the
        # token whose position the Typedef node is attributed to.
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
1388 
    def handle_typeid(self):
        """No-op handler for 'typeid'."""
        pass  # Not needed yet.

    def handle_typename(self):
        """No-op handler for 'typename'."""
        pass  # Not needed yet.
1394 
    def _GetTemplatedTypes(self):
        """Parse template parameters between '<' and '>'.

        Returns a dict mapping parameter name -> (type_name, default),
        where either element may be None.
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            # Skip keywords (typename/class/...) and separators.
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # Parameter with a default: key = <default tokens>.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result
1421 
    def handle_template(self):
        """Parse a template declaration (class, struct, friend, or method).

        Returns the parsed node, or None for what looks like a templated
        variable definition.
        """
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        # Peek ahead to decide between a method ('(' first) and a
        # variable (';' first), then push everything back.
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None
1444 
    def handle_true(self):
        """No-op handler for the 'true' literal."""
        pass  # Nothing to do.

    def handle_false(self):
        """No-op handler for the 'false' literal."""
        pass  # Nothing to do.

    def handle_asm(self):
        """No-op handler for 'asm' blocks."""
        pass  # Not needed yet.
1453 
    def handle_class(self):
        """Parse a 'class' declaration (default member visibility: private)."""
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1456 
    def _GetBases(self):
        """Parse a base-class list (after ':').

        Returns (bases, token) where bases is a list of type nodes and
        token is the '{' that opens the class body.
        """
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token
1488 
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct declaration or definition.

        Args:
          class_type: node constructor (Class or Struct).
          visibility: default member visibility for the body.
          templated_types: template parameter info or None.

        Returns a class_type node, or a variable/method node for inline
        declarations like 'class Foo* p;'.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a nested builder scoped to this class.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # 'class Foo {...} var;' -- definition plus variable.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, templated_types, body, self.namespace_stack)
1560 
    def handle_namespace(self):
        """Handle a 'namespace' keyword.

        Pushes the namespace name (None for an anonymous namespace) onto
        self.namespace_stack and arranges for a synthetic pop token to be
        seen when the namespace body ends.  Always returns None; the
        namespace contents are parsed via the re-queued tokens.
        """
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        # Preserve the origin info so diagnostics still point at real input.
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
1587 
1588     def handle_using(self):
1589         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1590         assert tokens
1591         return Using(tokens[0].start, tokens[0].end, tokens)
1592 
1593     def handle_explicit(self):
1594         assert self.in_class
1595         # Nothing much to do.
1596         # TODO(nnorwitz): maybe verify the method name == class name.
1597         # This must be a ctor.
1598         return self.GetMethod(FUNCTION_CTOR, None)
1599 
1600     def handle_this(self):
1601         pass  # Nothing to do.
1602 
1603     def handle_operator(self):
1604         # Pull off the next token(s?) and make that part of the method name.
1605         pass
1606 
1607     def handle_sizeof(self):
1608         pass
1609 
1610     def handle_case(self):
1611         pass
1612 
1613     def handle_switch(self):
1614         pass
1615 
1616     def handle_default(self):
1617         token = self._GetNextToken()
1618         assert token.token_type == tokenize.SYNTAX
1619         assert token.name == ':'
1620 
1621     def handle_if(self):
1622         pass
1623 
1624     def handle_else(self):
1625         pass
1626 
1627     def handle_return(self):
1628         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1629         if not tokens:
1630             return Return(self.current_token.start, self.current_token.end, None)
1631         return Return(tokens[0].start, tokens[0].end, tokens)
1632 
1633     def handle_goto(self):
1634         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1635         assert len(tokens) == 1, str(tokens)
1636         return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1637 
1638     def handle_try(self):
1639         pass  # Not needed yet.
1640 
1641     def handle_catch(self):
1642         pass  # Not needed yet.
1643 
1644     def handle_throw(self):
1645         pass  # Not needed yet.
1646 
1647     def handle_while(self):
1648         pass
1649 
1650     def handle_do(self):
1651         pass
1652 
1653     def handle_for(self):
1654         pass
1655 
1656     def handle_break(self):
1657         self._IgnoreUpTo(tokenize.SYNTAX, ';')
1658 
1659     def handle_continue(self):
1660         self._IgnoreUpTo(tokenize.SYNTAX, ';')
1661 
1662 
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
1674 
1675 
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Parsing is best effort: parse errors are silently ignored.  A message
    is written to stderr only when the file cannot be read.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Deliberate best-effort: unparsable files are skipped quietly.
        # Narrowed from a bare 'except:' so SystemExit/GeneratorExit still
        # propagate instead of being swallowed.
        pass
1698 
1699 
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for filename in filenames:
        PrintIndentifiers(filename, should_print)
1709 
1710 
def main(argv):
    """Parse each C++ file named on the command line and dump its AST.

    Args:
      argv: command-line arguments; argv[1:] are the files to process.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # list() forces evaluation here: Generate() is lazy and filter()
            # returns a lazy iterator in Python 3, so without it any parse
            # error would escape this try block and crash in the else clause
            # below instead of being caught and reported.
            entire_ast = list(filter(None, builder.Generate()))
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1730 
1731 
# Allow running this module as a script over the files named on the
# command line.
if __name__ == '__main__':
    main(sys.argv)