店播爬取Python脚本

descriptor_database.py 6.7KB

# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# https://developers.google.com/protocol-buffers/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. """Provides a container for DescriptorProtos."""
  31. __author__ = 'matthewtoia@google.com (Matt Toia)'
  32. import warnings
  33. class Error(Exception):
  34. pass
  35. class DescriptorDatabaseConflictingDefinitionError(Error):
  36. """Raised when a proto is added with the same name & different descriptor."""
  37. class DescriptorDatabase(object):
  38. """A container accepting FileDescriptorProtos and maps DescriptorProtos."""
  39. def __init__(self):
  40. self._file_desc_protos_by_file = {}
  41. self._file_desc_protos_by_symbol = {}
  42. def Add(self, file_desc_proto):
  43. """Adds the FileDescriptorProto and its types to this database.
  44. Args:
  45. file_desc_proto: The FileDescriptorProto to add.
  46. Raises:
  47. DescriptorDatabaseConflictingDefinitionError: if an attempt is made to
  48. add a proto with the same name but different definition than an
  49. existing proto in the database.
  50. """
  51. proto_name = file_desc_proto.name
  52. if proto_name not in self._file_desc_protos_by_file:
  53. self._file_desc_protos_by_file[proto_name] = file_desc_proto
  54. elif self._file_desc_protos_by_file[proto_name] != file_desc_proto:
  55. raise DescriptorDatabaseConflictingDefinitionError(
  56. '%s already added, but with different descriptor.' % proto_name)
  57. else:
  58. return
  59. # Add all the top-level descriptors to the index.
  60. package = file_desc_proto.package
  61. for message in file_desc_proto.message_type:
  62. for name in _ExtractSymbols(message, package):
  63. self._AddSymbol(name, file_desc_proto)
  64. for enum in file_desc_proto.enum_type:
  65. self._AddSymbol(('.'.join((package, enum.name))), file_desc_proto)
  66. for enum_value in enum.value:
  67. self._file_desc_protos_by_symbol[
  68. '.'.join((package, enum_value.name))] = file_desc_proto
  69. for extension in file_desc_proto.extension:
  70. self._AddSymbol(('.'.join((package, extension.name))), file_desc_proto)
  71. for service in file_desc_proto.service:
  72. self._AddSymbol(('.'.join((package, service.name))), file_desc_proto)
  73. def FindFileByName(self, name):
  74. """Finds the file descriptor proto by file name.
  75. Typically the file name is a relative path ending to a .proto file. The
  76. proto with the given name will have to have been added to this database
  77. using the Add method or else an error will be raised.
  78. Args:
  79. name: The file name to find.
  80. Returns:
  81. The file descriptor proto matching the name.
  82. Raises:
  83. KeyError if no file by the given name was added.
  84. """
  85. return self._file_desc_protos_by_file[name]
  86. def FindFileContainingSymbol(self, symbol):
  87. """Finds the file descriptor proto containing the specified symbol.
  88. The symbol should be a fully qualified name including the file descriptor's
  89. package and any containing messages. Some examples:
  90. 'some.package.name.Message'
  91. 'some.package.name.Message.NestedEnum'
  92. 'some.package.name.Message.some_field'
  93. The file descriptor proto containing the specified symbol must be added to
  94. this database using the Add method or else an error will be raised.
  95. Args:
  96. symbol: The fully qualified symbol name.
  97. Returns:
  98. The file descriptor proto containing the symbol.
  99. Raises:
  100. KeyError if no file contains the specified symbol.
  101. """
  102. try:
  103. return self._file_desc_protos_by_symbol[symbol]
  104. except KeyError:
  105. # Fields, enum values, and nested extensions are not in
  106. # _file_desc_protos_by_symbol. Try to find the top level
  107. # descriptor. Non-existent nested symbol under a valid top level
  108. # descriptor can also be found. The behavior is the same with
  109. # protobuf C++.
  110. top_level, _, _ = symbol.rpartition('.')
  111. try:
  112. return self._file_desc_protos_by_symbol[top_level]
  113. except KeyError:
  114. # Raise the original symbol as a KeyError for better diagnostics.
  115. raise KeyError(symbol)
  116. def FindFileContainingExtension(self, extendee_name, extension_number):
  117. # TODO(jieluo): implement this API.
  118. return None
  119. def FindAllExtensionNumbers(self, extendee_name):
  120. # TODO(jieluo): implement this API.
  121. return []
  122. def _AddSymbol(self, name, file_desc_proto):
  123. if name in self._file_desc_protos_by_symbol:
  124. warn_msg = ('Conflict register for file "' + file_desc_proto.name +
  125. '": ' + name +
  126. ' is already defined in file "' +
  127. self._file_desc_protos_by_symbol[name].name + '"')
  128. warnings.warn(warn_msg, RuntimeWarning)
  129. self._file_desc_protos_by_symbol[name] = file_desc_proto
  130. def _ExtractSymbols(desc_proto, package):
  131. """Pulls out all the symbols from a descriptor proto.
  132. Args:
  133. desc_proto: The proto to extract symbols from.
  134. package: The package containing the descriptor type.
  135. Yields:
  136. The fully qualified name found in the descriptor.
  137. """
  138. message_name = package + '.' + desc_proto.name if package else desc_proto.name
  139. yield message_name
  140. for nested_type in desc_proto.nested_type:
  141. for symbol in _ExtractSymbols(nested_type, message_name):
  142. yield symbol
  143. for enum_type in desc_proto.enum_type:
  144. yield '.'.join((message_name, enum_type.name))