店播爬取Python脚本

json_format.py 32KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. # Protocol Buffers - Google's data interchange format
  2. # Copyright 2008 Google Inc. All rights reserved.
  3. # https://developers.google.com/protocol-buffers/
  4. #
  5. # Redistribution and use in source and binary forms, with or without
  6. # modification, are permitted provided that the following conditions are
  7. # met:
  8. #
  9. # * Redistributions of source code must retain the above copyright
  10. # notice, this list of conditions and the following disclaimer.
  11. # * Redistributions in binary form must reproduce the above
  12. # copyright notice, this list of conditions and the following disclaimer
  13. # in the documentation and/or other materials provided with the
  14. # distribution.
  15. # * Neither the name of Google Inc. nor the names of its
  16. # contributors may be used to endorse or promote products derived from
  17. # this software without specific prior written permission.
  18. #
  19. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. """Contains routines for printing protocol messages in JSON format.
  31. Simple usage example:
  32. # Create a proto object and serialize it to a json format string.
  33. message = my_proto_pb2.MyMessage(foo='bar')
  34. json_string = json_format.MessageToJson(message)
  35. # Parse a json format string to proto object.
  36. message = json_format.Parse(json_string, my_proto_pb2.MyMessage())
  37. """
  38. __author__ = 'jieluo@google.com (Jie Luo)'
  39. # pylint: disable=g-statement-before-imports,g-import-not-at-top
  40. try:
  41. from collections import OrderedDict
  42. except ImportError:
  43. from ordereddict import OrderedDict # PY26
  44. # pylint: enable=g-statement-before-imports,g-import-not-at-top
  45. import base64
  46. import json
  47. import math
  48. from operator import methodcaller
  49. import re
  50. import sys
  51. import six
  52. from google.protobuf.internal import type_checkers
  53. from google.protobuf import descriptor
  54. from google.protobuf import symbol_database
  55. _TIMESTAMPFOMAT = '%Y-%m-%dT%H:%M:%S'
  56. _INT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT32,
  57. descriptor.FieldDescriptor.CPPTYPE_UINT32,
  58. descriptor.FieldDescriptor.CPPTYPE_INT64,
  59. descriptor.FieldDescriptor.CPPTYPE_UINT64])
  60. _INT64_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT64,
  61. descriptor.FieldDescriptor.CPPTYPE_UINT64])
  62. _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
  63. descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
  64. _INFINITY = 'Infinity'
  65. _NEG_INFINITY = '-Infinity'
  66. _NAN = 'NaN'
  67. _UNPAIRED_SURROGATE_PATTERN = re.compile(six.u(
  68. r'[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]'
  69. ))
  70. _VALID_EXTENSION_NAME = re.compile(r'\[[a-zA-Z0-9\._]*\]$')
  71. class Error(Exception):
  72. """Top-level module error for json_format."""
  73. class SerializeToJsonError(Error):
  74. """Thrown if serialization to JSON fails."""
  75. class ParseError(Error):
  76. """Thrown in case of parsing error."""
  77. def MessageToJson(
  78. message,
  79. including_default_value_fields=False,
  80. preserving_proto_field_name=False,
  81. indent=2,
  82. sort_keys=False,
  83. use_integers_for_enums=False,
  84. descriptor_pool=None,
  85. float_precision=None):
  86. """Converts protobuf message to JSON format.
  87. Args:
  88. message: The protocol buffers message instance to serialize.
  89. including_default_value_fields: If True, singular primitive fields,
  90. repeated fields, and map fields will always be serialized. If
  91. False, only serialize non-empty fields. Singular message fields
  92. and oneof fields are not affected by this option.
  93. preserving_proto_field_name: If True, use the original proto field
  94. names as defined in the .proto file. If False, convert the field
  95. names to lowerCamelCase.
  96. indent: The JSON object will be pretty-printed with this indent level.
  97. An indent level of 0 or negative will only insert newlines.
  98. sort_keys: If True, then the output will be sorted by field names.
  99. use_integers_for_enums: If true, print integers instead of enum names.
  100. descriptor_pool: A Descriptor Pool for resolving types. If None use the
  101. default.
  102. float_precision: If set, use this to specify float field valid digits.
  103. Returns:
  104. A string containing the JSON formatted protocol buffer message.
  105. """
  106. printer = _Printer(
  107. including_default_value_fields,
  108. preserving_proto_field_name,
  109. use_integers_for_enums,
  110. descriptor_pool,
  111. float_precision=float_precision)
  112. return printer.ToJsonString(message, indent, sort_keys)
  113. def MessageToDict(
  114. message,
  115. including_default_value_fields=False,
  116. preserving_proto_field_name=False,
  117. use_integers_for_enums=False,
  118. descriptor_pool=None,
  119. float_precision=None):
  120. """Converts protobuf message to a dictionary.
  121. When the dictionary is encoded to JSON, it conforms to proto3 JSON spec.
  122. Args:
  123. message: The protocol buffers message instance to serialize.
  124. including_default_value_fields: If True, singular primitive fields,
  125. repeated fields, and map fields will always be serialized. If
  126. False, only serialize non-empty fields. Singular message fields
  127. and oneof fields are not affected by this option.
  128. preserving_proto_field_name: If True, use the original proto field
  129. names as defined in the .proto file. If False, convert the field
  130. names to lowerCamelCase.
  131. use_integers_for_enums: If true, print integers instead of enum names.
  132. descriptor_pool: A Descriptor Pool for resolving types. If None use the
  133. default.
  134. float_precision: If set, use this to specify float field valid digits.
  135. Returns:
  136. A dict representation of the protocol buffer message.
  137. """
  138. printer = _Printer(
  139. including_default_value_fields,
  140. preserving_proto_field_name,
  141. use_integers_for_enums,
  142. descriptor_pool,
  143. float_precision=float_precision)
  144. # pylint: disable=protected-access
  145. return printer._MessageToJsonObject(message)
  146. def _IsMapEntry(field):
  147. return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
  148. field.message_type.has_options and
  149. field.message_type.GetOptions().map_entry)
  150. class _Printer(object):
  151. """JSON format printer for protocol message."""
  152. def __init__(
  153. self,
  154. including_default_value_fields=False,
  155. preserving_proto_field_name=False,
  156. use_integers_for_enums=False,
  157. descriptor_pool=None,
  158. float_precision=None):
  159. self.including_default_value_fields = including_default_value_fields
  160. self.preserving_proto_field_name = preserving_proto_field_name
  161. self.use_integers_for_enums = use_integers_for_enums
  162. self.descriptor_pool = descriptor_pool
  163. if float_precision:
  164. self.float_format = '.{}g'.format(float_precision)
  165. else:
  166. self.float_format = None
  167. def ToJsonString(self, message, indent, sort_keys):
  168. js = self._MessageToJsonObject(message)
  169. return json.dumps(js, indent=indent, sort_keys=sort_keys)
  170. def _MessageToJsonObject(self, message):
  171. """Converts message to an object according to Proto3 JSON Specification."""
  172. message_descriptor = message.DESCRIPTOR
  173. full_name = message_descriptor.full_name
  174. if _IsWrapperMessage(message_descriptor):
  175. return self._WrapperMessageToJsonObject(message)
  176. if full_name in _WKTJSONMETHODS:
  177. return methodcaller(_WKTJSONMETHODS[full_name][0], message)(self)
  178. js = {}
  179. return self._RegularMessageToJsonObject(message, js)
  180. def _RegularMessageToJsonObject(self, message, js):
  181. """Converts normal message according to Proto3 JSON Specification."""
  182. fields = message.ListFields()
  183. try:
  184. for field, value in fields:
  185. if self.preserving_proto_field_name:
  186. name = field.name
  187. else:
  188. name = field.json_name
  189. if _IsMapEntry(field):
  190. # Convert a map field.
  191. v_field = field.message_type.fields_by_name['value']
  192. js_map = {}
  193. for key in value:
  194. if isinstance(key, bool):
  195. if key:
  196. recorded_key = 'true'
  197. else:
  198. recorded_key = 'false'
  199. else:
  200. recorded_key = key
  201. js_map[recorded_key] = self._FieldToJsonObject(
  202. v_field, value[key])
  203. js[name] = js_map
  204. elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
  205. # Convert a repeated field.
  206. js[name] = [self._FieldToJsonObject(field, k)
  207. for k in value]
  208. elif field.is_extension:
  209. name = '[%s]' % field.full_name
  210. js[name] = self._FieldToJsonObject(field, value)
  211. else:
  212. js[name] = self._FieldToJsonObject(field, value)
  213. # Serialize default value if including_default_value_fields is True.
  214. if self.including_default_value_fields:
  215. message_descriptor = message.DESCRIPTOR
  216. for field in message_descriptor.fields:
  217. # Singular message fields and oneof fields will not be affected.
  218. if ((field.label != descriptor.FieldDescriptor.LABEL_REPEATED and
  219. field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE) or
  220. field.containing_oneof):
  221. continue
  222. if self.preserving_proto_field_name:
  223. name = field.name
  224. else:
  225. name = field.json_name
  226. if name in js:
  227. # Skip the field which has been serialized already.
  228. continue
  229. if _IsMapEntry(field):
  230. js[name] = {}
  231. elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
  232. js[name] = []
  233. else:
  234. js[name] = self._FieldToJsonObject(field, field.default_value)
  235. except ValueError as e:
  236. raise SerializeToJsonError(
  237. 'Failed to serialize {0} field: {1}.'.format(field.name, e))
  238. return js
  239. def _FieldToJsonObject(self, field, value):
  240. """Converts field value according to Proto3 JSON Specification."""
  241. if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
  242. return self._MessageToJsonObject(value)
  243. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
  244. if self.use_integers_for_enums:
  245. return value
  246. if field.enum_type.full_name == 'google.protobuf.NullValue':
  247. return None
  248. enum_value = field.enum_type.values_by_number.get(value, None)
  249. if enum_value is not None:
  250. return enum_value.name
  251. else:
  252. if field.file.syntax == 'proto3':
  253. return value
  254. raise SerializeToJsonError('Enum field contains an integer value '
  255. 'which can not mapped to an enum value.')
  256. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
  257. if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
  258. # Use base64 Data encoding for bytes
  259. return base64.b64encode(value).decode('utf-8')
  260. else:
  261. return value
  262. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
  263. return bool(value)
  264. elif field.cpp_type in _INT64_TYPES:
  265. return str(value)
  266. elif field.cpp_type in _FLOAT_TYPES:
  267. if math.isinf(value):
  268. if value < 0.0:
  269. return _NEG_INFINITY
  270. else:
  271. return _INFINITY
  272. if math.isnan(value):
  273. return _NAN
  274. if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
  275. if self.float_format:
  276. return float(format(value, self.float_format))
  277. else:
  278. return type_checkers.ToShortestFloat(value)
  279. return value
  280. def _AnyMessageToJsonObject(self, message):
  281. """Converts Any message according to Proto3 JSON Specification."""
  282. if not message.ListFields():
  283. return {}
  284. # Must print @type first, use OrderedDict instead of {}
  285. js = OrderedDict()
  286. type_url = message.type_url
  287. js['@type'] = type_url
  288. sub_message = _CreateMessageFromTypeUrl(type_url, self.descriptor_pool)
  289. sub_message.ParseFromString(message.value)
  290. message_descriptor = sub_message.DESCRIPTOR
  291. full_name = message_descriptor.full_name
  292. if _IsWrapperMessage(message_descriptor):
  293. js['value'] = self._WrapperMessageToJsonObject(sub_message)
  294. return js
  295. if full_name in _WKTJSONMETHODS:
  296. js['value'] = methodcaller(_WKTJSONMETHODS[full_name][0],
  297. sub_message)(self)
  298. return js
  299. return self._RegularMessageToJsonObject(sub_message, js)
  300. def _GenericMessageToJsonObject(self, message):
  301. """Converts message according to Proto3 JSON Specification."""
  302. # Duration, Timestamp and FieldMask have ToJsonString method to do the
  303. # convert. Users can also call the method directly.
  304. return message.ToJsonString()
  305. def _ValueMessageToJsonObject(self, message):
  306. """Converts Value message according to Proto3 JSON Specification."""
  307. which = message.WhichOneof('kind')
  308. # If the Value message is not set treat as null_value when serialize
  309. # to JSON. The parse back result will be different from original message.
  310. if which is None or which == 'null_value':
  311. return None
  312. if which == 'list_value':
  313. return self._ListValueMessageToJsonObject(message.list_value)
  314. if which == 'struct_value':
  315. value = message.struct_value
  316. else:
  317. value = getattr(message, which)
  318. oneof_descriptor = message.DESCRIPTOR.fields_by_name[which]
  319. return self._FieldToJsonObject(oneof_descriptor, value)
  320. def _ListValueMessageToJsonObject(self, message):
  321. """Converts ListValue message according to Proto3 JSON Specification."""
  322. return [self._ValueMessageToJsonObject(value)
  323. for value in message.values]
  324. def _StructMessageToJsonObject(self, message):
  325. """Converts Struct message according to Proto3 JSON Specification."""
  326. fields = message.fields
  327. ret = {}
  328. for key in fields:
  329. ret[key] = self._ValueMessageToJsonObject(fields[key])
  330. return ret
  331. def _WrapperMessageToJsonObject(self, message):
  332. return self._FieldToJsonObject(
  333. message.DESCRIPTOR.fields_by_name['value'], message.value)
  334. def _IsWrapperMessage(message_descriptor):
  335. return message_descriptor.file.name == 'google/protobuf/wrappers.proto'
  336. def _DuplicateChecker(js):
  337. result = {}
  338. for name, value in js:
  339. if name in result:
  340. raise ParseError('Failed to load JSON: duplicate key {0}.'.format(name))
  341. result[name] = value
  342. return result
  343. def _CreateMessageFromTypeUrl(type_url, descriptor_pool):
  344. """Creates a message from a type URL."""
  345. db = symbol_database.Default()
  346. pool = db.pool if descriptor_pool is None else descriptor_pool
  347. type_name = type_url.split('/')[-1]
  348. try:
  349. message_descriptor = pool.FindMessageTypeByName(type_name)
  350. except KeyError:
  351. raise TypeError(
  352. 'Can not find message descriptor by type_url: {0}.'.format(type_url))
  353. message_class = db.GetPrototype(message_descriptor)
  354. return message_class()
  355. def Parse(text, message, ignore_unknown_fields=False, descriptor_pool=None):
  356. """Parses a JSON representation of a protocol message into a message.
  357. Args:
  358. text: Message JSON representation.
  359. message: A protocol buffer message to merge into.
  360. ignore_unknown_fields: If True, do not raise errors for unknown fields.
  361. descriptor_pool: A Descriptor Pool for resolving types. If None use the
  362. default.
  363. Returns:
  364. The same message passed as argument.
  365. Raises::
  366. ParseError: On JSON parsing problems.
  367. """
  368. if not isinstance(text, six.text_type): text = text.decode('utf-8')
  369. try:
  370. js = json.loads(text, object_pairs_hook=_DuplicateChecker)
  371. except ValueError as e:
  372. raise ParseError('Failed to load JSON: {0}.'.format(str(e)))
  373. return ParseDict(js, message, ignore_unknown_fields, descriptor_pool)
  374. def ParseDict(js_dict,
  375. message,
  376. ignore_unknown_fields=False,
  377. descriptor_pool=None):
  378. """Parses a JSON dictionary representation into a message.
  379. Args:
  380. js_dict: Dict representation of a JSON message.
  381. message: A protocol buffer message to merge into.
  382. ignore_unknown_fields: If True, do not raise errors for unknown fields.
  383. descriptor_pool: A Descriptor Pool for resolving types. If None use the
  384. default.
  385. Returns:
  386. The same message passed as argument.
  387. """
  388. parser = _Parser(ignore_unknown_fields, descriptor_pool)
  389. parser.ConvertMessage(js_dict, message)
  390. return message
  391. _INT_OR_FLOAT = six.integer_types + (float,)
  392. class _Parser(object):
  393. """JSON format parser for protocol message."""
  394. def __init__(self, ignore_unknown_fields, descriptor_pool):
  395. self.ignore_unknown_fields = ignore_unknown_fields
  396. self.descriptor_pool = descriptor_pool
  397. def ConvertMessage(self, value, message):
  398. """Convert a JSON object into a message.
  399. Args:
  400. value: A JSON object.
  401. message: A WKT or regular protocol message to record the data.
  402. Raises:
  403. ParseError: In case of convert problems.
  404. """
  405. message_descriptor = message.DESCRIPTOR
  406. full_name = message_descriptor.full_name
  407. if _IsWrapperMessage(message_descriptor):
  408. self._ConvertWrapperMessage(value, message)
  409. elif full_name in _WKTJSONMETHODS:
  410. methodcaller(_WKTJSONMETHODS[full_name][1], value, message)(self)
  411. else:
  412. self._ConvertFieldValuePair(value, message)
  413. def _ConvertFieldValuePair(self, js, message):
  414. """Convert field value pairs into regular message.
  415. Args:
  416. js: A JSON object to convert the field value pairs.
  417. message: A regular protocol message to record the data.
  418. Raises:
  419. ParseError: In case of problems converting.
  420. """
  421. names = []
  422. message_descriptor = message.DESCRIPTOR
  423. fields_by_json_name = dict((f.json_name, f)
  424. for f in message_descriptor.fields)
  425. for name in js:
  426. try:
  427. field = fields_by_json_name.get(name, None)
  428. if not field:
  429. field = message_descriptor.fields_by_name.get(name, None)
  430. if not field and _VALID_EXTENSION_NAME.match(name):
  431. if not message_descriptor.is_extendable:
  432. raise ParseError('Message type {0} does not have extensions'.format(
  433. message_descriptor.full_name))
  434. identifier = name[1:-1] # strip [] brackets
  435. # pylint: disable=protected-access
  436. field = message.Extensions._FindExtensionByName(identifier)
  437. # pylint: enable=protected-access
  438. if not field:
  439. # Try looking for extension by the message type name, dropping the
  440. # field name following the final . separator in full_name.
  441. identifier = '.'.join(identifier.split('.')[:-1])
  442. # pylint: disable=protected-access
  443. field = message.Extensions._FindExtensionByName(identifier)
  444. # pylint: enable=protected-access
  445. if not field:
  446. if self.ignore_unknown_fields:
  447. continue
  448. raise ParseError(
  449. ('Message type "{0}" has no field named "{1}".\n'
  450. ' Available Fields(except extensions): {2}').format(
  451. message_descriptor.full_name, name,
  452. [f.json_name for f in message_descriptor.fields]))
  453. if name in names:
  454. raise ParseError('Message type "{0}" should not have multiple '
  455. '"{1}" fields.'.format(
  456. message.DESCRIPTOR.full_name, name))
  457. names.append(name)
  458. value = js[name]
  459. # Check no other oneof field is parsed.
  460. if field.containing_oneof is not None and value is not None:
  461. oneof_name = field.containing_oneof.name
  462. if oneof_name in names:
  463. raise ParseError('Message type "{0}" should not have multiple '
  464. '"{1}" oneof fields.'.format(
  465. message.DESCRIPTOR.full_name, oneof_name))
  466. names.append(oneof_name)
  467. if value is None:
  468. if (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE
  469. and field.message_type.full_name == 'google.protobuf.Value'):
  470. sub_message = getattr(message, field.name)
  471. sub_message.null_value = 0
  472. elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM
  473. and field.enum_type.full_name == 'google.protobuf.NullValue'):
  474. setattr(message, field.name, 0)
  475. else:
  476. message.ClearField(field.name)
  477. continue
  478. # Parse field value.
  479. if _IsMapEntry(field):
  480. message.ClearField(field.name)
  481. self._ConvertMapFieldValue(value, message, field)
  482. elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
  483. message.ClearField(field.name)
  484. if not isinstance(value, list):
  485. raise ParseError('repeated field {0} must be in [] which is '
  486. '{1}.'.format(name, value))
  487. if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
  488. # Repeated message field.
  489. for item in value:
  490. sub_message = getattr(message, field.name).add()
  491. # None is a null_value in Value.
  492. if (item is None and
  493. sub_message.DESCRIPTOR.full_name != 'google.protobuf.Value'):
  494. raise ParseError('null is not allowed to be used as an element'
  495. ' in a repeated field.')
  496. self.ConvertMessage(item, sub_message)
  497. else:
  498. # Repeated scalar field.
  499. for item in value:
  500. if item is None:
  501. raise ParseError('null is not allowed to be used as an element'
  502. ' in a repeated field.')
  503. getattr(message, field.name).append(
  504. _ConvertScalarFieldValue(item, field))
  505. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
  506. if field.is_extension:
  507. sub_message = message.Extensions[field]
  508. else:
  509. sub_message = getattr(message, field.name)
  510. sub_message.SetInParent()
  511. self.ConvertMessage(value, sub_message)
  512. else:
  513. if field.is_extension:
  514. message.Extensions[field] = _ConvertScalarFieldValue(value, field)
  515. else:
  516. setattr(message, field.name, _ConvertScalarFieldValue(value, field))
  517. except ParseError as e:
  518. if field and field.containing_oneof is None:
  519. raise ParseError('Failed to parse {0} field: {1}.'.format(name, e))
  520. else:
  521. raise ParseError(str(e))
  522. except ValueError as e:
  523. raise ParseError('Failed to parse {0} field: {1}.'.format(name, e))
  524. except TypeError as e:
  525. raise ParseError('Failed to parse {0} field: {1}.'.format(name, e))
  526. def _ConvertAnyMessage(self, value, message):
  527. """Convert a JSON representation into Any message."""
  528. if isinstance(value, dict) and not value:
  529. return
  530. try:
  531. type_url = value['@type']
  532. except KeyError:
  533. raise ParseError('@type is missing when parsing any message.')
  534. sub_message = _CreateMessageFromTypeUrl(type_url, self.descriptor_pool)
  535. message_descriptor = sub_message.DESCRIPTOR
  536. full_name = message_descriptor.full_name
  537. if _IsWrapperMessage(message_descriptor):
  538. self._ConvertWrapperMessage(value['value'], sub_message)
  539. elif full_name in _WKTJSONMETHODS:
  540. methodcaller(
  541. _WKTJSONMETHODS[full_name][1], value['value'], sub_message)(self)
  542. else:
  543. del value['@type']
  544. self._ConvertFieldValuePair(value, sub_message)
  545. value['@type'] = type_url
  546. # Sets Any message
  547. message.value = sub_message.SerializeToString()
  548. message.type_url = type_url
  549. def _ConvertGenericMessage(self, value, message):
  550. """Convert a JSON representation into message with FromJsonString."""
  551. # Duration, Timestamp, FieldMask have a FromJsonString method to do the
  552. # conversion. Users can also call the method directly.
  553. try:
  554. message.FromJsonString(value)
  555. except ValueError as e:
  556. raise ParseError(e)
  557. def _ConvertValueMessage(self, value, message):
  558. """Convert a JSON representation into Value message."""
  559. if isinstance(value, dict):
  560. self._ConvertStructMessage(value, message.struct_value)
  561. elif isinstance(value, list):
  562. self. _ConvertListValueMessage(value, message.list_value)
  563. elif value is None:
  564. message.null_value = 0
  565. elif isinstance(value, bool):
  566. message.bool_value = value
  567. elif isinstance(value, six.string_types):
  568. message.string_value = value
  569. elif isinstance(value, _INT_OR_FLOAT):
  570. message.number_value = value
  571. else:
  572. raise ParseError('Value {0} has unexpected type {1}.'.format(
  573. value, type(value)))
  574. def _ConvertListValueMessage(self, value, message):
  575. """Convert a JSON representation into ListValue message."""
  576. if not isinstance(value, list):
  577. raise ParseError(
  578. 'ListValue must be in [] which is {0}.'.format(value))
  579. message.ClearField('values')
  580. for item in value:
  581. self._ConvertValueMessage(item, message.values.add())
  582. def _ConvertStructMessage(self, value, message):
  583. """Convert a JSON representation into Struct message."""
  584. if not isinstance(value, dict):
  585. raise ParseError(
  586. 'Struct must be in a dict which is {0}.'.format(value))
  587. # Clear will mark the struct as modified so it will be created even if
  588. # there are no values.
  589. message.Clear()
  590. for key in value:
  591. self._ConvertValueMessage(value[key], message.fields[key])
  592. return
  593. def _ConvertWrapperMessage(self, value, message):
  594. """Convert a JSON representation into Wrapper message."""
  595. field = message.DESCRIPTOR.fields_by_name['value']
  596. setattr(message, 'value', _ConvertScalarFieldValue(value, field))
  597. def _ConvertMapFieldValue(self, value, message, field):
  598. """Convert map field value for a message map field.
  599. Args:
  600. value: A JSON object to convert the map field value.
  601. message: A protocol message to record the converted data.
  602. field: The descriptor of the map field to be converted.
  603. Raises:
  604. ParseError: In case of convert problems.
  605. """
  606. if not isinstance(value, dict):
  607. raise ParseError(
  608. 'Map field {0} must be in a dict which is {1}.'.format(
  609. field.name, value))
  610. key_field = field.message_type.fields_by_name['key']
  611. value_field = field.message_type.fields_by_name['value']
  612. for key in value:
  613. key_value = _ConvertScalarFieldValue(key, key_field, True)
  614. if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
  615. self.ConvertMessage(value[key], getattr(
  616. message, field.name)[key_value])
  617. else:
  618. getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
  619. value[key], value_field)
  620. def _ConvertScalarFieldValue(value, field, require_str=False):
  621. """Convert a single scalar field value.
  622. Args:
  623. value: A scalar value to convert the scalar field value.
  624. field: The descriptor of the field to convert.
  625. require_str: If True, the field value must be a str.
  626. Returns:
  627. The converted scalar field value
  628. Raises:
  629. ParseError: In case of convert problems.
  630. """
  631. if field.cpp_type in _INT_TYPES:
  632. return _ConvertInteger(value)
  633. elif field.cpp_type in _FLOAT_TYPES:
  634. return _ConvertFloat(value, field)
  635. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
  636. return _ConvertBool(value, require_str)
  637. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
  638. if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
  639. if isinstance(value, six.text_type):
  640. encoded = value.encode('utf-8')
  641. else:
  642. encoded = value
  643. # Add extra padding '='
  644. padded_value = encoded + b'=' * (4 - len(encoded) % 4)
  645. return base64.urlsafe_b64decode(padded_value)
  646. else:
  647. # Checking for unpaired surrogates appears to be unreliable,
  648. # depending on the specific Python version, so we check manually.
  649. if _UNPAIRED_SURROGATE_PATTERN.search(value):
  650. raise ParseError('Unpaired surrogate')
  651. return value
  652. elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
  653. # Convert an enum value.
  654. enum_value = field.enum_type.values_by_name.get(value, None)
  655. if enum_value is None:
  656. try:
  657. number = int(value)
  658. enum_value = field.enum_type.values_by_number.get(number, None)
  659. except ValueError:
  660. raise ParseError('Invalid enum value {0} for enum type {1}.'.format(
  661. value, field.enum_type.full_name))
  662. if enum_value is None:
  663. if field.file.syntax == 'proto3':
  664. # Proto3 accepts unknown enums.
  665. return number
  666. raise ParseError('Invalid enum value {0} for enum type {1}.'.format(
  667. value, field.enum_type.full_name))
  668. return enum_value.number
  669. def _ConvertInteger(value):
  670. """Convert an integer.
  671. Args:
  672. value: A scalar value to convert.
  673. Returns:
  674. The integer value.
  675. Raises:
  676. ParseError: If an integer couldn't be consumed.
  677. """
  678. if isinstance(value, float) and not value.is_integer():
  679. raise ParseError('Couldn\'t parse integer: {0}.'.format(value))
  680. if isinstance(value, six.text_type) and value.find(' ') != -1:
  681. raise ParseError('Couldn\'t parse integer: "{0}".'.format(value))
  682. if isinstance(value, bool):
  683. raise ParseError('Bool value {0} is not acceptable for '
  684. 'integer field.'.format(value))
  685. return int(value)
  686. def _ConvertFloat(value, field):
  687. """Convert an floating point number."""
  688. if isinstance(value, float):
  689. if math.isnan(value):
  690. raise ParseError('Couldn\'t parse NaN, use quoted "NaN" instead.')
  691. if math.isinf(value):
  692. if value > 0:
  693. raise ParseError('Couldn\'t parse Infinity or value too large, '
  694. 'use quoted "Infinity" instead.')
  695. else:
  696. raise ParseError('Couldn\'t parse -Infinity or value too small, '
  697. 'use quoted "-Infinity" instead.')
  698. if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
  699. # pylint: disable=protected-access
  700. if value > type_checkers._FLOAT_MAX:
  701. raise ParseError('Float value too large')
  702. # pylint: disable=protected-access
  703. if value < type_checkers._FLOAT_MIN:
  704. raise ParseError('Float value too small')
  705. if value == 'nan':
  706. raise ParseError('Couldn\'t parse float "nan", use "NaN" instead.')
  707. try:
  708. # Assume Python compatible syntax.
  709. return float(value)
  710. except ValueError:
  711. # Check alternative spellings.
  712. if value == _NEG_INFINITY:
  713. return float('-inf')
  714. elif value == _INFINITY:
  715. return float('inf')
  716. elif value == _NAN:
  717. return float('nan')
  718. else:
  719. raise ParseError('Couldn\'t parse float: {0}.'.format(value))
  720. def _ConvertBool(value, require_str):
  721. """Convert a boolean value.
  722. Args:
  723. value: A scalar value to convert.
  724. require_str: If True, value must be a str.
  725. Returns:
  726. The bool parsed.
  727. Raises:
  728. ParseError: If a boolean value couldn't be consumed.
  729. """
  730. if require_str:
  731. if value == 'true':
  732. return True
  733. elif value == 'false':
  734. return False
  735. else:
  736. raise ParseError('Expected "true" or "false", not {0}.'.format(value))
  737. if not isinstance(value, bool):
  738. raise ParseError('Expected true or false without quotes.')
  739. return value
  740. _WKTJSONMETHODS = {
  741. 'google.protobuf.Any': ['_AnyMessageToJsonObject',
  742. '_ConvertAnyMessage'],
  743. 'google.protobuf.Duration': ['_GenericMessageToJsonObject',
  744. '_ConvertGenericMessage'],
  745. 'google.protobuf.FieldMask': ['_GenericMessageToJsonObject',
  746. '_ConvertGenericMessage'],
  747. 'google.protobuf.ListValue': ['_ListValueMessageToJsonObject',
  748. '_ConvertListValueMessage'],
  749. 'google.protobuf.Struct': ['_StructMessageToJsonObject',
  750. '_ConvertStructMessage'],
  751. 'google.protobuf.Timestamp': ['_GenericMessageToJsonObject',
  752. '_ConvertGenericMessage'],
  753. 'google.protobuf.Value': ['_ValueMessageToJsonObject',
  754. '_ConvertValueMessage']
  755. }