店播爬取Python脚本

extension_dict.cc 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: anuraag@google.com (Anuraag Agrawal)
  31. // Author: tibell@google.com (Johan Tibell)
  32. #include <google/protobuf/pyext/extension_dict.h>
  33. #include <cstdint>
  34. #include <memory>
  35. #include <google/protobuf/stubs/logging.h>
  36. #include <google/protobuf/stubs/common.h>
  37. #include <google/protobuf/descriptor.pb.h>
  38. #include <google/protobuf/descriptor.h>
  39. #include <google/protobuf/dynamic_message.h>
  40. #include <google/protobuf/message.h>
  41. #include <google/protobuf/pyext/descriptor.h>
  42. #include <google/protobuf/pyext/message.h>
  43. #include <google/protobuf/pyext/message_factory.h>
  44. #include <google/protobuf/pyext/repeated_composite_container.h>
  45. #include <google/protobuf/pyext/repeated_scalar_container.h>
  46. #include <google/protobuf/pyext/scoped_pyobject_ptr.h>
  // Python 3 shim for PyString_AsStringAndSize(): accepts either a unicode
  // object (its UTF-8 buffer is stored through `charpp`) or anything that
  // PyBytes_AsStringAndSize() accepts. For unicode input the expression is -1
  // when the UTF-8 conversion yields NULL and 0 otherwise; for other input it
  // is the result of PyBytes_AsStringAndSize().
  #if PY_MAJOR_VERSION >= 3
  #if PY_VERSION_HEX < 0x03030000
  #error "Python 3.0 - 3.2 are not supported."
  #endif
  #define PyString_AsStringAndSize(ob, charpp, sizep)               \
    (PyUnicode_Check(ob)                                            \
         ? ((*(charpp) = const_cast<char*>(                         \
                 PyUnicode_AsUTF8AndSize(ob, (sizep)))) == NULL     \
                ? -1                                                \
                : 0)                                                \
         : PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
  #endif
  58. namespace google {
  59. namespace protobuf {
  60. namespace python {
  61. namespace extension_dict {
  62. static Py_ssize_t len(ExtensionDict* self) {
  63. Py_ssize_t size = 0;
  64. std::vector<const FieldDescriptor*> fields;
  65. self->parent->message->GetReflection()->ListFields(*self->parent->message,
  66. &fields);
  67. for (size_t i = 0; i < fields.size(); ++i) {
  68. if (fields[i]->is_extension()) {
  69. // With C++ descriptors, the field can always be retrieved, but for
  70. // unknown extensions which have not been imported in Python code, there
  71. // is no message class and we cannot retrieve the value.
  72. // ListFields() has the same behavior.
  73. if (fields[i]->message_type() != nullptr &&
  74. message_factory::GetMessageClass(
  75. cmessage::GetFactoryForMessage(self->parent),
  76. fields[i]->message_type()) == nullptr) {
  77. PyErr_Clear();
  78. continue;
  79. }
  80. ++size;
  81. }
  82. }
  83. return size;
  84. }
  85. struct ExtensionIterator {
  86. PyObject_HEAD;
  87. Py_ssize_t index;
  88. std::vector<const FieldDescriptor*> fields;
  89. // Owned reference, to keep the FieldDescriptors alive.
  90. ExtensionDict* extension_dict;
  91. };
  92. PyObject* GetIter(PyObject* _self) {
  93. ExtensionDict* self = reinterpret_cast<ExtensionDict*>(_self);
  94. ScopedPyObjectPtr obj(PyType_GenericAlloc(&ExtensionIterator_Type, 0));
  95. if (obj == nullptr) {
  96. return PyErr_Format(PyExc_MemoryError,
  97. "Could not allocate extension iterator");
  98. }
  99. ExtensionIterator* iter = reinterpret_cast<ExtensionIterator*>(obj.get());
  100. // Call "placement new" to initialize. So the constructor of
  101. // std::vector<...> fields will be called.
  102. new (iter) ExtensionIterator;
  103. self->parent->message->GetReflection()->ListFields(*self->parent->message,
  104. &iter->fields);
  105. iter->index = 0;
  106. Py_INCREF(self);
  107. iter->extension_dict = self;
  108. return obj.release();
  109. }
  110. static void DeallocExtensionIterator(PyObject* _self) {
  111. ExtensionIterator* self = reinterpret_cast<ExtensionIterator*>(_self);
  112. self->fields.clear();
  113. Py_XDECREF(self->extension_dict);
  114. self->~ExtensionIterator();
  115. Py_TYPE(_self)->tp_free(_self);
  116. }
  117. PyObject* subscript(ExtensionDict* self, PyObject* key) {
  118. const FieldDescriptor* descriptor = cmessage::GetExtensionDescriptor(key);
  119. if (descriptor == NULL) {
  120. return NULL;
  121. }
  122. if (!CheckFieldBelongsToMessage(descriptor, self->parent->message)) {
  123. return NULL;
  124. }
  125. if (descriptor->label() != FieldDescriptor::LABEL_REPEATED &&
  126. descriptor->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
  127. return cmessage::InternalGetScalar(self->parent->message, descriptor);
  128. }
  129. CMessage::CompositeFieldsMap::iterator iterator =
  130. self->parent->composite_fields->find(descriptor);
  131. if (iterator != self->parent->composite_fields->end()) {
  132. Py_INCREF(iterator->second);
  133. return iterator->second->AsPyObject();
  134. }
  135. if (descriptor->label() != FieldDescriptor::LABEL_REPEATED &&
  136. descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
  137. // TODO(plabatut): consider building the class on the fly!
  138. ContainerBase* sub_message = cmessage::InternalGetSubMessage(
  139. self->parent, descriptor);
  140. if (sub_message == NULL) {
  141. return NULL;
  142. }
  143. (*self->parent->composite_fields)[descriptor] = sub_message;
  144. return sub_message->AsPyObject();
  145. }
  146. if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
  147. if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
  148. // On the fly message class creation is needed to support the following
  149. // situation:
  150. // 1- add FileDescriptor to the pool that contains extensions of a message
  151. // defined by another proto file. Do not create any message classes.
  152. // 2- instantiate an extended message, and access the extension using
  153. // the field descriptor.
  154. // 3- the extension submessage fails to be returned, because no class has
  155. // been created.
  156. // It happens when deserializing text proto format, or when enumerating
  157. // fields of a deserialized message.
  158. CMessageClass* message_class = message_factory::GetOrCreateMessageClass(
  159. cmessage::GetFactoryForMessage(self->parent),
  160. descriptor->message_type());
  161. ScopedPyObjectPtr message_class_handler(
  162. reinterpret_cast<PyObject*>(message_class));
  163. if (message_class == NULL) {
  164. return NULL;
  165. }
  166. ContainerBase* py_container = repeated_composite_container::NewContainer(
  167. self->parent, descriptor, message_class);
  168. if (py_container == NULL) {
  169. return NULL;
  170. }
  171. (*self->parent->composite_fields)[descriptor] = py_container;
  172. return py_container->AsPyObject();
  173. } else {
  174. ContainerBase* py_container = repeated_scalar_container::NewContainer(
  175. self->parent, descriptor);
  176. if (py_container == NULL) {
  177. return NULL;
  178. }
  179. (*self->parent->composite_fields)[descriptor] = py_container;
  180. return py_container->AsPyObject();
  181. }
  182. }
  183. PyErr_SetString(PyExc_ValueError, "control reached unexpected line");
  184. return NULL;
  185. }
  186. int ass_subscript(ExtensionDict* self, PyObject* key, PyObject* value) {
  187. const FieldDescriptor* descriptor = cmessage::GetExtensionDescriptor(key);
  188. if (descriptor == NULL) {
  189. return -1;
  190. }
  191. if (!CheckFieldBelongsToMessage(descriptor, self->parent->message)) {
  192. return -1;
  193. }
  194. if (value == nullptr) {
  195. return cmessage::ClearFieldByDescriptor(self->parent, descriptor);
  196. }
  197. if (descriptor->label() != FieldDescriptor::LABEL_OPTIONAL ||
  198. descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
  199. PyErr_SetString(PyExc_TypeError, "Extension is repeated and/or composite "
  200. "type");
  201. return -1;
  202. }
  203. cmessage::AssureWritable(self->parent);
  204. if (cmessage::InternalSetScalar(self->parent, descriptor, value) < 0) {
  205. return -1;
  206. }
  207. return 0;
  208. }
  209. PyObject* _FindExtensionByName(ExtensionDict* self, PyObject* arg) {
  210. char* name;
  211. Py_ssize_t name_size;
  212. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  213. return NULL;
  214. }
  215. PyDescriptorPool* pool = cmessage::GetFactoryForMessage(self->parent)->pool;
  216. const FieldDescriptor* message_extension =
  217. pool->pool->FindExtensionByName(StringParam(name, name_size));
  218. if (message_extension == NULL) {
  219. // Is is the name of a message set extension?
  220. const Descriptor* message_descriptor =
  221. pool->pool->FindMessageTypeByName(StringParam(name, name_size));
  222. if (message_descriptor && message_descriptor->extension_count() > 0) {
  223. const FieldDescriptor* extension = message_descriptor->extension(0);
  224. if (extension->is_extension() &&
  225. extension->containing_type()->options().message_set_wire_format() &&
  226. extension->type() == FieldDescriptor::TYPE_MESSAGE &&
  227. extension->label() == FieldDescriptor::LABEL_OPTIONAL) {
  228. message_extension = extension;
  229. }
  230. }
  231. }
  232. if (message_extension == NULL) {
  233. Py_RETURN_NONE;
  234. }
  235. return PyFieldDescriptor_FromDescriptor(message_extension);
  236. }
  237. PyObject* _FindExtensionByNumber(ExtensionDict* self, PyObject* arg) {
  238. int64_t number = PyLong_AsLong(arg);
  239. if (number == -1 && PyErr_Occurred()) {
  240. return NULL;
  241. }
  242. PyDescriptorPool* pool = cmessage::GetFactoryForMessage(self->parent)->pool;
  243. const FieldDescriptor* message_extension = pool->pool->FindExtensionByNumber(
  244. self->parent->message->GetDescriptor(), number);
  245. if (message_extension == NULL) {
  246. Py_RETURN_NONE;
  247. }
  248. return PyFieldDescriptor_FromDescriptor(message_extension);
  249. }
  250. static int Contains(PyObject* _self, PyObject* key) {
  251. ExtensionDict* self = reinterpret_cast<ExtensionDict*>(_self);
  252. const FieldDescriptor* field_descriptor =
  253. cmessage::GetExtensionDescriptor(key);
  254. if (field_descriptor == nullptr) {
  255. return -1;
  256. }
  257. if (!field_descriptor->is_extension()) {
  258. PyErr_Format(PyExc_KeyError, "%s is not an extension",
  259. field_descriptor->full_name().c_str());
  260. return -1;
  261. }
  262. const Message* message = self->parent->message;
  263. const Reflection* reflection = message->GetReflection();
  264. if (field_descriptor->is_repeated()) {
  265. if (reflection->FieldSize(*message, field_descriptor) > 0) {
  266. return 1;
  267. }
  268. } else {
  269. if (reflection->HasField(*message, field_descriptor)) {
  270. return 1;
  271. }
  272. }
  273. return 0;
  274. }
  275. ExtensionDict* NewExtensionDict(CMessage *parent) {
  276. ExtensionDict* self = reinterpret_cast<ExtensionDict*>(
  277. PyType_GenericAlloc(&ExtensionDict_Type, 0));
  278. if (self == NULL) {
  279. return NULL;
  280. }
  281. Py_INCREF(parent);
  282. self->parent = parent;
  283. return self;
  284. }
  285. void dealloc(PyObject* pself) {
  286. ExtensionDict* self = reinterpret_cast<ExtensionDict*>(pself);
  287. Py_CLEAR(self->parent);
  288. Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
  289. }
  290. static PyObject* RichCompare(ExtensionDict* self, PyObject* other, int opid) {
  291. // Only equality comparisons are implemented.
  292. if (opid != Py_EQ && opid != Py_NE) {
  293. Py_INCREF(Py_NotImplemented);
  294. return Py_NotImplemented;
  295. }
  296. bool equals = false;
  297. if (PyObject_TypeCheck(other, &ExtensionDict_Type)) {
  298. equals = self->parent == reinterpret_cast<ExtensionDict*>(other)->parent;;
  299. }
  300. if (equals ^ (opid == Py_EQ)) {
  301. Py_RETURN_FALSE;
  302. } else {
  303. Py_RETURN_TRUE;
  304. }
  305. }
  306. static PySequenceMethods SeqMethods = {
  307. (lenfunc)len, // sq_length
  308. 0, // sq_concat
  309. 0, // sq_repeat
  310. 0, // sq_item
  311. 0, // sq_slice
  312. 0, // sq_ass_item
  313. 0, // sq_ass_slice
  314. (objobjproc)Contains, // sq_contains
  315. };
  316. static PyMappingMethods MpMethods = {
  317. (lenfunc)len, /* mp_length */
  318. (binaryfunc)subscript, /* mp_subscript */
  319. (objobjargproc)ass_subscript,/* mp_ass_subscript */
  320. };
  321. #define EDMETHOD(name, args, doc) { #name, (PyCFunction)name, args, doc }
  322. static PyMethodDef Methods[] = {
  323. EDMETHOD(_FindExtensionByName, METH_O, "Finds an extension by name."),
  324. EDMETHOD(_FindExtensionByNumber, METH_O,
  325. "Finds an extension by field number."),
  326. {NULL, NULL},
  327. };
  328. } // namespace extension_dict
  329. PyTypeObject ExtensionDict_Type = {
  330. PyVarObject_HEAD_INIT(&PyType_Type, 0) //
  331. FULL_MODULE_NAME ".ExtensionDict", // tp_name
  332. sizeof(ExtensionDict), // tp_basicsize
  333. 0, // tp_itemsize
  334. (destructor)extension_dict::dealloc, // tp_dealloc
  335. 0, // tp_print
  336. 0, // tp_getattr
  337. 0, // tp_setattr
  338. 0, // tp_compare
  339. 0, // tp_repr
  340. 0, // tp_as_number
  341. &extension_dict::SeqMethods, // tp_as_sequence
  342. &extension_dict::MpMethods, // tp_as_mapping
  343. PyObject_HashNotImplemented, // tp_hash
  344. 0, // tp_call
  345. 0, // tp_str
  346. 0, // tp_getattro
  347. 0, // tp_setattro
  348. 0, // tp_as_buffer
  349. Py_TPFLAGS_DEFAULT, // tp_flags
  350. "An extension dict", // tp_doc
  351. 0, // tp_traverse
  352. 0, // tp_clear
  353. (richcmpfunc)extension_dict::RichCompare, // tp_richcompare
  354. 0, // tp_weaklistoffset
  355. extension_dict::GetIter, // tp_iter
  356. 0, // tp_iternext
  357. extension_dict::Methods, // tp_methods
  358. 0, // tp_members
  359. 0, // tp_getset
  360. 0, // tp_base
  361. 0, // tp_dict
  362. 0, // tp_descr_get
  363. 0, // tp_descr_set
  364. 0, // tp_dictoffset
  365. 0, // tp_init
  366. };
  367. PyObject* IterNext(PyObject* _self) {
  368. extension_dict::ExtensionIterator* self =
  369. reinterpret_cast<extension_dict::ExtensionIterator*>(_self);
  370. Py_ssize_t total_size = self->fields.size();
  371. Py_ssize_t index = self->index;
  372. while (self->index < total_size) {
  373. index = self->index;
  374. ++self->index;
  375. if (self->fields[index]->is_extension()) {
  376. // With C++ descriptors, the field can always be retrieved, but for
  377. // unknown extensions which have not been imported in Python code, there
  378. // is no message class and we cannot retrieve the value.
  379. // ListFields() has the same behavior.
  380. if (self->fields[index]->message_type() != nullptr &&
  381. message_factory::GetMessageClass(
  382. cmessage::GetFactoryForMessage(self->extension_dict->parent),
  383. self->fields[index]->message_type()) == nullptr) {
  384. PyErr_Clear();
  385. continue;
  386. }
  387. return PyFieldDescriptor_FromDescriptor(self->fields[index]);
  388. }
  389. }
  390. return nullptr;
  391. }
  392. PyTypeObject ExtensionIterator_Type = {
  393. PyVarObject_HEAD_INIT(&PyType_Type, 0) //
  394. FULL_MODULE_NAME ".ExtensionIterator", // tp_name
  395. sizeof(extension_dict::ExtensionIterator), // tp_basicsize
  396. 0, // tp_itemsize
  397. extension_dict::DeallocExtensionIterator, // tp_dealloc
  398. 0, // tp_print
  399. 0, // tp_getattr
  400. 0, // tp_setattr
  401. 0, // tp_compare
  402. 0, // tp_repr
  403. 0, // tp_as_number
  404. 0, // tp_as_sequence
  405. 0, // tp_as_mapping
  406. 0, // tp_hash
  407. 0, // tp_call
  408. 0, // tp_str
  409. 0, // tp_getattro
  410. 0, // tp_setattro
  411. 0, // tp_as_buffer
  412. Py_TPFLAGS_DEFAULT, // tp_flags
  413. "A scalar map iterator", // tp_doc
  414. 0, // tp_traverse
  415. 0, // tp_clear
  416. 0, // tp_richcompare
  417. 0, // tp_weaklistoffset
  418. PyObject_SelfIter, // tp_iter
  419. IterNext, // tp_iternext
  420. 0, // tp_methods
  421. 0, // tp_members
  422. 0, // tp_getset
  423. 0, // tp_base
  424. 0, // tp_dict
  425. 0, // tp_descr_get
  426. 0, // tp_descr_set
  427. 0, // tp_dictoffset
  428. 0, // tp_init
  429. };
  430. } // namespace python
  431. } // namespace protobuf
  432. } // namespace google