zhihu.collection 源代码

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from .common import *
from .base import BaseZhihu


[文档]class Collection(BaseZhihu): """收藏夹,请使用``ZhihuClient.collection``方法构造对象.""" @class_common_init(re_collection_url)
[文档] def __init__(self, url, owner=None, name=None, follower_num=None, session=None): """创建收藏夹类实例. :param str url: 收藏夹主页url,必须 :param Author owner: 收藏夹拥有者,可选 :param str name: 收藏夹标题,可选 :param int follower_num: 收藏夹关注人数,可选 :param Session session: 使用的网络会话,为空则使用新会话。 :return: 收藏夹对象 :rtype: Collection """ self.url = url self._session = session self.soup = None self._name = name self._owner = owner self._follower_num = follower_num self._id = int(re.match(r'.*/(\d+)', self.url).group(1))
@property def id(self): """获取收藏夹id(网址最后的部分). :return: 收藏夹id :rtype: int """ return self._id @property @check_soup('_cid') def cid(self): """获取收藏夹内部Id(用不到忽视就好) :return: 内部Id :rtype: int """ return int(re_get_number.match( self.soup.find('a', attrs={'name': 'focus'})['id']).group(1)) @property @check_soup('_xsrf') def xsrf(self): """获取知乎的反xsrf参数(用不到就忽视吧~) :return: xsrf参数 :rtype: str """ return self.soup.find( 'input', attrs={'name': '_xsrf'})['value'] @property @check_soup('_name') def name(self): """获取收藏夹名字. :return: 收藏夹名字 :rtype: str """ return re_del_empty_line.match( self.soup.find('h2', id='zh-fav-head-title').text).group(1) @property @check_soup('_owner') def owner(self): """获取收藏夹拥有者,返回Author对象. :return: 收藏夹拥有者 :rtype: Author """ from .author import Author a = self.soup.find('h2', class_='zm-list-content-title').a name = a.text url = Zhihu_URL + a['href'] motto = self.soup.find( 'div', id='zh-single-answer-author-info').div.text photo_url = PROTOCOL + self.soup.find( 'img', class_='zm-list-avatar-medium')['src'].replace('_m', '_r') return Author(url, name, motto, photo_url=photo_url, session=self._session) @property @check_soup('_follower_num') def follower_num(self): """获取关注此收藏夹的人数. :return: 关注此收藏夹的人数 :rtype: int """ href = re_collection_url_split.match(self.url).group(1) return int(self.soup.find('a', href=href + 'followers').text) @property def followers(self): """获取关注此收藏夹的用户 :return: 关注此收藏夹的用户 :rtype: Author.Iterable """ self._make_soup() followers_url = self.url + 'followers' for x in common_follower(followers_url, self.xsrf, self._session): yield x @property def questions(self): """获取收藏夹内所有问题对象. :return: 收藏夹内所有问题,返回生成器 :rtype: Question.Iterable """ self._make_soup() # noinspection PyTypeChecker for question in self._page_get_questions(self.soup): yield question i = 2 while True: soup = BeautifulSoup(self._session.get( self.url[:-1] + '?page=' + str(i)).text) for question in self._page_get_questions(soup): if question == 0: return yield question i += 1 @property def answers(self): """获取收藏夹内所有答案对象. :return: 收藏夹内所有答案,返回生成器 :rtype: Answer.Iterable """ self._make_soup() # noinspection PyTypeChecker for answer in self._page_get_answers(self.soup): yield answer i = 2 while True: soup = BeautifulSoup(self._session.get( self.url[:-1] + '?page=' + str(i)).text) for answer in self._page_get_answers(soup): if answer == 0: return yield answer i += 1 @property def logs(self): """获取收藏夹日志 :return: 收藏夹日志中的操作,返回生成器 :rtype: CollectActivity.Iterable """ import time from datetime import datetime from .answer import Answer from .question import Question from .acttype import CollectActType self._make_soup() gotten_feed_num = 20 offset = 0 data = { 'start': 0, '_xsrf': self.xsrf } api_url = self.url + 'log' while gotten_feed_num == 20: data['offset'] = offset res = self._session.post(url=api_url, data=data) gotten_feed_num = res.json()['msg'][0] soup = BeautifulSoup(res.json()['msg'][1]) offset += gotten_feed_num zm_items = soup.find_all('div', class_='zm-item') for zm_item in zm_items: act_time = datetime.strptime(zm_item.find('time').text, "%Y-%m-%d %H:%M:%S") if zm_item.find('ins'): link = zm_item.find('ins').a act_type = CollectActType.INSERT_ANSWER elif zm_item.find('del'): link = zm_item.find('del').a act_type = CollectActType.DELETE_ANSWER else: continue try: answer_url = Zhihu_URL + link['href'] question_url = re_a2q.match(answer_url).group(1) question = Question(question_url, link.text) answer = Answer( answer_url, question, session=self._session) yield CollectActivity( act_type, act_time, self.owner, self, answer) except AttributeError: act_type = CollectActType.CREATE_COLLECTION yield CollectActivity( act_type, act_time, self.owner, self) data['start'] = zm_items[-1]['id'][8:] time.sleep(0.5) def _page_get_questions(self, soup): from .question import Question question_tags = soup.find_all("div", class_="zm-item") if len(question_tags) == 0: yield 0 return else: for question_tag in question_tags: if question_tag.h2 is not None: question_title = question_tag.h2.a.text question_url = Zhihu_URL + question_tag.h2.a['href'] yield Question(question_url, question_title, session=self._session) def _page_get_answers(self, soup): from .question import Question from .author import Author, ANONYMOUS from .answer import Answer answer_tags = soup.find_all("div", class_="zm-item") if len(answer_tags) == 0: yield 0 return else: question = None for tag in answer_tags: # 判断是否是'建议修改的回答'等情况 url_tag = tag.find('a', class_='answer-date-link') if url_tag is None: reason = tag.find('div', id='answer-status').p.text print("pass a answer, reason %s ." % reason) continue if tag.h2 is not None: question_title = tag.h2.a.text question_url = Zhihu_URL + tag.h2.a['href'] question = Question(question_url, question_title, session=self._session) answer_url = Zhihu_URL + url_tag['href'] div = tag.find('div', class_='zm-item-answer-author-info') author_link = div.find('a', class_='author-link') if author_link is not None: author_url = Zhihu_URL + author_link['href'] author_name = author_link.text motto_span = div.find('span', class_='bio') author_motto = motto_span['title'] if motto_span else '' author = Author(author_url, author_name, author_motto, session=self._session) else: author = ANONYMOUS upvote_num = tag.find('a', class_='zm-item-vote-count').text if upvote_num.isdigit(): upvote_num = int(upvote_num) else: upvote_num = None answer = Answer(answer_url, question, author, upvote_num, session=self._session) yield answer
[文档]class CollectActivity: """收藏夹操作, 请使用``Collection.logs``构造对象."""
[文档] def __init__(self, type, time, owner, collection, answer=None): """创建收藏夹操作类实例 :param acttype.CollectActType type: 操作类型 :param datetime.datetime time: 进行操作的时间 :param Author owner: 收藏夹的拥有者 :param Collection collection: 所属收藏夹 :param Answer answer: 收藏的答案,可选 :return: CollectActivity """ self._type = type self._time = time self._owner = owner self._collection = collection self._answer = answer
@property def type(self): """ :return: 收藏夹操作类型, 具体参见 :class:`.CollectActType` :rtype: :class:`.CollectActType` """ return self._type @property def answer(self): """ :return: 添加或删除收藏的答案, 若是创建收藏夹操作返回 None :rtype: Answer or None """ return self._answer @property def time(self): """ :return: 进行操作的时间 :rtype: datetime.datetime """ return self._time @property def owner(self): """ :return: 收藏夹的拥有者 :rtype: Author """ return self._owner @property def collection(self): """ :return: 所属收藏夹 :rtype: Collection """ return self._collection