# Source code for zhihu.collection

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from .common import *
from .base import BaseZhihu


class Collection(BaseZhihu):
    """A Zhihu collection (favorites folder).

    Construct instances through ``ZhihuClient.collection``.
    """

    @class_common_init(re_collection_url)
    def __init__(self, url, owner=None, name=None, follower_num=None,
                 session=None):
        """Create a collection instance.

        :param str url: URL of the collection's home page, required
        :param Author owner: owner of the collection, optional
        :param str name: title of the collection, optional
        :param int follower_num: number of followers, optional
        :param Session session: network session to use; when empty a new
            session is used (presumably supplied by the
            ``class_common_init`` decorator -- confirm in ``common``).
        :return: the collection object
        :rtype: Collection
        """
        self.url = url
        self._session = session
        self.soup = None
        self._name = name
        self._owner = owner
        self._follower_num = follower_num
        # The id is the last run of digits that follows a slash in the URL.
        self._id = int(re.match(r'.*/(\d+)', self.url).group(1))

    @property
    def id(self):
        """Get the collection id (the last part of the URL).

        :return: collection id
        :rtype: int
        """
        return self._id

    @property
    @check_soup('_cid')
    def cid(self):
        """Get the collection's internal id (ignore it if you don't need it).

        :return: internal id
        :rtype: int
        """
        focus_anchor = self.soup.find('a', attrs={'name': 'focus'})
        return int(re_get_number.match(focus_anchor['id']).group(1))

    @property
    @check_soup('_xsrf')
    def xsrf(self):
        """Get Zhihu's anti-XSRF token (ignore it if you don't need it).

        :return: xsrf token
        :rtype: str
        """
        return self.soup.find(
            'input', attrs={'name': '_xsrf'})['value']

    @property
    @check_soup('_name')
    def name(self):
        """Get the collection's name.

        :return: collection name
        :rtype: str
        """
        title = self.soup.find('h2', id='zh-fav-head-title').text
        return re_del_empty_line.match(title).group(1)

    @property
    @check_soup('_owner')
    def owner(self):
        """Get the owner of the collection as an Author object.

        :return: owner of the collection
        :rtype: Author
        """
        # Imported lazily, as in the other accessors of this class
        # (presumably to avoid a circular import -- confirm).
        from .author import Author

        a = self.soup.find('h2', class_='zm-list-content-title').a
        name = a.text
        url = Zhihu_URL + a['href']
        motto = self.soup.find(
            'div', id='zh-single-answer-author-info').div.text
        # Swap the '_m' avatar suffix for '_r' to select another variant
        # of the picture (presumably a larger one -- confirm).
        photo_url = PROTOCOL + self.soup.find(
            'img', class_='zm-list-avatar-medium')['src'].replace('_m', '_r')
        return Author(url, name, motto, photo_url=photo_url,
                      session=self._session)

    @property
    @check_soup('_follower_num')
    def follower_num(self):
        """Get the number of users following this collection.

        :return: number of followers
        :rtype: int
        """
        href = re_collection_url_split.match(self.url).group(1)
        return int(self.soup.find('a', href=href + 'followers').text)

    @property
    def followers(self):
        """Get the users following this collection.

        :return: users following this collection, as a generator
        :rtype: Author.Iterable
        """
        self._make_soup()
        followers_url = self.url + 'followers'
        for x in common_follower(followers_url, self.xsrf, self._session):
            yield x

    @property
    def questions(self):
        """Get every question in the collection.

        :return: all questions in the collection, as a generator
        :rtype: Question.Iterable
        """
        return self._iter_pages(self._page_get_questions)

    @property
    def answers(self):
        """Get every answer in the collection.

        :return: all answers in the collection, as a generator
        :rtype: Answer.Iterable
        """
        return self._iter_pages(self._page_get_answers)

    def _iter_pages(self, page_parser):
        """Yield the items of every page of the collection, in page order.

        The first page comes from the soup built by ``_make_soup``; later
        pages are fetched with a ``?page=N`` query. Iteration stops at the
        first page for which ``page_parser`` yields the ``0`` sentinel,
        including the first page, so the sentinel never reaches the caller.

        :param page_parser: ``_page_get_questions`` or ``_page_get_answers``
        :return: generator of the parsed items
        """
        self._make_soup()
        soup = self.soup
        page = 1
        while True:
            # noinspection PyTypeChecker
            for item in page_parser(soup):
                if item == 0:
                    return
                yield item
            page += 1
            # ``self.url[:-1]`` drops the URL's last character before the
            # query string is appended (assumes the URL ends with '/').
            soup = BeautifulSoup(self._session.get(
                self.url[:-1] + '?page=' + str(page)).text)

    @property
    def logs(self):
        """Get the collection's activity log.

        :return: operations recorded in the log, as a generator
        :rtype: CollectActivity.Iterable
        """
        import time
        from datetime import datetime
        from .answer import Answer
        from .question import Question
        from .acttype import CollectActType

        self._make_soup()
        page_size = 20
        gotten_feed_num = page_size
        offset = 0
        data = {
            'start': 0,
            '_xsrf': self.xsrf
        }
        api_url = self.url + 'log'
        # A response reporting fewer than ``page_size`` entries is the last.
        while gotten_feed_num == page_size:
            data['offset'] = offset
            res = self._session.post(url=api_url, data=data)
            # Decode the JSON body once: msg[0] is the entry count and
            # msg[1] the HTML fragment holding the entries.
            msg = res.json()['msg']
            gotten_feed_num = msg[0]
            soup = BeautifulSoup(msg[1])
            offset += gotten_feed_num
            zm_items = soup.find_all('div', class_='zm-item')
            if not zm_items:
                # Nothing to parse and no last item to continue from.
                break

            for zm_item in zm_items:
                act_time = datetime.strptime(
                    zm_item.find('time').text, "%Y-%m-%d %H:%M:%S")
                inserted = zm_item.find('ins')
                deleted = zm_item.find('del')
                if inserted:
                    link = inserted.a
                    act_type = CollectActType.INSERT_ANSWER
                elif deleted:
                    link = deleted.a
                    act_type = CollectActType.DELETE_ANSWER
                else:
                    continue
                try:
                    answer_url = Zhihu_URL + link['href']
                    question_url = re_a2q.match(answer_url).group(1)
                    question = Question(question_url, link.text,
                                        session=self._session)
                    answer = Answer(
                        answer_url, question, session=self._session)
                except AttributeError:
                    # The link is not an answer link; such an entry is
                    # reported as the creation of the collection.
                    act_type = CollectActType.CREATE_COLLECTION
                    answer = None
                yield CollectActivity(
                    act_type, act_time, self.owner, self, answer)
            # Continue from the last item seen; its id attribute carries
            # an 8-character prefix in front of the value the API expects.
            data['start'] = zm_items[-1]['id'][8:]
            time.sleep(0.5)

    def _page_get_questions(self, soup):
        """Yield the questions found on one page of the collection.

        :param soup: parsed HTML of a single collection page
        :return: generator of Question objects; yields the single sentinel
            ``0`` when the page contains no items at all
        """
        from .question import Question

        question_tags = soup.find_all("div", class_="zm-item")
        if not question_tags:
            yield 0
            return
        for question_tag in question_tags:
            # Only items that carry an <h2> heading yield a question.
            if question_tag.h2 is None:
                continue
            question_title = question_tag.h2.a.text
            question_url = Zhihu_URL + question_tag.h2.a['href']
            yield Question(question_url, question_title,
                           session=self._session)

    def _page_get_answers(self, soup):
        """Yield the answers found on one page of the collection.

        :param soup: parsed HTML of a single collection page
        :return: generator of Answer objects; yields the single sentinel
            ``0`` when the page contains no items at all
        """
        from .question import Question
        from .author import Author, ANONYMOUS
        from .answer import Answer

        answer_tags = soup.find_all("div", class_="zm-item")
        if not answer_tags:
            yield 0
            return
        # Items without their own <h2> heading reuse the most recently
        # seen question.
        question = None
        for tag in answer_tags:
            # Skip special cases such as "answer suggested for revision",
            # which have no answer link.
            url_tag = tag.find('a', class_='answer-date-link')
            if url_tag is None:
                reason = tag.find('div', id='answer-status').p.text
                print("pass a answer, reason %s ." % reason)
                continue
            if tag.h2 is not None:
                question_title = tag.h2.a.text
                question_url = Zhihu_URL + tag.h2.a['href']
                question = Question(question_url, question_title,
                                    session=self._session)
            answer_url = Zhihu_URL + url_tag['href']
            div = tag.find('div', class_='zm-item-answer-author-info')
            author_link = div.find('a', class_='author-link')
            if author_link is not None:
                author_url = Zhihu_URL + author_link['href']
                author_name = author_link.text
                motto_span = div.find('span', class_='bio')
                author_motto = motto_span['title'] if motto_span else ''
                author = Author(author_url, author_name, author_motto,
                                session=self._session)
            else:
                # No author link: the answer is treated as anonymous.
                author = ANONYMOUS
            upvote_text = tag.find('a', class_='zm-item-vote-count').text
            # A vote count that is not purely digits is reported as None.
            upvote_num = int(upvote_text) if upvote_text.isdigit() else None
            yield Answer(answer_url, question, author,
                         upvote_num, session=self._session)


class CollectActivity:
    """An operation on a collection.

    Instances are produced by ``Collection.logs``.
    """

    def __init__(self, type, time, owner, collection, answer=None):
        """Create a collection-operation instance.

        :param acttype.CollectActType type: kind of operation
        :param datetime.datetime time: when the operation happened
        :param Author owner: owner of the collection
        :param Collection collection: collection the operation belongs to
        :param Answer answer: the answer involved, optional
        :return: CollectActivity
        """
        self._type = type
        self._time = time
        self._owner = owner
        self._collection = collection
        self._answer = answer

    @property
    def type(self):
        """
        :return: kind of operation, see :class:`.CollectActType`
        :rtype: :class:`.CollectActType`
        """
        return self._type

    @property
    def answer(self):
        """
        :return: the answer that was added or removed, or None when the
            operation is the creation of the collection
        :rtype: Answer or None
        """
        return self._answer

    @property
    def time(self):
        """
        :return: when the operation happened
        :rtype: datetime.datetime
        """
        return self._time

    @property
    def owner(self):
        """
        :return: owner of the collection
        :rtype: Author
        """
        return self._owner

    @property
    def collection(self):
        """
        :return: collection the operation belongs to
        :rtype: Collection
        """
        return self._collection