zhihu.post 源代码

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from .common import *
from .base import BaseZhihu, JsonAsSoupMixin


[文档]class Post(JsonAsSoupMixin, BaseZhihu): """专栏文章类,请使用``ZhihuClient.post``方法构造对象.""" @class_common_init(re_post_url)
[文档] def __init__(self, url, column=None, author=None, title=None, upvote_num=None, comment_num=None, session=None): """创建专栏文章类实例. :param str url: 文章url :param Column column: 文章所属专栏,可选 :param Author author: 文章作者,可选 :param str title: 文章标题,可选 :param int upvote_num: 文章赞同数,可选 :param int comment_num: 文章评论数,可选 :param Session session: 使用的网络会话,为空则使用新会话 :return: 专栏文章对象 :rtype: Post """ match = re_post_url.match(url) self.url = url self._session = session self._column = column self._author = author self._title = title self._upvote_num = upvote_num self._comment_num = comment_num self._slug = int(match.group(1)) # 文章编号
def _make_soup(self): if self.soup is None: json = self._get_content() self._gen_soup(json) def _get_content(self): origin_host = self._session.headers.get('Host') self._session.headers.update(Host='zhuanlan.zhihu.com') json = self._session.get(Column_Post_Data.format(self.slug)).json() self._session.headers.update(Host=origin_host) return json @property def column_in_name(self): """获取文章所在专栏的内部名称(用不到就忽视吧~) :return: 专栏的内部名称 :rtype: str """ self._make_soup() if 'column' in self.soup: return self.soup['column']['slug'] else: return None @property def slug(self): """获取文章的编号(用不到就忽视吧~) :return: 文章编号 :rtype: int """ return self._slug @property @check_soup('_column') def column(self): """获取文章所在专栏. :return: 文章所在专栏 :rtype: Column """ from .column import Column if 'column' in self.soup: url = Column_Url + '/' + self.soup['column']['slug'] name = self.soup['column']['name'] return Column(url, name, session=self._session) else: return None @property @check_soup('_author') def author(self): """获取文章作者. :return: 文章作者 :rtype: Author """ from .author import Author url = self.soup['author']['profileUrl'] name = self.soup['author']['name'] motto = self.soup['author']['bio'] template = self.soup['author']['avatar']['template'] photo_id = self.soup['author']['avatar']['id'] photo_url = template.format(id=photo_id, size='r') return Author(url, name, motto, photo_url=photo_url, session=self._session) @property @check_soup('_title') def title(self): """获取文章标题. :return: 文章标题 :rtype: str """ return self.soup['title'] @property @check_soup('_upvote_num') def upvote_num(self): """获取文章赞同数量. :return: 文章赞同数 :rtype: int """ return int(self.soup['likesCount']) @property @check_soup('_comment_num') def comment_num(self): """获取评论数量. :return: 评论数量 :rtype: int """ return self.soup['commentsCount']
[文档] def save(self, filepath=None, filename=None, mode="md"): """保存答案为 Html 文档或 markdown 文档. :param str filepath: 要保存的文件所在的目录, 不填为当前目录下以专栏标题命名的目录, 设为"."则为当前目录。 :param str filename: 要保存的文件名, 不填则默认为 所在文章标题 - 作者名.html/md。 如果文件已存在,自动在后面加上数字区分。 **自定义文件名时请不要输入后缀 .html 或 .md。** :param str mode: 保存类型,可选 `html` 、 `markdown` 、 `md` 。 :return: 无 :rtype: None """ if mode not in ["html", "md", "markdown"]: raise ValueError("`mode` must be 'html', 'markdown' or 'md'," " got {0}".format(mode)) self._make_soup() file = get_path(filepath, filename, mode, self.column.name, self.title + '-' + self.author.name) with open(file, 'wb') as f: if mode == "html": f.write(self.soup['content'].encode('utf-8')) else: import html2text h2t = html2text.HTML2Text() h2t.body_width = 0 f.write(h2t.handle(self.soup['content']).encode('utf-8'))
@property def upvoters(self): """获取文章的点赞用户 :return: 文章的点赞用户,返回生成器。 """ from .author import Author, ANONYMOUS self._make_soup() headers = dict(Default_Header) headers['Host'] = 'zhuanlan.zhihu.com' json = self._session.get( Post_Get_Upvoter.format(self.slug), headers=headers ).json() for au in json: try: yield Author( au['profileUrl'], au['name'], au['bio'], photo_url=au['avatar']['template'].format( id=au['avatar']['id'], size='r'), session=self._session ) except ValueError: # invalid url yield ANONYMOUS