I recently set up a Synology NAS at home and wanted to deploy Calibre-Web on it to read e-books. I went through a lot of deployment tutorials, but they kept failing, either throwing errors or misbehaving in some way. Below I share the deployment process that worked for me, for your reference, so you can avoid some of those detours.
Choosing the image

We use johngong/calibre-web to build the application. This image adds some features that the original image lacks, which makes it quicker to get started. Simply pull the johngong/calibre-web image.
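If you prefer working over SSH rather than through the DSM Docker UI, the pull can also be scripted with Docker's Python SDK. This is only a sketch: it assumes the `docker` package is installed (`pip install docker`) and that your user can reach the Docker daemon.

```python
import docker

# Connect to the local Docker daemon
client = docker.from_env()

# Pull the johngong/calibre-web image (latest tag)
image = client.images.pull("johngong/calibre-web", tag="latest")
print(image.tags)  # e.g. ['johngong/calibre-web:latest']
```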
Configuration

Volume mappings

We need to map the image's volumes as follows (a docker SDK sketch of these mappings follows the table):
Parameter | Description |
---|---|
Local folder 1:/library | Default location of the calibre-web and calibre-server book library |
Local folder 2:/config | Location of calibre-web and calibre-server configuration files |
Local folder 3:/autoaddbooks | Folder from which calibre automatically adds books |
Local folder 4:/usr/local/calibre-web/app/metadata_provider | Location of the metadata-scraping script |
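For reference, here is how those four mappings could look as a `volumes` dict for the docker Python SDK. The host paths under `/volume1/docker/calibre-web/` are placeholders; substitute the folders you actually created on your NAS.

```python
# Host-side paths are examples only; replace with your own folders
volumes = {
    "/volume1/docker/calibre-web/library": {"bind": "/library", "mode": "rw"},
    "/volume1/docker/calibre-web/config": {"bind": "/config", "mode": "rw"},
    "/volume1/docker/calibre-web/autoaddbooks": {"bind": "/autoaddbooks", "mode": "rw"},
    "/volume1/docker/calibre-web/metadata_provider": {
        "bind": "/usr/local/calibre-web/app/metadata_provider", "mode": "rw"
    },
}
```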
Local folder 4:/usr/local/calibre-web/app/metadata_provider

Local folder 4 in this mapping must contain a metadata-fetching script. Save the following content as NewDouban.py:
```python
import random
import re
import time
import dataclasses
import urllib.parse

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse, unquote
from lxml import etree
from functools import lru_cache
from cps.services.Metadata import Metadata, MetaSourceInfo, MetaRecord
from cps.search_metadata import meta
from flask import request, Response
from cps import helper

# Whether to automatically proxy the cover URL through the local server
DOUBAN_PROXY_COVER = True
# If the auto-detected server address is wrong, set your own calibre-web address here, e.g. http://nas_ip:8083/
DOUBAN_PROXY_COVER_HOST_URL = ''
DOUBAN_PROXY_COVER_PATH = 'metadata/douban_cover?cover='
DOUBAN_SEARCH_URL = "https://www.douban.com/search"
DOUBAN_BASE = "https://book.douban.com/"
DOUBAN_COVER_DOMAIN = 'doubanio.com'
DOUBAN_BOOK_CAT = "1001"
DOUBAN_BOOK_CACHE_SIZE = 500  # maximum number of cached books
DOUBAN_CONCURRENCY_SIZE = 5  # number of concurrent queries
DOUBAN_BOOK_URL_PATTERN = re.compile(".*/subject/(\\d+)/?")
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': DOUBAN_BASE
}
PROVIDER_NAME = "New Douban Books"
PROVIDER_ID = "new_douban"


class NewDouban(Metadata):
    __name__ = PROVIDER_NAME
    __id__ = PROVIDER_ID

    def __init__(self):
        self.searcher = DoubanBookSearcher()
        self.hack_helper_cover()
        super().__init__()

    def search(self, query: str, generic_cover: str = "", locale: str = "en"):
        if self.active:
            return self.searcher.search_books(query)

    @staticmethod
    def hack_helper_cover():
        """
        Override helper.save_cover_from_url to implement Douban cover downloads
        :return:
        """
        save_cover = helper.save_cover_from_url

        def new_save_cover(url, book_path):
            if DOUBAN_COVER_DOMAIN in url:
                cover_url = url
                if DOUBAN_PROXY_COVER:
                    # extract the real cover URL from the proxy query string
                    component = urllib.parse.urlparse(url)
                    query = urllib.parse.parse_qs(component.query)
                    cover_url = urllib.parse.unquote(query.get('cover')[0])
                res = requests.get(cover_url, headers=DEFAULT_HEADERS)
                return helper.save_cover(res, book_path)
            else:
                return save_cover(url, book_path)

        helper.save_cover_from_url = new_save_cover


@dataclasses.dataclass
class DoubanMetaRecord(MetaRecord):

    def __getattribute__(self, item):  # the cover is accessed through the local proxy service
        if item == 'cover' and DOUBAN_PROXY_COVER:
            cover_url = super().__getattribute__(item)
            if cover_url:
                try:
                    host_url = DOUBAN_PROXY_COVER_HOST_URL
                    if not host_url and request.host_url:
                        host_url = request.host_url
                    if host_url and host_url not in cover_url:
                        self.cover = host_url + DOUBAN_PROXY_COVER_PATH + urllib.parse.quote(cover_url)
                except BaseException:
                    pass
        return super().__getattribute__(item)


class DoubanBookSearcher:

    def __init__(self):
        self.book_loader = DoubanBookLoader()
        self.thread_pool = ThreadPoolExecutor(max_workers=10, thread_name_prefix='douban_async')

    def calc_url(self, href):
        query = urlparse(href).query
        params = {item.split('=')[0]: item.split('=')[1] for item in query.split('&')}
        url = unquote(params['url'])
        if DOUBAN_BOOK_URL_PATTERN.match(url):
            return url

    def load_book_urls_new(self, query):
        url = DOUBAN_SEARCH_URL
        params = {"cat": DOUBAN_BOOK_CAT, "q": query}
        res = requests.get(url, params=params, headers=DEFAULT_HEADERS)
        book_urls = []
        if res.status_code in [200, 201]:
            html = etree.HTML(res.content)
            alist = html.xpath('//a[@class="nbg"]')
            for link in alist:
                href = link.attrib['href']
                parsed = self.calc_url(href)
                if parsed and len(book_urls) < DOUBAN_CONCURRENCY_SIZE:
                    book_urls.append(parsed)
        return book_urls

    def search_books(self, query):
        book_urls = self.load_book_urls_new(query)
        books = []
        futures = [self.thread_pool.submit(self.book_loader.load_book, book_url) for book_url in book_urls]
        for future in as_completed(futures):
            book = future.result()
            if book is not None:
                books.append(book)
        return books


class DoubanBookLoader:

    def __init__(self):
        self.book_parser = DoubanBookHtmlParser()

    @lru_cache(maxsize=DOUBAN_BOOK_CACHE_SIZE)
    def load_book(self, url):
        book = None
        self.random_sleep()
        start_time = time.time()
        res = requests.get(url, headers=DEFAULT_HEADERS)
        if res.status_code in [200, 201]:
            print("Downloaded book {} in {:.0f}ms".format(url, (time.time() - start_time) * 1000))
            book_detail_content = res.content
            book = self.book_parser.parse_book(url, book_detail_content.decode("utf8"))
        return book

    def random_sleep(self):
        # brief random delay to avoid hammering Douban
        random_sec = random.random() / 10
        print("Random sleep time {}s".format(random_sec))
        time.sleep(random_sec)


class DoubanBookHtmlParser:

    def __init__(self):
        self.id_pattern = DOUBAN_BOOK_URL_PATTERN
        self.date_pattern = re.compile("(\\d{4})-(\\d+)")
        self.tag_pattern = re.compile("criteria = '(.+)'")

    def parse_book(self, url, book_content):
        book = DoubanMetaRecord(
            id="",
            title="",
            authors=[],
            publisher="",
            description="",
            url="",
            source=MetaSourceInfo(
                id=PROVIDER_ID,
                description=PROVIDER_NAME,
                link="https://book.douban.com/"
            )
        )
        html = etree.HTML(book_content)
        title_element = html.xpath("//span[@property='v:itemreviewed']")
        book.title = self.get_text(title_element)
        share_element = html.xpath("//a[@data-url]")
        if len(share_element):
            url = share_element[0].attrib['data-url']
        book.url = url
        id_match = self.id_pattern.match(url)
        if id_match:
            book.id = id_match.group(1)
        img_element = html.xpath("//a[@class='nbg']")
        if len(img_element):
            cover = img_element[0].attrib['href']
            if not cover or cover.endswith('update_image'):
                book.cover = ''
            else:
                book.cover = cover
        rating_element = html.xpath("//strong[@property='v:average']")
        book.rating = self.get_rating(rating_element)
        elements = html.xpath("//span[@class='pl']")
        # the labels below ("作者" author, "出版社" publisher, etc.) match Douban's page text
        for element in elements:
            text = self.get_text(element)
            if text.startswith("作者") or text.startswith("译者"):
                book.authors.extend([self.get_text(author_element) for author_element in
                                     filter(self.author_filter, element.findall("..//a"))])
            elif text.startswith("出版社"):
                book.publisher = self.get_tail(element)
            elif text.startswith("副标题"):
                book.title = book.title + ':' + self.get_tail(element)
            elif text.startswith("出版年"):
                book.publishedDate = self.get_publish_date(self.get_tail(element))
            elif text.startswith("丛书"):
                book.series = self.get_text(element.getnext())
            elif text.startswith("ISBN"):
                book.identifiers["isbn"] = self.get_tail(element)
        summary_element = html.xpath("//div[@id='link-report']//div[@class='intro']")
        if len(summary_element):
            book.description = etree.tostring(summary_element[-1], encoding="utf8").decode("utf8").strip()
        tag_elements = html.xpath("//a[contains(@class, 'tag')]")
        if len(tag_elements):
            book.tags = [self.get_text(tag_element) for tag_element in tag_elements]
        else:
            book.tags = self.get_tags(book_content)
        return book

    def get_tags(self, book_content):
        tag_match = self.tag_pattern.findall(book_content)
        if len(tag_match):
            return [tag.replace('7:', '') for tag in
                    filter(lambda tag: tag and tag.startswith('7:'), tag_match[0].split('|'))]
        return []

    def get_publish_date(self, date_str):
        if date_str:
            date_match = self.date_pattern.fullmatch(date_str)
            if date_match:
                date_str = "{}-{}-1".format(date_match.group(1), date_match.group(2))
        return date_str

    def get_rating(self, rating_element):
        return float(self.get_text(rating_element, '0')) / 2

    def author_filter(self, a_element):
        a_href = a_element.attrib['href']
        return '/author' in a_href or '/search' in a_href

    def get_text(self, element, default_str=''):
        text = default_str
        if len(element) and element[0].text:
            text = element[0].text.strip()
        elif isinstance(element, etree._Element) and element.text:
            text = element.text.strip()
        return text if text else default_str

    def get_tail(self, element, default_str=''):
        text = default_str
        if isinstance(element, etree._Element) and element.tail:
            text = element.tail.strip()
            if not text:
                text = self.get_text(element.getnext(), default_str)
        return text if text else default_str


@meta.route("/metadata/douban_cover", methods=["GET"])
def proxy_douban_cover():
    """
    Proxy Douban cover images for display
    :return:
    """
    cover_url = urllib.parse.unquote(request.args.get('cover'))
    res = requests.get(cover_url, headers=DEFAULT_HEADERS)
    return Response(res.content, mimetype=res.headers['Content-Type'])
```
Port mappings

Parameter | Description |
---|---|
Local port 1:8083 | calibre-web web access port |
Local port 2:8080 | calibre-server web access port |
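As a sketch, the same mappings as a `ports` dict for the docker Python SDK; the host-side ports (8083/8080) are just the defaults, so pick free ones if these are already taken on your NAS:

```python
# Container port -> host port
ports = {
    "8083/tcp": 8083,  # calibre-web web UI
    "8080/tcp": 8080,  # calibre-server web UI
}
```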
Environment variables

The environment variable defaults are fine; adjust them to your needs (a docker SDK sketch follows the table).
Parameter | Description |
---|---|
UID=1000 | UID setting, defaults to 1000 |
GID=1000 | GID setting, defaults to 1000 |
ENABLE_CALIBRE_SERVER=true | (true\|false) enables calibre-server, on by default |
ENABLE_CALIBRE_SERVER_OPDS=false | (true\|false) enables calibre-server's OPDS feature, off by default; may not work on ARM |
CALIBRE_SERVER_USER= | calibre-server username |
CALIBRE_SERVER_PASSWORD= | calibre-server password |
CALIBRE_SERVER_WEB_LANGUAGE=zh_CN | calibre-server web UI language; see the calibre-server documentation for other languages |
CALIBRE_ASCII_FILENAME=true | (true\|false) when set to false, calibre supports non-ASCII (e.g. Chinese) directory names |
CALIBRE_WEB_LANGUAGE=zh_Hans_CN | (zh_Hans_CN\|en) initial calibre-web UI language; see the calibre-web documentation for other languages |
TZ=Asia/Shanghai | System time zone, defaults to Asia/Shanghai |
CALIBREDB_OTHER_OPTION= | Extra options for the calibredb command in the auto-add script, e.g. the duplicates flag [-d] |
DISABLE_GOOGLE_SEARCH=false | (true\|false) disables Google search, off by default |
DISABLE_SCHOLAR_SEARCH=false | (true\|false) disables Scholar search, off by default |
ENABLE_CHOWN_LIBRARY=true | (true\|false) fixes the owner of the library folder, on by default |
CALIBRE_LOCALHOST=true | (true\|false) loads covers from localhost and the local network, on by default |
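Expressed as an `environment` dict for the docker Python SDK, a minimal set might look like this (a sketch; every value shown is already the image default, so only include the keys you want to change):

```python
# Only the variables you want to override need to be listed
environment = {
    "UID": "1000",
    "GID": "1000",
    "ENABLE_CALIBRE_SERVER": "true",
    "CALIBRE_WEB_LANGUAGE": "zh_Hans_CN",
    "TZ": "Asia/Shanghai",
}
```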
With the configuration complete, the container can be started normally; a minimal sketch putting the pieces together follows.
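This final sketch starts the container with the `volumes`, `ports`, and `environment` dicts from the sections above; the container name and restart policy are my own choices, not requirements of the image.

```python
import docker

client = docker.from_env()

# volumes, ports, environment: the dicts sketched in the sections above
container = client.containers.run(
    "johngong/calibre-web",
    name="calibre-web",  # arbitrary container name
    detach=True,
    ports=ports,
    volumes=volumes,
    environment=environment,
    restart_policy={"Name": "always"},
)
print(container.status)
```

Once the container is up, calibre-web should be reachable at http://nas_ip:8083.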