Excel单元格使用xlwings包调用python函数的公式,截取子网页(标题)的试验 问题求助CSDN
Python 环境:python3.7 的conda上的py3环境
Excel 2010
Excel单元格布置
D114='http://mp.weixin.qq.com/s?__biz=MzU2MTgxNTE1Nw==&mid=2247484671&idx=2&sn=177e74610bd48593e6a45efb81dddc21&chksm=fc724fe5cb05c6f3df76ef025a6d8ee2c63d5385b96b2f5c13931cd3f9d46d1469486d452239&mpshare=1&scene=23&srcid=&sharer_sharetime=1574561625912&sharer_shareid=d333ff324a43c5c77fa866d86816c0fe#rd
'E114=get_title(D114,"h2","{'class_':'rich_media_title'}")
使用的源码:文章链接索引清单简化.py
import xlwings as xwfrom bs4 import BeautifulSoup
import requests
import codecs
import os
import re
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}@xw.sub # only required if you want to import it or run it via UDF Server
def main():wb = xw.Book.caller()wb.sheets[0].range("A1").value = "Hello xlwings!"@xw.func
def hello(name):return "hello {0}".format(name)@xw.func
def sum_two(a,b):return a+b@xw.func
@xw.arg('item', doc='This is item that is found.')
@xw.arg('attr', doc='This is an attribute in dictionary {key:value,...}.')
# @xw.arg('key', doc='This is key that is attribute.')
# @xw.arg('value', doc='This is value of the attribute above.')
def get_title(url,item,attr):#(url,item,{key:value,...}s):html = requests.get(url,params=headers)#补充params=# html = requests.request('GET',url,headers)soup = BeautifulSoup(html.text, 'lxml') #原文.contentx1 =soup.find(name=item,attrs=attr)url1='https://mp.weixin.qq.com/s?__biz=MzU2MTgxNTE1Nw==&mid=2247484031&idx=2&sn=e916e47c2f6fa55703f0a3e25c1a860e&chksm=fc724965cb05c073ad331b41bb385701895f3e8f33766b5ba9ad35f78215f96003fd6f544d95&mpshare=1&scene=23&srcid=&sharer_sharetime=1574578858152&sharer_shareid=d333ff324a43c5c77fa866d86816c0fe#rd'html1 = requests.get(url1,headers)markup=''''''#1=html1#2='<h1 style="display:inline">深度揭秘铁路 12306 的架构</h1>'markup1=''' <h2 class="rich_media_title" id="activity-name"> Gartner : 2019年十大数据与分析技术趋势 </h2>''' markup2='''<div id="img-content" class="rich_media_wrp"><h2 class="rich_media_title" id="activity-name"> Gartner : 2019年十大数据与分析技术趋势</h2>'''#注意class关键词为python保留,外部调用需要避讳,写在程序里的倒是无妨。soup1= BeautifulSoup(markup2, 'lxml') #soup1.text测试正常,对markup1#soup1.prettify()测试正常 # return测试,attrs=attr传递正常 attr={'class':'rich_media_title'}x11=soup1.find(name=item,attrs=attr) #item='h2' ;attr={'class':'rich_media_title'} #find(name='h2',attrs={'class':'rich_media_title'})#测试正常return x1.text#soup1.text#x11.text#content#x1.text#len(x11)#x11.texturl1='https://mp.weixin.qq.com/s?__biz=MzU2MTgxNTE1Nw==&mid=2247484031&idx=2&sn=e916e47c2f6fa55703f0a3e25c1a860e&chksm=fc724965cb05c073ad331b41bb385701895f3e8f33766b5ba9ad35f78215f96003fd6f544d95&mpshare=1&scene=23&srcid=&sharer_sharetime=1574578858152&sharer_shareid=d333ff324a43c5c77fa866d86816c0fe#rd'
# print('测试截取的网页:',get_title(url1,item,attr))
# get_title(url1,item,attr)if __name__ == "__main__":xw.books.active.set_mock_caller()main()# get_title(url1,item,attr)# print('测试截取的网页:',get_title(url1,item,attr))
#先导入 from 文章链接索引清单 import get_title;再调用get_title(url,item,attr);#return x1.text#.string #len(x1)#.string 都试过#重点:获取tag内容有三种方法,string方法不能处理标签中含有标签的内容,获取为None;text和get_text可以获取。#tag.string,soup1.string 返回空#type(soup1.string),type(tag.string) #出错 "Unexpected Python Error: TypeError: Objects of type 'type' can not be converted to a COM VARIANT"#soup1.name返回[document]#.get_text()[0],返回空#1,测试可以正常返回# x1这才是完整的元素,.get_text()和.get都不完整,都是截取的
试验结果 征求错误解决方法,Excel公式调用的python函数期望能正常返回抓取的微信页面标题
" return xFile ""e:\鏂板叴it銆乨t棰嗗煙璧勬枡\dt\澶ф暟鎹椂浠e皬鏁版嵁鍒嗘瀽-28.5g\澶ф暟鎹功绫?璁哄潧绛夎祫婧愮粍缁嘰寰俊鍏紬鍙疯祫婧?閾炬帴娓呭崟\澶ф暟鎹垎鏋愭寲鎺樺拰python鏈哄櫒瀛︿範 鍏紬鍙锋枃绔犻摼鎺ョ储寮曟竻鍗昞鏂囩珷閾炬帴绱㈠紩娓呭崟\鏂囩珷閾炬帴绱㈠紩娓呭崟.py"", line 3140, in get_titleret = func(*args)File ""D:\ProgramData\Miniconda2\envs\py3\lib\site-packages\xlwings\udfs.py"", line 314, in call_udfres = call_udf(script, fname, args, this_workbook, FromVariant(caller))File ""D:\ProgramData\Miniconda2\envs\py3\lib\site-packages\xlwings\server.py"", line 198, in CallUDFreturn func(*args)File ""D:\ProgramData\Miniconda2\envs\py3\lib\site-packages\win32com\server\policy.py"", line 586, in _invokeex_return S_OK, -1, self._invokeex_(dispid, lcid, wFlags, args, None, None)File ""D:\ProgramData\Miniconda2\envs\py3\lib\site-packages\win32com\server\policy.py"", line 283, in _invoke_return self._invoke_(dispid, lcid, wFlags, args)File ""D:\ProgramData\Miniconda2\envs\py3\lib\site-packages\win32com\server\policy.py"", line 278, in _Invoke_"
命令行执行 效果良好
>python -c "import 文章链接索引清单简化; url='https://mp.weixin.qq.com/s?__biz=MzU2MTgxNTE1Nw==&mid=2247484671&idx=2&sn=177e74610bd48593e6a45efb81dddc21&chksm=fc724fe5cb05c6f3df76ef025a6d8ee2c63d5385b96b2f5c13931cd3f9d46d1469486d452239&mpshare=1&scene=23&srcid=&sharer_sharetime=1574561625912&sharer_shareid=d333ff324a43c5c77fa866d86816c0fe#rd';item= 'h2' ;attr={'class':'rich_media_title'};s=文章链接索引清单简化.get_title(url,item,attr);print('s=',s)"
试验结果
s=北大开源了Python中文分词工具包,准确度远超Jieba(py3) E:\新兴IT、DT领域资料\DT\大数据时代小数据分析-28.5G\大数据书籍 论坛等资源 组织\微信公众号资源 链接清单\大数据分析挖掘和Python机器学习 公众号文章链接索引清单\文章链接索引清单>