# Main program (主程序)
#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib.request, http.cookiejar, re
import time
import threading, sys
# tools是我的自定义工具类
import tools
import socket

# Simulate visits to a blog in order to increase its view count.

# Sockets created without an explicit timeout give up after one second, so a
# stalled request cannot block a worker thread indefinitely.
socket.setdefaulttimeout(1.0)

# Number of visit rounds to run.  The first command-line argument, when
# present, overrides the default of 2; it is still a str at this point and is
# converted with int() by the statement that follows this section.
visitnum = 2
if len(sys.argv) > 1:
    visitnum = sys.argv[1]
visitnum = int(visitnum)


class Csdn(threading.Thread):
    """Worker thread that requests every article linked from one list page.

    The thread loads list page ``page`` of ``username``'s blog (reusing the
    copy cached in ``Csdn.contents`` when there is one) and then opens each
    article URL found on that page.
    """

    # Decoded list-page HTML shared by all instances, keyed 'page_<n>'.
    # NOTE: instances also assign ``self.contents`` (a str holding one page),
    # which shadows this dict on the instance; always use ``Csdn.contents``
    # to reach the cache.
    contents = {}
    # Request headers shared by every opener built by this class.
    headers = [
        ('host', 'blog.csdn.net'),
        ('User-Agent',
         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
    ]
    domain = 'http://blog.csdn.net'
    # Filled with (username, page number).
    url = 'http://blog.csdn.net/%s/article/list/%s'

    def __init__(self, username, page, times):
        """Store the target page and build this thread's private opener.

        Args:
            username: blog owner's name, substituted into ``Csdn.url``.
            page: 1-based number of the list page this thread handles.
            times: index of the current visit round (used in log output).
        """
        super().__init__()
        self.username = username
        self.createOpener()
        self.page = page
        self.times = times

    @staticmethod
    def _buildOpener():
        """Return an opener with its own cookie jar and the shared headers."""
        cookieProc = urllib.request.HTTPCookieProcessor(
            http.cookiejar.CookieJar())
        opener = urllib.request.build_opener(cookieProc)
        opener.addheaders = Csdn.headers
        return opener

    @staticmethod
    def getListPages(username, page=1, count=1):
        """Fetch list page ``page`` and every later page, caching their HTML.

        Args:
            username: blog owner's name.
            page: list page to fetch in this call.
            count: total number of pages known so far.

        Returns:
            The total page count parsed from page ``page``.  If that page
            carries no pagination marker, the largest page number known so
            far is returned instead.

        Raises:
            Exception: the request failed and no later page is left to try.
        """
        key = 'page_' + str(page)
        if page == 1:
            referer = ('Referer', 'http://blog.csdn.net/' + username)
            # The header list is shared class state; avoid piling up
            # duplicates when this method is called more than once.
            if referer not in Csdn.headers:
                Csdn.headers.append(referer)
        opener = Csdn._buildOpener()
        url = Csdn.url % (username, page)
        try:
            with opener.open(url) as response:
                contents = response.read().decode('utf-8', 'ignore')
        except Exception as e:
            if count > page:
                # Skip the page that failed and carry on with the next one;
                # there is no response to parse for this page.
                return Csdn.getListPages(username, page + 1, count)
            raise Exception('出错了') from e
        Csdn.contents[key] = contents
        pattern = r'<div id="papelist" class="pagelist">([\s\S]*?)共(\d+)页'
        match = re.search(pattern, contents)
        # Without a pagination marker, assume there is nothing beyond the
        # pages already known.
        pages = int(match.group(2)) if match else max(page, count)
        if pages > page:
            Csdn.getListPages(username, page + 1, pages)
        return pages

    def createOpener(self):
        """Create this thread's opener and store it on ``self.opener``."""
        self.opener = Csdn._buildOpener()

    def visitUrl(self):
        """Load this thread's list page (from cache if possible) and visit it.

        On a download error a message is printed and no articles are visited.
        """
        key = 'page_' + str(self.page)
        if key in Csdn.contents:
            print('--------%s exists' % key)
            self.contents = Csdn.contents[key]
        else:
            url = Csdn.url % (self.username, self.page)
            try:
                with self.opener.open(url) as response:
                    contents = response.read().decode('utf-8', 'ignore')
            except Exception:
                print("访问 %s 出错 " % url)
                return
            self.contents = contents
            Csdn.contents[key] = contents
        self.addVisitNum()

    def addVisitNum(self):
        """Open every article URL found in ``self.contents``.

        A failed request is reported and skipped; the remaining articles are
        still visited.
        """
        opener = self.opener
        divPattern = r'<div id="article_list" ([\s\S]*)<div id="papelist" class="pagelist">'
        ulMatch = re.search(divPattern, self.contents)
        if ulMatch is None:
            # The page does not have the expected layout; nothing to visit.
            print("第%s页未找到文章列表" % self.page)
            return
        divText = ulMatch.group(1)
        smallPattern = r'<div class="list_item article_item">([\s\S]*?)<span class="link_title"><a href="(.*?)">([\s\S]*?)</a></span>([\s\S]*?)阅读</a>\((\d+)\)'
        # Group 2 of each match (index 1) is the article's relative link.
        for item in re.findall(smallPattern, divText):
            articleUrl = Csdn.domain + item[1]
            try:
                # The request itself is the visit; release the connection
                # straight away instead of leaking the response.
                opener.open(articleUrl).close()
            except Exception:
                print("当前第%s次访问,访问出错,url:%s" % (self.times, articleUrl))
                continue
            print("当前第%s次访问,第%s页,url:%s" % (self.times, self.page, articleUrl))

    def run(self):
        """Thread entry point."""
        self.visitUrl()


class MainCsdn(threading.Thread):
    """One visit round: starts a ``Csdn`` worker per list page and waits."""

    def __init__(self, username, times, pages):
        """Remember the blog owner, the round index and the page count."""
        super().__init__()
        self.username = username
        self.times = times
        self.pages = pages

    def run(self):
        """Start one worker per page, then wait for all of them to finish."""
        threads = []
        for page in range(1, self.pages + 1):
            thread = Csdn(self.username, page, times=self.times)
            thread.start()
            threads.append(thread)
        # Wait for every worker of this round.
        for t in threads:
            t.join()
        print(self.times, " 退出第%s次执行任务" % self.times)


@tools.runTime
def main():
    """Run ``visitnum`` visit rounds in parallel and wait for them all."""
    # CSDN nickname of the blog to visit.
    username = 'csdn昵称'
    pages = Csdn.getListPages(username, 1, 1)
    threads = []
    for i in range(1, visitnum + 1):
        thread = MainCsdn(username=username, times=i, pages=pages)
        thread.start()
        threads.append(thread)
    # Wait for every round to finish.
    for t in threads:
        t.join()
    print("退出主线程")


if __name__ == '__main__':
    main()
#!/usr/bin/python
# -*- coding:utf-8 -*-
import functools
import time

# Custom utility helpers (tools.py).


def runTime(func):
    """Decorator that prints how long each call to *func* takes.

    Args:
        func: the callable to time.

    Returns:
        A wrapper that calls *func* with the same arguments, prints the
        elapsed time in seconds and returns *func*'s result unchanged.
    """
    @functools.wraps(func)
    def newFunc(*args, **kwargs):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        end = time.perf_counter()
        print("read: %f s" % (end - start))
        return res
    return newFunc


def log(content, file='test.log', type=1):
    """Write ``str(content)`` to *file*.

    Args:
        content: value to write; it is converted with ``str()``.
        file: path of the log file.
        type: 1 appends to the file, any other value overwrites it.
    """
    mode = 'a+' if type == 1 else 'w+'
    # The context manager closes the file even if the write fails.
    with open(file, mode, encoding='utf-8') as f:
        f.write(str(content))