注意:换行缩进不要用 Tab 键,请统一使用空格!
import requests
import urllib
import json
import os,sys
from lxml import etree


def main():
    """Download the cover images of Douban's "hot" TV shows.

    Queries the Douban ``search_subjects`` JSON endpoint page by page,
    appends every cover URL to ``test1.txt`` (one per line) in the current
    working directory, and saves each image under ``<cwd>/image`` using the
    last path segment of its URL as the file name.

    Raises:
        requests.HTTPError: if the listing request or an image download
            returns an HTTP error status.
    """
    path = os.getcwd() + '/image'
    print(path)
    if not os.path.isdir(path):
        os.makedirs(path)

    # The context manager guarantees the URL log is flushed and closed even
    # if a request fails half-way through.
    with open('test1.txt', 'w') as f:
        # The outer loop advances the page offset; raise the range to
        # fetch more than the first 20 entries.
        for page in range(1):
            url = 'https://movie.douban.com/j/search_subjects?type=tv&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=' + str(page * 20)
            print(url)
            data = requests.get(url)
            data.raise_for_status()
            # The endpoint returns JSON, not HTML, so no scraping is needed.
            res = json.loads(data.text)
            result = [x["cover"] for x in res["subjects"]]

            for cover_url in result:
                f.write(cover_url + '\n')
                filename = cover_url.strip().split('/')[-1]
                print(cover_url)
                imagepath = os.path.join(path, filename)
                # urllib.urlopen does not exist on Python 3; use the
                # already-imported requests to fetch the raw bytes instead.
                response = requests.get(cover_url)
                response.raise_for_status()
                with open(imagepath, 'wb') as tag:
                    tag.write(response.content)


if __name__ == '__main__':
    main()
数据是从豆瓣的接口中获取的,而不是通过正则从返回的网页中提取的。
url = 'https://movie.douban.com/j/search_subjects?type=tv&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start='+str(i*20)
因此,把返回的数据解析为 JSON,然后获取其中的列表。
# Fetch the listing and decode the JSON body into a dict.
data = requests.get(url)
res = json.loads(data.text)
循环获取subjects中的元素的cover,写入到result中。
# Collect the cover URL of every entry in the "subjects" list.
result = []
for x in res["subjects"]:
    result.append(x["cover"])
第一层for循环是为了改变页码,获取更多信息。
1.打开文件,写入数据
f = open('test1.txt', 'w')
2.文件存放路径
path = os.getcwd()+'/image';
if not os.path.isdir(path):os.makedirs(path)
3.获取爬取链接末尾的文件名
filename = result[i].strip().split('/')[-1]
4.下载文件到本地
# Fetch the raw image bytes (urllib.urlopen is unavailable on Python 3,
# so the already-imported requests is used) and write them to disk.
response = requests.get(result[i])
cat_img = response.content
with open(imagepath, 'wb') as tag:
    tag.write(cat_img)
使用 Java 把文件上传到服务器
@GetMapping("/uploadImage")public Object uploadFile() throws Exception {String dirPath = "/Users/davi/Documents/GitHub/python/image";File dir = new File(dirPath);File[] files = dir.listFiles(); // 该文件目录下文件全部放入数组String type = "resource";final FileType fileType = FileType.valueOf(type.toUpperCase());FileWriter fw = new FileWriter("/Users/davi/Desktop/imageout.txt");for (int i = 0; i < files.length; i++) {if (files[i].isFile()) { // 判断是文件还是文件夹File file = files[i];FileInputStream fileInputStream = new FileInputStream(file);//个人的上传服务接口final MyFileCreateRequest createFileRequest = new MyFileCreateRequest(fileType, file.getName(), "multipart/form-data; boundary=----WebKitFormBoundaryNKE6Mjn6vK5PxlhK", fileInputStream);final MyFileInfoVo result = this.myservice.createFile(createFileRequest);System.out.println("upload image result:" + result.getUrl());fw.write(result.getUrl()+"\n");}}fw.close();return Results.singleResult(ResultCodes.SUCCESS);}
爬取的图片