为什么80%的码农都做不了架构师?>>>
使用 Node.js 抓取 https://themeforest.net 的网页模板预览信息,并通过 mongoose 存入 MongoDB。
以后找模板就不用那么费劲了,我是太懒了……
执行图片:
代码:
var request = require('request');
var cheerio = require('cheerio');
var mongoose = require('mongoose');var Schema = mongoose.Schema;
// Open the default mongoose connection to the local "theme" database.
mongoose.connect('mongodb://localhost/theme');

// One stored record per scraped template: link text, item URL and preview image URL.
var Theme = new Schema({
  text: String,
  url: String,
  img: String
});
var ThemeModel = mongoose.model('Themes', Theme);

// Prefix prepended to every scraped item href.
var urlPrefix = 'https://themeforest.net/';
// Listing URL; the page number is appended to it.
var baseUrl = 'https://themeforest.net/category/site-templates/creative?page=';

// Page currently being crawled.
var currentPage = 1;
// Total number of listing pages to crawl.
var countPage = 60;

// Number of items found on the page currently being processed.
var itemCount = 0;
// Number of items of the current page whose save attempt has finished
// (successfully or not). Compared against itemCount to detect page completion.
var itemLoad = 0;

/**
 * Download one listing page and pass its HTML to analysisPage.
 * Exits the process once `page` is past the last configured page.
 * A page that cannot be fetched is logged and skipped, so one bad response
 * does not leave the crawler idle forever.
 *
 * @param {number} page - 1-based page number appended to baseUrl.
 */
function getPageList(page) {
  if (page > countPage) {
    console.log('所有数据加载完毕!');
    process.exit(0);
    return;
  }

  console.log(`当前加载第 ${page} 页数据`);

  request(baseUrl + page, function(err, response, body) {
    if (!err && response.statusCode === 200) {
      analysisPage(body);
      return;
    }

    // `err` is null for a non-200 answer, so log the status code as well.
    console.log(
      'get page error url => ' + baseUrl + page,
      err,
      response ? response.statusCode : undefined
    );
    // Move on instead of stalling: nothing else would ever request the next page.
    getPageList(++currentPage);
  });
}

getPageList(currentPage);

/**
 * Extract every template entry from a listing page and store each one.
 * Sets itemCount to the number of entries found so saveFile can tell when
 * the whole page has been processed.
 *
 * @param {string} body - HTML of the listing page.
 */
function analysisPage(body) {
  var $ = cheerio.load(body);
  // Select the list item containers.
  var items = $('.js-google-analytics__list-event-container');

  itemCount = items.length;

  if (itemCount === 0) {
    // No save callback will ever fire for an empty page, so advance here.
    console.log('no items found on page ' + currentPage);
    getPageList(++currentPage);
    return;
  }

  items.each(function(i, element) {
    var item = $(element);
    var linkA = item.find('.js-google-analytics__list-event-trigger.t-link');
    var text = linkA.text();
    var url = linkA.attr('href');
    var img = item.find('.landscape-image-magnifier').attr('data-preview-url');

    saveFile(urlPrefix + url, text, img);
  });
}

/**
 * Persist one template entry. When the last entry of the current page has
 * been handled, reset the counter and request the next page.
 *
 * @param {string} url - Item URL.
 * @param {string} text - Link text of the item.
 * @param {string} img - Preview image URL.
 */
function saveFile(url, text, img) {
  var theme = new ThemeModel({
    url: url,
    text: text,
    img: img
  });

  theme.save(function(err) {
    if (err) {
      console.log('save mongoose err! ', err);
    }

    // Count failed saves too; otherwise a single failure keeps itemLoad below
    // itemCount forever and the crawl never reaches the next page.
    itemLoad++;

    if (itemLoad === itemCount) {
      // Every item of this page has been handled: reset the counter.
      itemLoad = 0;
      getPageList(++currentPage);
    }
  });
}
package.json
{"name": "theme","version": "1.0.0","description": "","main": "index.js","scripts": {"test": "echo \"Error: no test specified\" && exit 1"},"keywords": [],"author": "","license": "ISC","dependencies": {"cheerio": "^0.22.0","mongoose": "^4.7.0","request": "^2.79.0"}
}
更新一个前端查看页面(需要额外安装 express:npm install express --save):
var express = require('express');
var app = express();
var mongoose = require('mongoose');
var Schema = mongoose.Schema;

// Same record shape the crawler writes: link text, item URL and preview image URL.
var Theme = new Schema({
  text: String,
  url: String,
  img: String
});

// Model used by the viewer to query the stored records.
var ThemeModel = mongoose.model('Themes', Theme);
// Page skeleton; the `%{body}` placeholder is replaced with the rendered list.
var html = `
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>themes</title>
</head>
<body>
%{body}
</body>
</html>
`;

mongoose.connect('mongodb://localhost/theme');

/**
 * Escape a value for safe use in HTML text and double-quoted attributes.
 * The stored fields were scraped from a remote site, so they are untrusted.
 *
 * @param {*} value - Value to escape; null/undefined become an empty string.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value == null ? '' : value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

// GET / : render every stored theme as a preview image followed by a link.
app.get('/', function(req, res) {
  ThemeModel.find({}, function(err, items) {
    if (err) {
      // On failure `items` is not usable; answer with an error instead of throwing.
      console.log('load themes err! ', err);
      res.status(500).send('Failed to load themes');
      return;
    }

    var rows = items.map(function(item) {
      return [
        `<img src="${escapeHtml(item.img)}" />`,
        `<p><a href="${escapeHtml(item.url)}" target="_blank">${escapeHtml(item.text)}</a></p>`
      ].join('');
    });
    var body = '<div>' + rows.join('') + '</div>';

    res.set('Content-Type', 'text/html');
    // A function replacer inserts `body` verbatim; a plain string replacement
    // would interpret sequences such as `$&` inside the scraped text.
    res.send(html.replace('%{body}', function() {
      return body;
    }));
  });
});

// Start the HTTP server on port 3000 and log the bound address.
var server = app.listen(3000, function() {
  var host = server.address().address;
  var port = server.address().port;
  console.log('Example app listening at http://%s:%s', host, port);
});