首先看我的目录结构:
node 实现简单爬取文字
index.js代码如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| const cheerio = require('cheerio'); const http = require('http'); const iconv = require('iconv-lite'); const fs = require("fs"); const colors = require('colors');
// Scraper entry point: download the page, collect the text of every
// `#snsBox .item` element and write the result to data.json as a JSON array
// of `{ text }` objects.
http.get('http://www.shuoshuodaitupian.com/', function (sres) {
    // Only a 200 response carries the page we want to parse.
    if (sres.statusCode !== 200) {
        console.error('Request failed, status code: ' + sres.statusCode);
        sres.resume(); // drain the body so the socket is released
        return;
    }

    const chunks = [];

    // Just buffer here. A multi-byte UTF-8 character can be split across two
    // chunks, so decoding/logging must wait until the whole body has arrived.
    sres.on('data', function (chunk) {
        chunks.push(chunk);
    });

    sres.on('end', function () {
        const html = iconv.decode(Buffer.concat(chunks), 'utf-8');
        console.log('<<<------------------------------- 得到html ------------------------------->>>\n'.green);
        console.log("html:\n", html, "\n");

        // decodeEntities: false keeps the text as it appears in the markup
        // instead of letting cheerio rewrite HTML entities.
        const $ = cheerio.load(html, { decodeEntities: false });
        const titles = [];
        $('#snsBox .item').each(function (index, element) {
            titles.push({ text: $(element).text() });
        });
        console.log('<<<------------------------------- 摘取目标元素完毕 ------------------------------->>>\n'.green);
        console.log("目标元素:\n", titles, "\n");

        // Attach the listeners before writing so no event can be missed.
        const writerStream = fs.createWriteStream('data.json');
        writerStream.on('finish', function () {
            console.log('<<<------------------------------- 写入完成 ------------------------------->>>\n'.green);
        });
        writerStream.on('error', function (err) {
            console.log(err.stack);
        });
        writerStream.write(JSON.stringify(titles), 'UTF8');
        writerStream.end();
    });
}).on('error', function (err) {
    // DNS failures, refused connections, etc. would otherwise be thrown as an
    // unhandled 'error' event and crash the process.
    console.log(err.stack);
});
JAVASCRIPT
|
server.js代码如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| const express = require('express'); const fs = require("fs"); const colors = require('colors');
const path = require('path');

// Load the scraped JSON once at start-up and serve it from GET /api/data.
// The file is resolved relative to this script rather than through a
// machine-specific absolute path.
// NOTE(review): assumes index.js was run from this directory so that its
// data.json sits next to server.js — confirm against the project layout.
const dataFile = path.join(__dirname, 'data.json');
const data = fs.readFileSync(dataFile, 'utf8');
console.log(data, "\n");

const app = express();

app.get('/api/data', function (req, res) {
    // `data` is already a JSON string; label it as JSON so clients do not
    // receive it with the default text/html content type.
    res.type('json').send(data);
});

const server = app.listen(9999, function () {
    // The host part was missing from the logged URL.
    console.log("接口地址为:", "http://localhost:9999/api/data".green);
});
JAVASCRIPT
|
注意:如果尚未安装依赖模块,请先执行 npm install cheerio iconv-lite colors express 进行安装。
node 实现简单爬取文字
node 实现简单爬取文字
node 实现简单爬取文字