|
@@ -303,7 +303,7 @@
|
|
|
},
|
|
|
rules:{ //
|
|
|
title: [
|
|
|
- { min: 0, max: 20, message: '不能超过 20 个字符', trigger: 'blur' },
|
|
|
+ { min: 0, max: 60, message: '不能超过 60 个字符', trigger: 'blur' },
|
|
|
{ pattern: /^[^\\/:*?"<>|]*$/, message: '标题中不能包含 \\ / : * ? " < > |', trigger: 'blur' }
|
|
|
],
|
|
|
url: [
|
|
@@ -1512,7 +1512,7 @@
|
|
|
|
|
|
let m1 = document.querySelectorAll('img[class^=PicGallery--thumbnailPic--]');
|
|
|
let m2 = document.querySelectorAll('img[class*=thumbnailPic--]');
|
|
|
- if(document.querySelector('iframe') && m1.length == 0 && m2.length == 0){ // 出现弹窗而且没有主图,判断未拦截模式
|
|
|
+ if(document.querySelector('iframe') && m1.length == 0 && m2.length == 0){ // 出现弹窗而且没有主图,判断为拦截模式
|
|
|
await tbBrowser.close();
|
|
|
urlInfo.status = '6';
|
|
|
resolve(true);
|
|
@@ -2007,8 +2007,6 @@
|
|
|
await browser.close();
|
|
|
},
|
|
|
|
|
|
-
|
|
|
-
|
|
|
// 5-小红书下载
|
|
|
async redDownload(urlInfo){
|
|
|
let task = await new Promise((resolve,reject) =>{
|
|
@@ -2021,113 +2019,131 @@
|
|
|
puppeteer.use(StealthPlugin());
|
|
|
const browser = await puppeteer.launch({
|
|
|
executablePath: puppeteer.executablePath().replace('win32-1', 'win64-1'),
|
|
|
- userDataDir: os.tmpdir() + separator + 'chrome-data-capture',
|
|
|
- args: [
|
|
|
- '--start-maximized',
|
|
|
- '--no-sandbox',
|
|
|
- '--disable-setuid-sandbox',
|
|
|
- '--disable-blink-features=AutomationControlled',
|
|
|
- ]
|
|
|
+ userDataDir: os.tmpdir() + separator + 'chrome-data-capture'
|
|
|
});
|
|
|
const page = await browser.newPage();
|
|
|
+ await page.goto(urlInfo.url, {waitUntil : 'networkidle2'});
|
|
|
|
|
|
+ if(urlInfo.title){
|
|
|
+ if (fs.existsSync(this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title)) {
|
|
|
+ urlInfo.newPath = this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title;
|
|
|
+ } else {
|
|
|
+ fs.mkdirSync(this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title);
|
|
|
+ urlInfo.newPath = this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title;
|
|
|
+ }
|
|
|
+ }else{
|
|
|
+ await this.getTitle(page, urlInfo); // 生成页面标题对应的文件夹
|
|
|
+ }
|
|
|
+ urlInfo.status = '3';
|
|
|
+
|
|
|
+ let responseVideo = [];
|
|
|
page.on('response', async(response) => {
|
|
|
- // 检查响应的 MIME 类型是否以 'image/' 开头
|
|
|
- if (response.headers()['content-type'] && response.headers()['content-type'].startsWith('image/') && response.headers()['content-length']) {
|
|
|
- let imgArr = ['gif', 'jpeg', 'png', 'webp', 'svg', 'tiff', 'bmp', 'ico', 'avif'];
|
|
|
- let imgType = 'jpg';
|
|
|
- let isBase = false;
|
|
|
- imgArr.map((item, index) => {
|
|
|
- if(response.headers()['content-type'].indexOf(item) > -1){
|
|
|
- imgType = item;
|
|
|
- if(item == 'jpeg'){
|
|
|
- imgType = 'jpg';
|
|
|
- }else if(item == 'avif'){
|
|
|
- imgType = 'png';
|
|
|
- }
|
|
|
- }
|
|
|
- });
|
|
|
-
|
|
|
- let url = response.url();
|
|
|
- let regex = /^data:image\/[\w|+|-]+;base64,/;
|
|
|
- if(regex.exec(url)){
|
|
|
- url = response.url().replace(/^data:image\/[\w|+|-]+;base64,/, '');
|
|
|
- isBase = true;
|
|
|
+ // 检查响应的 MIME 类型是否以 'video/' 开头
|
|
|
+ if (response.headers()['content-type'] && response.headers()['content-type'].startsWith('video/')) {
|
|
|
+ responseVideo.push(response.url());
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ let noteImg = await page.$$('img[class^=note-slider-img]');
|
|
|
+ if(noteImg.length == 0){
|
|
|
+ let refreshImg = await page.$$('.xgplayer-error-refresh');
|
|
|
+ let startImg = await page.$$('.xgplayer-start');
|
|
|
+ //点击播放生成视频
|
|
|
+ if(refreshImg.length == 0){
|
|
|
+ await page.waitForSelector('.xgplayer-start', { visible: true });
|
|
|
+ await page.click('.xgplayer-start');
|
|
|
+ }else{
|
|
|
+ await page.waitForSelector('.xgplayer-error-refresh', { visible: true });
|
|
|
+ await page.click('.xgplayer-error-refresh');
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //detailImg:详情图;skuImg:sku图片;commentImg: 评论图;video: 视频
|
|
|
+ const imgInfo = await page.evaluate((authority, execNum) => {
|
|
|
+ let outObj = {
|
|
|
+ mainImg: [],
|
|
|
+ detailImg: [],
|
|
|
+ skuImg: [],
|
|
|
+ commentImg: [],
|
|
|
+ video: []
|
|
|
+ };
|
|
|
+
|
|
|
+ // 正则表达式匹配字符 重写图片路径
|
|
|
+ let regex = /\.(.{3,4})_[0-9a-zA-z]+\.(.{3,4})_\.(.{3,4})/;
|
|
|
+
|
|
|
+ //主图
|
|
|
+ let arr1 = document.querySelectorAll('img[class^=note-slider-img]');
|
|
|
+ for(let i=0; i< arr1.length; i++){
|
|
|
+ let mainImgUrl = arr1[i].src;
|
|
|
+ let result = regex.exec(mainImgUrl);
|
|
|
+ if(result){
|
|
|
+ mainImgUrl = mainImgUrl.replace(result[0], '.'+result[1]);
|
|
|
}
|
|
|
-
|
|
|
- let imgInfo = {
|
|
|
- url: url,
|
|
|
- contentType: response.headers()['content-type'],
|
|
|
- status: response.status(),
|
|
|
- imgType: imgType,
|
|
|
- isBase: isBase
|
|
|
+ if(!authority && i < execNum){
|
|
|
+ outObj.mainImg.push(mainImgUrl);
|
|
|
}
|
|
|
-
|
|
|
- if(urlInfo.title){
|
|
|
- if (fs.existsSync(this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title)) {
|
|
|
- urlInfo.newPath = this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title;
|
|
|
- } else {
|
|
|
- fs.mkdirSync(this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title);
|
|
|
- urlInfo.newPath = this.downloadDir + separator + pjson.softInfo.softName + separator + urlInfo.title;
|
|
|
- }
|
|
|
- }else{
|
|
|
- await this.getTitle(page, urlInfo); // 生成页面标题对应的文件夹
|
|
|
+ if(authority){
|
|
|
+ outObj.mainImg.push(mainImgUrl);
|
|
|
}
|
|
|
-
|
|
|
- let outputPath = urlInfo.newPath + '\\' + this.randomString(35) + '.' + imgInfo.imgType;
|
|
|
-
|
|
|
- urlInfo.status = '3';
|
|
|
- number++;
|
|
|
- if(!authority && number <= this.execNum){
|
|
|
- if(imgInfo.isBase){ //base64位图片下载
|
|
|
- this.downloadBaseImage(imgInfo.url, outputPath, urlInfo)
|
|
|
- }else{
|
|
|
- this.downloadImage(imgInfo.url, outputPath, urlInfo);
|
|
|
+ }
|
|
|
+ // 视频
|
|
|
+ let arr5 = document.querySelectorAll('video.lib-video');
|
|
|
+ for(let i=0; i< arr5.length; i++){
|
|
|
+ if(outObj.video.indexOf(arr5[i].src) == -1){
|
|
|
+ if(!authority && i < execNum){
|
|
|
+ outObj.video.push(arr5[i].src);
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
- if(authority){
|
|
|
- if(imgInfo.isBase){ //base64位图片下载
|
|
|
- this.downloadBaseImage(imgInfo.url, outputPath, urlInfo)
|
|
|
- }else{
|
|
|
- this.downloadImage(imgInfo.url, outputPath, urlInfo);
|
|
|
+ if(authority){
|
|
|
+ outObj.video.push(arr5[i].src);
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
- });
|
|
|
-
|
|
|
- await page.goto(urlInfo.url, {waitUntil : 'networkidle2'});
|
|
|
+ return outObj;
|
|
|
+ }, authority, this.execNum);
|
|
|
|
|
|
- let pageInfo = await page.evaluate(() => {
|
|
|
- let cHeight = document.documentElement.clientHeight;
|
|
|
- let scrollHeight = document.body.scrollHeight;
|
|
|
- return {'scrollHeight': scrollHeight, 'cHeight': cHeight}
|
|
|
- });
|
|
|
+ if(imgInfo.mainImg.length >= 3){
|
|
|
+ imgInfo.mainImg = imgInfo.mainImg.slice(1, imgInfo.mainImg.length-1); //小红书轮播图第一个/最后一个和内容重复去掉
|
|
|
+ }
|
|
|
+ imgInfo.video = responseVideo;
|
|
|
|
|
|
- let scrollHeight = pageInfo.scrollHeight;
|
|
|
- let cHeight = pageInfo.cHeight;
|
|
|
+ // 主图下载
|
|
|
+ for(let j = 0; j < imgInfo.mainImg.length; j++){
|
|
|
+ let fileName = imgInfo.mainImg[j].split('/').pop();
|
|
|
+ if(fileName){
|
|
|
+ let queryIndex = fileName.indexOf('?');
|
|
|
+ if (queryIndex !== -1) {
|
|
|
+ fileName = fileName.substr(0, queryIndex);
|
|
|
+ }
|
|
|
+
|
|
|
+ let num = Number(j) + 1;
|
|
|
+ let suffix = '.webp';
|
|
|
+ if(fileName.lastIndexOf('.') > -1){
|
|
|
+ suffix = fileName.substr(fileName.lastIndexOf('.'));
|
|
|
+ }
|
|
|
+
|
|
|
+ let outputPath = urlInfo.newPath + '\\文章图' + num + suffix;
|
|
|
+ await this.downloadImage(imgInfo.mainImg[j], outputPath, urlInfo);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- let num = Math.ceil(scrollHeight / cHeight);
|
|
|
- let start = -1;
|
|
|
- let scrollInt = setInterval(async() => {
|
|
|
- start ++;
|
|
|
- await page.evaluate((start) => {
|
|
|
- let cHeight = document.documentElement.clientHeight;
|
|
|
- window.scrollTo({
|
|
|
- top: cHeight * start,
|
|
|
- behavior: "smooth"
|
|
|
- });
|
|
|
- }, start);
|
|
|
- if(start > num || start > 200){ // 防止页面过长,滚动200次自动停止
|
|
|
- clearInterval(scrollInt);
|
|
|
- await browser.close();
|
|
|
- urlInfo.status = '4';
|
|
|
- resolve(true);
|
|
|
- this.loading = false;
|
|
|
+ //视频下载
|
|
|
+ for(let j = 0; j < imgInfo.video.length; j++){
|
|
|
+ let fileName = imgInfo.video[j].split('/').pop();
|
|
|
+ if(fileName){
|
|
|
+ let queryIndex = fileName.indexOf('?');
|
|
|
+ if (queryIndex !== -1) {
|
|
|
+ fileName = fileName.substr(0, queryIndex);
|
|
|
+ }
|
|
|
+
|
|
|
+ let outputPath = urlInfo.newPath + '\\' + fileName;
|
|
|
+ await this.downloadImage(imgInfo.video[j], outputPath, urlInfo);
|
|
|
}
|
|
|
- }, 300);
|
|
|
+ }
|
|
|
|
|
|
+ await browser.close();
|
|
|
+ urlInfo.status = '4';
|
|
|
+ resolve(true);
|
|
|
+ this.loading = false;
|
|
|
}catch(e){
|
|
|
urlInfo.status = '5';
|
|
|
reject(e);
|
|
@@ -2213,10 +2229,10 @@
|
|
|
|
|
|
// 获取页面标题 - 生成对应的文件夹
|
|
|
async getTitle(page, urlInfo){
|
|
|
- // 已页面标题作为新建文件夹,保留前20个字
|
|
|
+ // 以页面标题作为新建文件夹,保留前60个字
|
|
|
let title = await page.title();
|
|
|
if(title){
|
|
|
- title = title.substring(0, 20);
|
|
|
+ title = title.substring(0, 60);
|
|
|
if(this.containsAnyChar(title, ['\\', '/', ':', '*', '?', '"', '<', '>', '|'])){ //判断是否含有特殊字符
|
|
|
title = title.replace(/[\\|/|:|*|?|"|<|>||]/g, "");
|
|
|
}
|