Hello
I am having a hard time getting this to work with the following website: https://massart.edu/news-category/massart-news
Here is the code for my pagescraper.js:
/**
 * Scraper for the MassArt news listing page.
 *
 * `scraper(browser)` opens a new page in the given Puppeteer browser,
 * navigates to `url`, and collects the link target of every article heading.
 */
const scraperObject = {
  url: 'https://massart.edu/news-category/massart-news',

  /**
   * Collect the article links found on the listing page.
   *
   * @param {object} browser - Puppeteer browser instance (must provide `newPage()`).
   * @returns {Promise<string[]>} The `href` of each article heading link.
   */
  async scraper(browser) {
    const page = await browser.newPage();
    console.log(`Navigating to ${this.url}...`);
    await page.goto(this.url, { waitUntil: 'domcontentloaded' });

    // Wait for the main content region to be rendered before querying it.
    // NOTE(review): selectors below are taken from the original attempt and
    // assume the site's current markup — confirm against the live page.
    await page.waitForSelector('.layout-region.content-main');

    // The callback runs inside the page, and only serializable values survive
    // the trip back to Node. Returning DOM elements yields empty placeholders,
    // so extract plain `href` strings here instead.
    const urls = await page.$$eval('article div > h2', (headings) =>
      headings
        .map((heading) => heading.querySelector('a[href]'))
        // Skip headings that do not wrap a link rather than crashing on null.
        .filter((anchor) => anchor !== null)
        .map((anchor) => anchor.href),
    );

    console.log(urls);
    return urls;
  },
};
// Export the scraper object as this CommonJS module's public value.
module.exports = scraperObject;
This textbox defaults to using Markdown to format your answer.
You can type !ref in this text area to quickly search our full set of tutorials, documentation & marketplace offerings and insert the link!