Testing – How to automate a web browser with Puppeteer?

Setup

Create NodeJS project

npm init
yarn add puppeteer jsdom xpath

Code template

let fs = require("fs");
let puppeteer = require('puppeteer');
const jsdom = require("jsdom");
const xpath = require('xpath');
const { JSDOM } = jsdom;
const path = require('path');

////////////////////////////////////
// Config constants
const openURL = "https://google.com";
const exportFilePath = "output/main.html";

////////////////////////////////////////////
// Scrape HTML structure
/**
 * Template entry point: opens `openURL` in a visible browser, serializes the
 * rendered page to an HTML string and parses that string with JSDOM.
 * Uses the modules required above and the config constants `openURL` and
 * `exportFilePath`.
 */
(async () => {

    // Initialize
    const browser = await puppeteer.launch({
        headless: false,
    });

    try {
        const page = await browser.newPage();

        // Open URL
        console.log(openURL);
        await page.goto(openURL, {waitUntil: 'networkidle0'});
        // or use this if above code didn't wait until page is fully loaded
        // await page.goto(openURL, {waitUntil: 'networkidle2', timeout: 9900000});

        // Serialize the live DOM to a string.
        // `document.doctype` is null on pages without a doctype and XMLSerializer
        // rejects null, so fall back to an empty prefix in that case.
        const serializePage =
            '(document.doctype ? new XMLSerializer().serializeToString(document.doctype) : "")' +
            ' + document.documentElement.outerHTML';
        // `let` because the optional "read HTML file" step below reassigns it.
        let html = await page.evaluate(serializePage);

        // Optional: save the HTML to disk and read it back (uncomment to use).
        // The same absolute path is used for writing and reading.
        // const absFilePath = path.join(__dirname, exportFilePath);
        // fs.mkdirSync(path.dirname(absFilePath), { recursive: true });
        // fs.writeFileSync(absFilePath, html);
        // console.log('html file successfully written!');
        // html = fs.readFileSync(absFilePath, 'utf8');

        // Read DOM
        const dom = new JSDOM(html);
        const document = dom.window.document;

        // Work with DOM
        // ...
    } finally {
        // Close the browser even when one of the steps above throws
        await browser.close();
    }
})().catch((err) => {
    // Surface failures instead of leaving an unhandled promise rejection
    console.error(err);
    process.exitCode = 1;
});

How to open Chrome app, not Chromium?

  • Open Terminal
    /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --no-first-run --no-default-browser-check --user-data-dir=$(mktemp -d -t 'chrome-remote_data_dir')
  • Chrome opens, and a line like this appears in the Terminal
    DevTools listening on ws://127.0.0.1:9222/devtools/browser/a5541788-9d01-42f2-ac0b-f86cb5d3aecf
  • Update JS code as below
    // WebSocket address of the already-running Chrome instance
    const browserWSEndpoint = 'ws://127.0.0.1:9222/devtools/browser/a5541788-9d01-42f2-ac0b-f86cb5d3aecf';
    // Attach to that instance instead of launching a new browser
    const browser = await puppeteer.connect({ browserWSEndpoint });
    const page = await browser.newPage();
  • Avoid calling browser.close(), because it shuts Chrome down and you would have to run step 1 again; call browser.disconnect() to detach instead

Ref: Connect to existing Chrome

How to run multiple instances at the same time?

Use puppeteer-cluster

const { Cluster } = require('puppeteer-cluster');

/**
 * Runs the same task against several URLs in parallel with puppeteer-cluster.
 * At most two tasks run at once (`maxConcurrency: 2`).
 */
(async () => {

    const cluster = await Cluster.launch({
        concurrency: Cluster.CONCURRENCY_CONTEXT,
        maxConcurrency: 2,
        puppeteerOptions: {headless: false}
    });

    // `data` is the value handed to cluster.queue(); here it is the URL to visit
    await cluster.task(async ({ page, data: url }) => {
        // Open the queued URL (not a fixed constant, otherwise every task visits the same page)
        await page.goto(url);

        // Do something
        // ...
    });

    cluster.queue('http://google.com');
    cluster.queue('https://stackoverflow.com/');

    // Wait for the queue to drain, then shut the cluster down
    await cluster.idle();
    await cluster.close();
})().catch((err) => {
    // Surface failures instead of leaving an unhandled promise rejection
    console.error(err);
    process.exitCode = 1;
});

However, puppeteer-cluster has no option to use puppeteer.connect(), so it cannot attach to a running Chrome app; it can only launch Chromium.

puppeteer-cluster/issues/220

How to debug puppeteer?

  • Add `debugger` statements and console.log calls to your code
  • Launch puppeteer with this option
puppeteer.launch({devtools: true})
  • The browser will then pause at the debugger statement, and you can check the console log

How to click a button?

HTML code

<a role="button" id="button_id">Click Me Now</a>

Puppeteer code

// Wait until the button is present in the DOM.
// `page.waitFor` is deprecated; `waitForSelector` is the dedicated call for selectors.
await page.waitForSelector('a#button_id');
// Trigger the click from inside the page context
await page.$eval('a#button_id', (el) => el.click());

How to click on an element?

// Quickest way
// <li class='react-tabs__tab'>AAA</li>
// <li class='react-tabs__tab'>BBB</li>
// <li class='react-tabs__tab'>CCC</li>

// Click the first tab whose text is exactly 'BBB'; does nothing when no tab matches
await page.$$eval('li.react-tabs__tab', (tabs) => {
  tabs.find((tab) => tab.textContent === 'BBB')?.click();
});

How to fill an input field?

// Assign the field's value directly inside the page context
await page.$eval('input[name=search]', (input, text) => (input.value = text), 'ABCDEFGH');

or

// Type the text into the field with a delay of 20 between key presses
await page.type('input[name=search]', 'ABCDEFGH', {delay: 20});

How to sleep for a certain amount of time?

  // Option 1 - resolving a promise when `setTimeout` finishes (no Puppeteer API involved)
  const sleep = (duration) => new Promise((resolve) => setTimeout(resolve, duration));
  await sleep(3000);

  // Option 2 - if we have a page instance on an older Puppeteer release:
  // `page.waitFor` was deprecated in favour of `page.waitForTimeout`.
  // NOTE: recent Puppeteer releases dropped `waitForTimeout` as well; use Option 1 there.
  await page.waitForTimeout(3000);

How to emulate iPhone X?

 let page = await browser.newPage();

// Emulates an iPhone X: mobile Safari user agent plus a matching mobile viewport
await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1');
// `isMobile` makes the page take its meta viewport tag into account, `hasTouch` enables
// touch events, and `deviceScaleFactor: 3` matches the phone's pixel density.
await page.setViewport({
  width: 375,
  height: 812,
  deviceScaleFactor: 3,
  isMobile: true,
  hasTouch: true,
});

How to go back to the previous page?

await page.goBack();

How to work with DOM?

How to select elements by XPath?

Select elements containing text

    // nodes that have a text child containing the keyword 'ABCDEF'
    const elements = xpath.select(`//*[text()[contains(., 'ABCDEF')]]`, document);

    // get first node (undefined when nothing matched)
    const oneElement = elements[0];
    if (oneElement) {
        // ... work with the matched node here
    }

Parent, sibling nodes

// Walk from the selected node to the node that follows it and to its parent.
// NOTE(review): nextSibling is presumably any node type (possibly a whitespace text node), not only elements — verify before use.
const nextSibling = oneElement.nextSibling;
const parent = oneElement.parentNode;

Get attribute value

// Read the value of the selected node's `href` attribute
const href = oneElement.getAttribute('href')

Locate element by XPath

//div[@id="top-list"]/div[@data-list]
//div[@id="top-list"]/div[@data-list="1"]

Locate element by class name

/* <div class="Test"> */

//div[@class="Test"]
/* <div class="Test some-other-class"> */

//div[contains(concat(' ', normalize-space(@class), ' '), ' Test ')]
/* <div class="Test some-other-class"> */

//div[contains(concat(' ', @class, ' '), ' Test ')]

Check if element is visible

/**
 * Checks whether an element matching an XPath expression becomes visible.
 *
 * @param {object} page - Puppeteer page; only its `waitForXPath` method is used.
 * @param {string} xPathSelector - XPath expression of the element to look for.
 * @param {number} [timeout=1000] - Value passed as the `timeout` wait option.
 * @returns {Promise<boolean>} `true` when the wait resolves, `false` when it rejects for any reason.
 */
export async function isVisible(page, xPathSelector, timeout = 1000) {
  try {
    await page.waitForXPath(xPathSelector, { visible: true, timeout });
    return true;
  } catch {
    // Best-effort probe: any failure of the wait is reported as "not visible"
    return false;
  }
}

Be the first to comment

Leave a Reply

Your email address will not be published.


*