A web crawler for the examples on the official ECharts website, implemented with Puppeteer
wangzhi
Posted on April 12, 2024
I have had some free time lately and started wondering whether the use of ECharts configuration options could be simplified. For now this is only a rough idea: so far I have only implemented generating Storybook stories from the official examples, and I am still thinking about how to simplify the configuration options themselves.
Project directory structure
- /bots/
- /bots/bots.mjs
- /bots/template.mjs
- /package.json
- /bots/assests/
package.json content
{
"name": "bots",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"run": "node bots/bots.mjs"
},
"dependencies": {
"puppeteer": "^22.6.3"
},
"devDependencies": {}
}
bots.mjs content
import puppeteer from "puppeteer";
import fs from "node:fs";
import {
storiesTpl,
storiesArgs,
generOptions,
generOptionsWithFn,
} from "./template.mjs";
// Index page of the official ECharts example gallery; the crawler navigates here first.
const ECHARTS_BASE_URL = "https://echarts.apache.org/examples/en/index.html";
/**
 * Returns `str` with its first character upper-cased and every other
 * character lower-cased. Missing or empty input yields an empty string.
 *
 * @param str Word to normalise.
 * @returns The normalised word, or "" when there is nothing to normalise.
 */
function capitalizeFirstLetter(str) {
  const hasContent = Boolean(str) && str.length !== 0;
  if (!hasContent) {
    return "";
  }
  const lowered = str.toLowerCase();
  const head = lowered.charAt(0).toUpperCase();
  const tail = lowered.slice(1);
  return head + tail;
}
/**
 * Derives the Storybook namespace from the text of a chart-type heading.
 * The heading is split on spaces, its last word is dropped, and every
 * newline and "/" is stripped from the remaining words before joining them.
 *
 * @param headingText Text content of a `.chart-type-head` element.
 * @returns The joined namespace (may be empty).
 */
function toNamespace(headingText) {
  return headingText
    .split(" ")
    .filter(Boolean)
    .slice(0, -1)
    // Global patterns: a word can carry several newlines or slashes.
    .map((word) => word.replace(/\n/g, "").replace(/\//g, ""))
    .filter(Boolean)
    .join("");
}

/**
 * Builds the exported story name from an example title: "+", "(", ")" and
 * "-" are removed from every word, each word is capitalised, and the words
 * are concatenated.
 *
 * @param exampleTitle Text content of an `.example-title` element.
 * @returns The story name used as the export identifier.
 */
function toStoryName(exampleTitle) {
  return exampleTitle
    .split(" ")
    .filter(Boolean)
    .map((word) => capitalizeFirstLetter(word.replace(/[+()-]/g, "")))
    .join("");
}

/**
 * Rewrites the example source read from the editor so it can be embedded in
 * a generated story: empty array literals get an `as any` suffix, the
 * "<click to see more...>" placeholder is removed and `var` becomes `let`.
 *
 * @param source Raw text read from the editor.
 * @returns The rewritten source.
 */
function sanitizeExampleSource(source) {
  return source
    .replace(/\[\]/g, "[] as any")
    .replace(/<click to see more...>/g, "")
    // Word boundaries keep identifiers that merely contain "var" intact.
    .replace(/\bvar\b/g, "let");
}

/**
 * Clicks an example link and resolves with the editor page it opens.
 * Waiting and clicking are started together so the new target cannot be
 * missed, and no event listener is left behind on the browser.
 *
 * @param browser Puppeteer browser that owns the pages.
 * @param link Element handle of the `.example-link` anchor.
 * @returns The page whose URL contains "editor".
 */
async function openEditorPage(browser, link) {
  const [target] = await Promise.all([
    browser.waitForTarget(
      (candidate) =>
        candidate.type() === "page" && candidate.url().includes("editor")
    ),
    link.click(),
  ]);
  return target.page();
}

/**
 * Crawls the ECharts example gallery and writes one `<namespace>.stories.ts`
 * file per chart-type panel into `./bots/assests`.
 *
 * For every example in a panel the editor page is opened, its source is read
 * from the `.ace_text-layer` element, sanitised and wrapped with the
 * templates from `template.mjs`.
 */
(async function () {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Navigate the page to a URL
    await page.goto(ECHARTS_BASE_URL);
    // Set screen size
    await page.setViewport({ width: 1080, height: 1024 });

    // "assests" is the directory name used by this project; created up front
    // so the first write cannot fail on a missing directory.
    const outputDir = "./bots/assests";
    fs.mkdirSync(outputDir, { recursive: true });

    const panels = await page.$$(".example-list-panel > div");
    for (const panel of panels) {
      // gener namespace
      const heading = await panel.$(".chart-type-head");
      if (!heading) {
        console.warn("panel without .chart-type-head skipped");
        continue;
      }
      const headingText = await heading.evaluate((el) => el.textContent);
      const namespace = toNamespace(headingText);
      console.log(`${namespace} start`);

      const instances = await panel.$$(".row .example-list-item");
      const components = [];
      for (const instance of instances) {
        const titleElement = await instance.$(".example-title");
        const link = await instance.$(".example-link");
        if (!titleElement || !link) {
          console.warn(`${namespace}: example without title or link skipped`);
          continue;
        }
        const subTitle = await titleElement.evaluate((el) => el.textContent);
        const title = toStoryName(subTitle);

        const newPage = await openEditorPage(browser, link);
        try {
          // Oversized viewport — presumably so the editor renders every line
          // of the example instead of only the visible part (TODO confirm).
          await newPage.setViewport({ width: 40000, height: 20000 });
          await newPage.waitForSelector(".ace_text-layer");
          // Fixed pause after the text layer appears — presumably gives the
          // editor time to fill in the example source (TODO confirm).
          await new Promise((resolve) => setTimeout(resolve, 3000));
          const rawSource = await newPage.evaluate(
            () => document.querySelector(".ace_text-layer").innerText
          );
          const content = sanitizeExampleSource(rawSource);
          // Sources that reference `myChart` need the chart instance, so they
          // are wrapped as a callback instead of a plain options value.
          const options = content.includes("myChart")
            ? generOptionsWithFn({ options: content })
            : generOptions({ options: content });
          components.push({ options, title });
        } finally {
          // Always close the editor tab, even when scraping it failed.
          await newPage.close();
        }
      }

      const args = components
        // Examples whose generated code contains "$" are left out.
        .filter(({ options }) => !options.includes("$"))
        .map(({ options, title }) => storiesArgs({ options, name: title }))
        .join("\r\n");
      const scripts = storiesTpl({
        namespace: `Charts/${namespace}`,
        components: args,
      });
      fs.writeFileSync(`${outputDir}/${namespace}.stories.ts`, scripts);
      console.log(`${namespace} end`);
    }
  } finally {
    // Without this the launched browser keeps the Node process alive.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
template.mjs content
/**
 * Renders the source of a complete Storybook `*.stories.ts` file.
 *
 * The generated file imports `Charts` from "./Charts" and `echarts`,
 * declares a `ROOT_PATH` constant and a `meta` object, and ends with the
 * story declarations passed in `components`.
 *
 * The generated header only disables ESLint rules that exist; a directive
 * naming an unknown rule makes ESLint report the rule definition as not found.
 *
 * NOTE(review): the `//@ ts-nocheck` line does not look like a directive
 * TypeScript recognises (the documented form is `// @ts-nocheck`). It is kept
 * as-is because correcting it would switch off type checking for every
 * generated story — confirm which behaviour is intended.
 *
 * @param namespace Storybook title of the story group.
 * @param components Pre-rendered story declarations appended after the meta.
 * @returns The file content as a string.
 */
export const storiesTpl = ({ namespace, components }) => `
/* eslint-disable @typescript-eslint/no-unused-vars */
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable prefer-const */
//@ ts-nocheck
import type { Meta, StoryObj } from "@storybook/react";
// your components
import { Charts } from "./Charts";
import * as echarts from 'echarts'
const ROOT_PATH="https://echarts.apache.org/"
const meta = {
title: "${namespace}",
component: Charts,
parameters: {
layout: "centered",
},
tags: ["autodocs"],
// More on argTypes: https://storybook.js.org/docs/api/argtypes
argTypes: {},
} satisfies Meta<typeof Charts>;
export default meta;
type Story = StoryObj<typeof meta>;
${components}
`;
/**
 * Renders one exported story declaration.
 *
 * The output starts with " ;" on its own line, followed by
 * `export const <name>: Story = { args: { <options>, }, };`.
 *
 * @param name Identifier used for the exported story.
 * @param options Pre-rendered property placed inside `args`.
 * @returns The story declaration as a string.
 */
export const storiesArgs = ({ name, options }) => {
  const lines = [
    " ;",
    `export const ${name}: Story = {`,
    "args: {",
    `${options},`,
    "},",
    "};",
  ];
  return lines.join("\n");
};
/**
 * Wraps example source in a `getEchartsInstance` property whose value is a
 * function taking `myChart`; the function declares `option`, runs the
 * example source and returns `option`.
 *
 * @param options Example source to embed in the function body.
 * @returns The rendered property as a string.
 */
export const generOptionsWithFn = ({ options }) => {
  const opening = "getEchartsInstance: function(myChart: any) {";
  const body = ["let option;", `${options};`, "return option"];
  return [opening, ...body, "}"].join("\n");
};
/**
 * Wraps example source in an `options` property whose value is an
 * immediately invoked function; the function declares `option`, runs the
 * example source and returns `option`.
 *
 * @param options Example source to embed in the function body.
 * @returns The rendered property as a string.
 */
export const generOptions = ({ options }) => {
  const pieces = [
    "options:(function() {",
    "let option;",
    `${options};`,
    "return option",
    "})()",
  ];
  return pieces.join("\n");
};
After running the script, wait a while; the `.stories.ts` files will be generated in the `bots/assests` directory.
FAQ
- Unfortunately, so far this is just an example and there is still a considerable way to go.
💖 💪 🙅 🚩
wangzhi
Posted on April 12, 2024
Join Our Newsletter. No Spam, Only the good stuff.
Sign up to receive the latest update from our blog.