Certainly! To create a URL metadata scraper app using Node.js and Express, you can follow the steps below:
Step 1: Set Up Your Project
Create a new project folder:
mkdir url-metadata-scraper && cd url-metadata-scraper
Initialize a new Node.js project:
npm init -y
Install necessary dependencies:
npm install express axios cheerio ejs
Step 2: Create Your Folder Structure
Create the following folder structure:
url-metadata-scraper/
|-- public/
|   |-- scripts/
|   |   |-- main.js
|   |-- styles/
|   |   |-- main.css
|-- src/
|   |-- routes/
|   |   |-- index.js
|   |-- app.js
|-- views/
|   |-- index.ejs
|-- server.js
Step 3: Set Up Express Server
In src/app.js, set up an Express server:
// src/app.js — builds and exports the configured Express application.
// Listening on a port is left to the module that imports this one.
const path = require('path');
const express = require('express');

const app = express();

// Resolve directories relative to this file rather than process.cwd(), so the
// app finds its views and static assets no matter where `node` is started from.
const PROJECT_ROOT = path.join(__dirname, '..');

app.set('view engine', 'ejs');
app.set('views', path.join(PROJECT_ROOT, 'views'));
app.use(express.static(path.join(PROJECT_ROOT, 'public')));

// Parse JSON request bodies. Without this middleware `req.body` is undefined
// and the POST /scrape handler cannot read the submitted URL.
app.use(express.json());

// Set up routes
const indexRoutes = require('./routes/index');
app.use('/', indexRoutes);

module.exports = app;
Step 4: Create Routes
In src/routes/index.js, set up routes for the URL metadata scraper:
// src/routes/index.js — routes for the URL metadata scraper.
const express = require('express');
const router = express.Router();
const axios = require('axios');
const cheerio = require('cheerio');

// Upper bound on how long a remote page may take to respond before we give up.
const REQUEST_TIMEOUT_MS = 10000;

/**
 * Parses user input into an absolute http(s) URL.
 *
 * @param {unknown} input - Raw value taken from the request body.
 * @returns {URL|null} The parsed URL, or null when the input is not a string,
 *   is not a valid absolute URL, or uses a scheme other than http/https.
 */
function parseHttpUrl(input) {
  if (typeof input !== 'string') {
    return null;
  }
  try {
    const parsed = new URL(input.trim());
    const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
    return isHttp ? parsed : null;
  } catch (error) {
    // new URL() throws a TypeError on anything it cannot parse.
    return null;
  }
}

/**
 * Resolves a possibly relative URL against the page it was found on.
 *
 * @param {string|undefined} value - URL taken from a meta tag.
 * @param {string} base - Absolute URL of the scraped page.
 * @returns {string|undefined} The absolute URL; the input unchanged when it is
 *   empty or cannot be resolved.
 */
function toAbsoluteUrl(value, base) {
  if (!value) {
    return value;
  }
  try {
    return new URL(value, base).href;
  } catch (error) {
    return value;
  }
}

// Render the form page.
router.get('/', (req, res) => {
  res.render('index');
});

/**
 * POST /scrape
 * Body: { url: string }
 * Responds with { title, description, image } on success, 400 with { error }
 * when the URL is missing or invalid, and 500 with { error } when the page
 * cannot be fetched or parsed.
 */
router.post('/scrape', async (req, res) => {
  // req.body is undefined when no body parser ran or no body was sent.
  const target = parseHttpUrl(req.body && req.body.url);
  if (!target) {
    return res.status(400).json({ error: 'Please provide a valid http(s) URL.' });
  }

  // NOTE(security): this fetches an arbitrary user-supplied URL from the
  // server (SSRF risk). Before exposing the app publicly, reject hosts that
  // resolve to loopback, private or link-local addresses.
  try {
    const response = await axios.get(target.href, { timeout: REQUEST_TIMEOUT_MS });
    const $ = cheerio.load(response.data);

    const title = $('head title').first().text().trim();
    const description =
      $('meta[name="description"]').attr('content') ||
      $('meta[property="og:description"]').attr('content');
    // Twitter card tags are normally published with `name`, but some sites
    // use `property`, so both are accepted.
    const image =
      $('meta[property="og:image"]').attr('content') ||
      $('meta[name="twitter:image"]').attr('content') ||
      $('meta[property="twitter:image"]').attr('content');

    // Image URLs may be relative to the scraped page; make them absolute so
    // the browser does not resolve them against this app's own origin.
    return res.json({ title, description, image: toAbsoluteUrl(image, target.href) });
  } catch (error) {
    console.error(error);
    return res.status(500).json({ error: 'Error scraping metadata. Please try again.' });
  }
});

module.exports = router;
Step 5: Create Views
In views/index.ejs, create the view for the URL metadata scraper:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="stylesheet" href="/styles/main.css">
  <title>URL Metadata Scraper</title>
</head>
<body>
  <h1>URL Metadata Scraper</h1>

  <!--
    The form is handled through its submit event so that pressing Enter and
    clicking the button behave the same way. preventDefault() stops the native
    submission, which would otherwise reload the page. Using a real submit
    button also lets the browser enforce `required` and the URL format.
  -->
  <form id="urlForm" onsubmit="event.preventDefault(); scrapeMetadata();">
    <label for="url">Enter URL:</label>
    <input type="url" id="url" name="url" placeholder="https://example.com" required>
    <button type="submit">Scrape Metadata</button>
  </form>

  <!-- Hidden until a scrape succeeds; filled in by /scripts/main.js. -->
  <div id="metadataResult" style="display: none;">
    <h2>Metadata Result</h2>
    <p id="title"></p>
    <p id="description"></p>
    <img id="image" alt="Metadata Image">
  </div>

  <script src="/scripts/main.js"></script>
</body>
</html>
Step 6: Create CSS Styles
In public/styles/main.css, add your styles:
/* Stylesheet for the URL Metadata Scraper page. */

/* Page-wide font and outer spacing. */
body {
font-family: 'Arial', sans-serif;
margin: 20px;
}
/* Heading colour. */
h1, h2 {
color: #333;
}
/* Space below the form. */
form {
margin-bottom: 20px;
}
label {
font-weight: bold;
}
/* Inputs span the full available width, with spacing above and below. */
input {
width: 100%;
padding: 8px;
margin-top: 8px;
margin-bottom: 16px;
}
/* Blue button with white text and a pointer cursor. */
button {
background-color: #007BFF;
color: #fff;
padding: 10px;
cursor: pointer;
}
/* Element with id "metadataResult". */
#metadataResult {
margin-top: 20px;
}
/* Images never exceed their container's width and keep their aspect ratio. */
img {
max-width: 100%;
height: auto;
margin-top: 10px;
}
Step 7: Create JavaScript for URL Metadata Scraper
In public/scripts/main.js, add JavaScript to handle the URL metadata scraping:
/**
 * Reads the URL from the `#url` input, requests its metadata from the
 * server's POST /scrape endpoint and renders the result into
 * `#metadataResult`.
 *
 * If the input is blank the user is asked for a URL and no request is sent.
 * On any failure (network error, non-2xx response, invalid JSON) the result
 * panel is hidden and the user is alerted.
 *
 * @returns {Promise<void>} Resolves once the page has been updated.
 */
async function scrapeMetadata() {
  const url = document.getElementById('url').value.trim();
  const metadataResult = document.getElementById('metadataResult');

  if (!url) {
    metadataResult.style.display = 'none';
    alert('Please enter a URL.');
    return;
  }

  try {
    const response = await fetch('/scrape', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ url }),
    });

    // fetch() rejects only on network failure; an HTTP error status still
    // resolves, so it has to be turned into an error explicitly.
    if (!response.ok) {
      throw new Error(`Scrape request failed with status ${response.status}`);
    }

    const data = await response.json();

    document.getElementById('title').innerText = `Title: ${data.title || 'N/A'}`;
    document.getElementById('description').innerText = `Description: ${data.description || 'N/A'}`;

    // An empty `src` makes the browser show a broken image (and may re-request
    // the current page), so the element is hidden when there is no image.
    const imageElement = document.getElementById('image');
    if (data.image) {
      imageElement.src = data.image;
      imageElement.hidden = false;
    } else {
      imageElement.removeAttribute('src');
      imageElement.hidden = true;
    }

    metadataResult.style.display = 'block';
  } catch (error) {
    console.error(error);
    metadataResult.style.display = 'none';
    alert('Error scraping metadata. Please try again.');
  }
}
Step 8: Run Your Application
In server.js, use the exported app to start the application:
// server.js — entry point: loads the Express app exported by src/app.js and
// starts it listening on a port.
const app = require('./src/app');

// Use the PORT environment variable when it is set; otherwise fall back to 3000.
const PORT = process.env.PORT || 3000;

/** Logs the local address once the server is accepting connections. */
function onListening() {
  console.log(`Server is running on http://localhost:${PORT}`);
}

app.listen(PORT, onListening);
Start the server:
node server.js
Visit http://localhost:3000 in your browser. You should see the URL Metadata Scraper app where you can enter a URL, click the "Scrape Metadata" button, and view the scraped metadata.