Added rate limiting, handling of errors and rate-limit responses, and better request headers
This commit is contained in:
302
index.js
302
index.js
@@ -2,16 +2,56 @@ const cheerio = require("cheerio");
|
|||||||
const axios = require("axios");
|
const axios = require("axios");
|
||||||
const randomUseragent = require("random-useragent");
|
const randomUseragent = require("random-useragent");
|
||||||
|
|
||||||
|
// Utility functions
|
||||||
|
/**
 * Returns a promise that settles (with no value) once `ms` milliseconds
 * have elapsed on a `setTimeout` timer.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>} Promise fulfilled when the timer fires.
 */
const delay = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
|
||||||
|
|
||||||
|
// Cache implementation
|
||||||
|
/**
 * In-memory cache with a per-entry time-to-live.
 *
 * Entries are stored in a `Map` together with the timestamp at which they
 * were written. An entry older than `TTL` milliseconds is treated as absent
 * and is removed the next time it is read or when `clear()` runs.
 */
class JobCache {
  /**
   * @param {number} [ttl=3600000] - Lifetime of an entry in milliseconds.
   *   Defaults to one hour, matching the previously hard-coded value.
   * @throws {TypeError} If `ttl` is not a non-negative number.
   */
  constructor(ttl = 1000 * 60 * 60) {
    if (typeof ttl !== "number" || Number.isNaN(ttl) || ttl < 0) {
      throw new TypeError("JobCache ttl must be a non-negative number");
    }
    // Both properties keep their original names: other code reads
    // `cache.size` directly on the underlying Map.
    this.cache = new Map();
    this.TTL = ttl;
  }

  /**
   * Stores `value` under `key`, stamping it with the current time.
   * Writing an existing key replaces the value and restarts its lifetime.
   *
   * @param {*} key - Map key identifying the entry.
   * @param {*} value - Data to cache.
   */
  set(key, value) {
    this.cache.set(key, {
      data: value,
      timestamp: Date.now(),
    });
  }

  /**
   * Looks up `key`.
   *
   * @param {*} key - Map key identifying the entry.
   * @returns {*} The cached data, or `null` when the key is missing or its
   *   entry has outlived the TTL (in which case the entry is also deleted).
   */
  get(key) {
    const item = this.cache.get(key);
    if (!item) return null;

    if (Date.now() - item.timestamp > this.TTL) {
      // Evict lazily so stale data is never handed back to the caller.
      this.cache.delete(key);
      return null;
    }

    return item.data;
  }

  /**
   * Removes every entry that has outlived the TTL.
   *
   * Despite its name this does NOT empty the cache: entries that are still
   * fresh are kept. The current time is sampled once so all entries are
   * judged against the same instant.
   */
  clear() {
    const now = Date.now();
    for (const [key, value] of this.cache.entries()) {
      if (now - value.timestamp > this.TTL) {
        this.cache.delete(key);
      }
    }
  }
}
|
||||||
|
|
||||||
|
const cache = new JobCache();
|
||||||
|
|
||||||
|
// Main query function
|
||||||
module.exports.query = (queryObject) => {
|
module.exports.query = (queryObject) => {
|
||||||
const query = new Query(queryObject);
|
const query = new Query(queryObject);
|
||||||
console.log(query.url(0));
|
|
||||||
return query.getJobs();
|
return query.getJobs();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Query constructor
|
||||||
function Query(queryObj) {
|
function Query(queryObj) {
|
||||||
this.host = queryObj.host || "www.linkedin.com";
|
this.host = queryObj.host || "www.linkedin.com";
|
||||||
this.keyword = queryObj.keyword?.trim().replace(" ", "+") || "";
|
this.keyword = queryObj.keyword?.trim().replace(/\s+/g, "+") || "";
|
||||||
this.location = queryObj.location?.trim().replace(" ", "+") || "";
|
this.location = queryObj.location?.trim().replace(/\s+/g, "+") || "";
|
||||||
this.dateSincePosted = queryObj.dateSincePosted || "";
|
this.dateSincePosted = queryObj.dateSincePosted || "";
|
||||||
this.jobType = queryObj.jobType || "";
|
this.jobType = queryObj.jobType || "";
|
||||||
this.remoteFilter = queryObj.remoteFilter || "";
|
this.remoteFilter = queryObj.remoteFilter || "";
|
||||||
@@ -22,13 +62,14 @@ function Query(queryObj) {
|
|||||||
this.page = Number(queryObj.page) || 0;
|
this.page = Number(queryObj.page) || 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Query prototype methods
|
||||||
Query.prototype.getDateSincePosted = function () {
|
Query.prototype.getDateSincePosted = function () {
|
||||||
const dateRange = {
|
const dateRange = {
|
||||||
"past month": "r2592000",
|
"past month": "r2592000",
|
||||||
"past week": "r604800",
|
"past week": "r604800",
|
||||||
"24hr": "r86400",
|
"24hr": "r86400",
|
||||||
};
|
};
|
||||||
return dateRange[this.dateSincePosted.toLowerCase()] ?? "";
|
return dateRange[this.dateSincePosted.toLowerCase()] || "";
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.getExperienceLevel = function () {
|
Query.prototype.getExperienceLevel = function () {
|
||||||
@@ -40,8 +81,9 @@ Query.prototype.getExperienceLevel = function () {
|
|||||||
director: "5",
|
director: "5",
|
||||||
executive: "6",
|
executive: "6",
|
||||||
};
|
};
|
||||||
return experienceRange[this.experienceLevel.toLowerCase()] ?? "";
|
return experienceRange[this.experienceLevel.toLowerCase()] || "";
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.getJobType = function () {
|
Query.prototype.getJobType = function () {
|
||||||
const jobTypeRange = {
|
const jobTypeRange = {
|
||||||
"full time": "F",
|
"full time": "F",
|
||||||
@@ -53,8 +95,9 @@ Query.prototype.getJobType = function () {
|
|||||||
volunteer: "V",
|
volunteer: "V",
|
||||||
internship: "I",
|
internship: "I",
|
||||||
};
|
};
|
||||||
return jobTypeRange[this.jobType.toLowerCase()] ?? "";
|
return jobTypeRange[this.jobType.toLowerCase()] || "";
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.getRemoteFilter = function () {
|
Query.prototype.getRemoteFilter = function () {
|
||||||
const remoteFilterRange = {
|
const remoteFilterRange = {
|
||||||
"on-site": "1",
|
"on-site": "1",
|
||||||
@@ -62,8 +105,9 @@ Query.prototype.getRemoteFilter = function () {
|
|||||||
remote: "2",
|
remote: "2",
|
||||||
hybrid: "3",
|
hybrid: "3",
|
||||||
};
|
};
|
||||||
return remoteFilterRange[this.remoteFilter.toLowerCase()] ?? "";
|
return remoteFilterRange[this.remoteFilter.toLowerCase()] || "";
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.getSalary = function () {
|
Query.prototype.getSalary = function () {
|
||||||
const salaryRange = {
|
const salaryRange = {
|
||||||
40000: "1",
|
40000: "1",
|
||||||
@@ -72,113 +116,193 @@ Query.prototype.getSalary = function () {
|
|||||||
100000: "4",
|
100000: "4",
|
||||||
120000: "5",
|
120000: "5",
|
||||||
};
|
};
|
||||||
return salaryRange[this.salary.toLowerCase()] ?? "";
|
return salaryRange[this.salary] || "";
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.getPage = function () {
|
Query.prototype.getPage = function () {
|
||||||
return this.page * 25;
|
return this.page * 25;
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.url = function (start) {
|
Query.prototype.url = function (start) {
|
||||||
let query = `https://${this.host}/jobs-guest/jobs/api/seeMoreJobPostings/search?`;
|
let query = `https://${this.host}/jobs-guest/jobs/api/seeMoreJobPostings/search?`;
|
||||||
if (this.keyword !== "") query += `keywords=${this.keyword}`;
|
|
||||||
if (this.location !== "") query += `&location=${this.location}`;
|
const params = new URLSearchParams();
|
||||||
if (this.getDateSincePosted() !== "")
|
|
||||||
query += `&f_TPR=${this.getDateSincePosted()}`;
|
if (this.keyword) params.append("keywords", this.keyword);
|
||||||
if (this.getSalary() !== "") query += `&f_SB2=${this.getSalary()}`;
|
if (this.location) params.append("location", this.location);
|
||||||
if (this.getExperienceLevel() !== "")
|
if (this.getDateSincePosted())
|
||||||
query += `&f_E=${this.getExperienceLevel()}`;
|
params.append("f_TPR", this.getDateSincePosted());
|
||||||
if (this.getRemoteFilter() !== "") query += `&f_WT=${this.getRemoteFilter()}`;
|
if (this.getSalary()) params.append("f_SB2", this.getSalary());
|
||||||
if (this.getJobType() !== "") query += `&f_JT=${this.getJobType()}`;
|
if (this.getExperienceLevel())
|
||||||
if (this.getPage() !== "") query += `&start=${start + this.getPage()};`;
|
params.append("f_E", this.getExperienceLevel());
|
||||||
if (this.sortBy == "recent" || this.sortBy == "relevant") {
|
if (this.getRemoteFilter()) params.append("f_WT", this.getRemoteFilter());
|
||||||
let sortMethod = "R";
|
if (this.getJobType()) params.append("f_JT", this.getJobType());
|
||||||
if (this.sortBy == "recent") sortMethod = "DD";
|
|
||||||
query += `&sortBy=${sortMethod}`;
|
params.append("start", start + this.getPage());
|
||||||
}
|
|
||||||
return encodeURI(query);
|
if (this.sortBy === "recent") params.append("sortBy", "DD");
|
||||||
|
else if (this.sortBy === "relevant") params.append("sortBy", "R");
|
||||||
|
|
||||||
|
return query + params.toString();
|
||||||
};
|
};
|
||||||
|
|
||||||
Query.prototype.getJobs = async function () {
|
Query.prototype.getJobs = async function () {
|
||||||
|
let allJobs = [];
|
||||||
|
let start = 0;
|
||||||
|
const BATCH_SIZE = 25;
|
||||||
|
let hasMore = true;
|
||||||
|
let consecutiveErrors = 0;
|
||||||
|
const MAX_CONSECUTIVE_ERRORS = 3;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let parsedJobs,
|
// Check cache first
|
||||||
resultCount = 1,
|
const cacheKey = this.url(0);
|
||||||
start = 0,
|
const cachedJobs = cache.get(cacheKey);
|
||||||
jobLimit = this.limit,
|
if (cachedJobs) {
|
||||||
allJobs = [];
|
console.log("Returning cached results");
|
||||||
|
return cachedJobs;
|
||||||
|
}
|
||||||
|
|
||||||
while (resultCount > 0) {
|
while (hasMore) {
|
||||||
const userAgent = randomUseragent.getRandom();
|
try {
|
||||||
|
const jobs = await this.fetchJobBatch(start);
|
||||||
|
|
||||||
const { data } = await axios.get(this.url(start), {
|
if (!jobs || jobs.length === 0) {
|
||||||
headers: {
|
hasMore = false;
|
||||||
"User-Agent": userAgent,
|
break;
|
||||||
Accept:
|
}
|
||||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
|
||||||
Connection: "keep-alive",
|
|
||||||
"Upgrade-Insecure-Requests": "1",
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const $ = cheerio.load(data);
|
allJobs.push(...jobs);
|
||||||
const jobs = $("li");
|
console.log(`Fetched ${jobs.length} jobs. Total: ${allJobs.length}`);
|
||||||
resultCount = jobs.length;
|
|
||||||
console.log("I got ", jobs.length, " jobs");
|
|
||||||
|
|
||||||
parsedJobs = parseJobList(data);
|
if (this.limit && allJobs.length >= this.limit) {
|
||||||
allJobs.push(...parsedJobs);
|
allJobs = allJobs.slice(0, this.limit);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
start += 25;
|
// Reset error counter on successful fetch
|
||||||
|
consecutiveErrors = 0;
|
||||||
|
start += BATCH_SIZE;
|
||||||
|
|
||||||
if (jobLimit != 0 && allJobs.length > jobLimit) {
|
// Add reasonable delay between requests
|
||||||
while (allJobs.length != jobLimit) allJobs.pop();
|
await delay(2000 + Math.random() * 1000);
|
||||||
return allJobs;
|
} catch (error) {
|
||||||
|
consecutiveErrors++;
|
||||||
|
console.error(
|
||||||
|
`Error fetching batch (attempt ${consecutiveErrors}):`,
|
||||||
|
error.message
|
||||||
|
);
|
||||||
|
|
||||||
|
if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
|
||||||
|
console.log("Max consecutive errors reached. Stopping.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exponential backoff
|
||||||
|
await delay(Math.pow(2, consecutiveErrors) * 1000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cache results if we got any
|
||||||
|
if (allJobs.length > 0) {
|
||||||
|
cache.set(this.url(0), allJobs);
|
||||||
|
}
|
||||||
|
|
||||||
return allJobs;
|
return allJobs;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(error);
|
console.error("Fatal error in job fetching:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Query.prototype.fetchJobBatch = async function (start) {
|
||||||
|
const headers = {
|
||||||
|
"User-Agent": randomUseragent.getRandom(),
|
||||||
|
Accept: "application/json, text/javascript, */*; q=0.01",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
|
Referer: "https://www.linkedin.com/jobs",
|
||||||
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
|
Connection: "keep-alive",
|
||||||
|
"Sec-Fetch-Dest": "empty",
|
||||||
|
"Sec-Fetch-Mode": "cors",
|
||||||
|
"Sec-Fetch-Site": "same-origin",
|
||||||
|
"Cache-Control": "no-cache",
|
||||||
|
Pragma: "no-cache",
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await axios.get(this.url(start), {
|
||||||
|
headers,
|
||||||
|
validateStatus: function (status) {
|
||||||
|
return status === 200;
|
||||||
|
},
|
||||||
|
timeout: 10000,
|
||||||
|
});
|
||||||
|
|
||||||
|
return parseJobList(response.data);
|
||||||
|
} catch (error) {
|
||||||
|
if (error.response?.status === 429) {
|
||||||
|
throw new Error("Rate limit reached");
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
function parseJobList(jobData) {
|
function parseJobList(jobData) {
|
||||||
const $ = cheerio.load(jobData);
|
try {
|
||||||
const jobs = $("li");
|
const $ = cheerio.load(jobData);
|
||||||
|
const jobs = $("li");
|
||||||
|
|
||||||
const jobObjects = jobs
|
return jobs
|
||||||
.map((index, element) => {
|
.map((index, element) => {
|
||||||
const job = $(element);
|
try {
|
||||||
const position = job.find(".base-search-card__title").text().trim() || "";
|
const job = $(element);
|
||||||
const company =
|
const position = job.find(".base-search-card__title").text().trim();
|
||||||
job.find(".base-search-card__subtitle").text().trim() || "";
|
const company = job.find(".base-search-card__subtitle").text().trim();
|
||||||
const location =
|
const location = job.find(".job-search-card__location").text().trim();
|
||||||
job.find(".job-search-card__location").text().trim() || "";
|
const dateElement = job.find("time");
|
||||||
const date = job.find("time").attr("datetime") || "";
|
const date = dateElement.attr("datetime");
|
||||||
const salary =
|
const salary = job
|
||||||
job
|
.find(".job-search-card__salary-info")
|
||||||
.find(".job-search-card__salary-info")
|
.text()
|
||||||
.text()
|
.trim()
|
||||||
.trim()
|
.replace(/\s+/g, " ");
|
||||||
.replace(/\n/g, "")
|
const jobUrl = job.find(".base-card__full-link").attr("href");
|
||||||
.replaceAll(" ", "") || "";
|
const companyLogo = job
|
||||||
const jobUrl = job.find(".base-card__full-link").attr("href") || "";
|
.find(".artdeco-entity-image")
|
||||||
const companyLogo =
|
.attr("data-delayed-url");
|
||||||
job.find(".artdeco-entity-image").attr("data-delayed-url") || "";
|
const agoTime = job.find(".job-search-card__listdate").text().trim();
|
||||||
const agoTime =
|
|
||||||
job.find(".job-search-card__listdate").text().trim() || "";
|
|
||||||
return {
|
|
||||||
position: position,
|
|
||||||
company: company,
|
|
||||||
companyLogo: companyLogo,
|
|
||||||
location: location,
|
|
||||||
date: date,
|
|
||||||
agoTime: agoTime,
|
|
||||||
salary: salary,
|
|
||||||
jobUrl: jobUrl,
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.get();
|
|
||||||
|
|
||||||
return jobObjects;
|
// Only return job if we have at least position and company
|
||||||
|
if (!position || !company) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
position,
|
||||||
|
company,
|
||||||
|
location,
|
||||||
|
date,
|
||||||
|
salary: salary || "Not specified",
|
||||||
|
jobUrl: jobUrl || "",
|
||||||
|
companyLogo: companyLogo || "",
|
||||||
|
agoTime: agoTime || "",
|
||||||
|
};
|
||||||
|
} catch (err) {
|
||||||
|
console.warn(`Error parsing job at index ${index}:`, err.message);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.get()
|
||||||
|
.filter(Boolean);
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Error parsing job list:", error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Export additional utilities for testing and monitoring
|
||||||
|
module.exports.JobCache = JobCache;
|
||||||
|
module.exports.clearCache = () => cache.clear();
|
||||||
|
module.exports.getCacheSize = () => cache.cache.size;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "linkedin-jobs-api",
|
"name": "linkedin-jobs-api",
|
||||||
"version": "1.0.5",
|
"version": "1.0.6",
|
||||||
"description": "advanced node.js package for getting job listings from LinkedIn",
|
"description": "advanced node.js package for getting job listings from LinkedIn",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
Reference in New Issue
Block a user