Added rate limiting, error and rate-limit handling, and better request headers
This commit is contained in:
302
index.js
302
index.js
@@ -2,16 +2,56 @@ const cheerio = require("cheerio");
|
||||
const axios = require("axios");
|
||||
const randomUseragent = require("random-useragent");
|
||||
|
||||
// Utility functions
|
||||
/**
 * Pause helper: returns a promise that settles (with no value) once `ms`
 * milliseconds have elapsed.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>}
 */
const delay = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
|
||||
|
||||
// Cache implementation
|
||||
/**
 * Small in-memory cache whose entries expire after a fixed time-to-live.
 *
 * Entries are stored in a Map as `{ data, timestamp }`. Expiry is lazy: an
 * expired entry is only removed when it is read through `get()` or when
 * `clear()` sweeps the map.
 */
class JobCache {
  /**
   * @param {number} [ttl=3600000] - Entry lifetime in milliseconds.
   *   Defaults to one hour.
   */
  constructor(ttl = 1000 * 60 * 60) {
    this.cache = new Map();
    this.TTL = ttl;
  }

  /**
   * Store `value` under `key`, stamping it with the current time.
   * An existing entry for the same key is overwritten.
   *
   * @param {*} key - Cache key.
   * @param {*} value - Payload to store.
   */
  set(key, value) {
    this.cache.set(key, {
      data: value,
      timestamp: Date.now(),
    });
  }

  /**
   * Look up `key`.
   *
   * @param {*} key - Cache key.
   * @returns {*} The stored payload, or `null` when the key is absent or its
   *   entry is older than the TTL (in which case the entry is also deleted).
   */
  get(key) {
    const item = this.cache.get(key);
    if (!item) return null;

    if (Date.now() - item.timestamp > this.TTL) {
      this.cache.delete(key);
      return null;
    }

    return item.data;
  }

  /**
   * Evict every entry that has outlived the TTL.
   *
   * Despite the name, this does NOT empty the cache: entries that are still
   * fresh are kept.
   */
  clear() {
    const now = Date.now();
    // Deleting the current key while iterating a Map is safe in JavaScript.
    for (const [key, value] of this.cache.entries()) {
      if (now - value.timestamp > this.TTL) {
        this.cache.delete(key);
      }
    }
  }
}
|
||||
|
||||
const cache = new JobCache();
|
||||
|
||||
// Main query function
|
||||
/**
 * Public entry point: builds a Query from the caller's options, logs the URL
 * of the first result page, and starts fetching.
 *
 * @param {object} queryObject - Search options handed to the Query constructor.
 * @returns {Promise<Array>} Whatever `Query#getJobs` resolves with.
 */
module.exports.query = (queryObject) => {
  const jobQuery = new Query(queryObject);
  const firstPageUrl = jobQuery.url(0);
  console.log(firstPageUrl);
  return jobQuery.getJobs();
};
|
||||
|
||||
// Query constructor
|
||||
function Query(queryObj) {
|
||||
this.host = queryObj.host || "www.linkedin.com";
|
||||
this.keyword = queryObj.keyword?.trim().replace(" ", "+") || "";
|
||||
this.location = queryObj.location?.trim().replace(" ", "+") || "";
|
||||
this.keyword = queryObj.keyword?.trim().replace(/\s+/g, "+") || "";
|
||||
this.location = queryObj.location?.trim().replace(/\s+/g, "+") || "";
|
||||
this.dateSincePosted = queryObj.dateSincePosted || "";
|
||||
this.jobType = queryObj.jobType || "";
|
||||
this.remoteFilter = queryObj.remoteFilter || "";
|
||||
@@ -22,13 +62,14 @@ function Query(queryObj) {
|
||||
this.page = Number(queryObj.page) || 0;
|
||||
}
|
||||
|
||||
// Query prototype methods
|
||||
/**
 * Translate the human-readable `dateSincePosted` option into the value used
 * for the `f_TPR` query parameter.
 *
 * Matching is case-insensitive. A Map is used instead of a plain object so
 * that inputs such as "constructor" cannot resolve to inherited
 * Object.prototype members.
 *
 * @returns {string} The filter code, or "" when the option is empty or not
 *   one of the recognised labels.
 */
Query.prototype.getDateSincePosted = function () {
  const dateRange = new Map([
    ["past month", "r2592000"],
    ["past week", "r604800"],
    ["24hr", "r86400"],
  ]);
  return dateRange.get(this.dateSincePosted.toLowerCase()) ?? "";
};
|
||||
|
||||
Query.prototype.getExperienceLevel = function () {
|
||||
@@ -40,8 +81,9 @@ Query.prototype.getExperienceLevel = function () {
|
||||
director: "5",
|
||||
executive: "6",
|
||||
};
|
||||
return experienceRange[this.experienceLevel.toLowerCase()] ?? "";
|
||||
return experienceRange[this.experienceLevel.toLowerCase()] || "";
|
||||
};
|
||||
|
||||
Query.prototype.getJobType = function () {
|
||||
const jobTypeRange = {
|
||||
"full time": "F",
|
||||
@@ -53,8 +95,9 @@ Query.prototype.getJobType = function () {
|
||||
volunteer: "V",
|
||||
internship: "I",
|
||||
};
|
||||
return jobTypeRange[this.jobType.toLowerCase()] ?? "";
|
||||
return jobTypeRange[this.jobType.toLowerCase()] || "";
|
||||
};
|
||||
|
||||
Query.prototype.getRemoteFilter = function () {
|
||||
const remoteFilterRange = {
|
||||
"on-site": "1",
|
||||
@@ -62,8 +105,9 @@ Query.prototype.getRemoteFilter = function () {
|
||||
remote: "2",
|
||||
hybrid: "3",
|
||||
};
|
||||
return remoteFilterRange[this.remoteFilter.toLowerCase()] ?? "";
|
||||
return remoteFilterRange[this.remoteFilter.toLowerCase()] || "";
|
||||
};
|
||||
|
||||
Query.prototype.getSalary = function () {
|
||||
const salaryRange = {
|
||||
40000: "1",
|
||||
@@ -72,113 +116,193 @@ Query.prototype.getSalary = function () {
|
||||
100000: "4",
|
||||
120000: "5",
|
||||
};
|
||||
return salaryRange[this.salary.toLowerCase()] ?? "";
|
||||
return salaryRange[this.salary] || "";
|
||||
};
|
||||
|
||||
/**
 * Convert the page index held in `this.page` into a result offset,
 * using 25 results per page.
 *
 * @returns {number} `this.page` multiplied by 25.
 */
Query.prototype.getPage = function () {
  const resultsPerPage = 25;
  const offset = this.page * resultsPerPage;
  return offset;
};
|
||||
|
||||
/**
 * Build the guest job-search URL for this query.
 *
 * Only filters that resolve to a non-empty code are included; `start` is
 * always present and equals `start + this.getPage()`.
 *
 * @param {number} start - Result offset of the batch being requested; the
 *   offset of the configured page is added on top.
 * @returns {string} Fully encoded request URL.
 */
Query.prototype.url = function (start) {
  const base = `https://${this.host}/jobs-guest/jobs/api/seeMoreJobPostings/search?`;
  const params = new URLSearchParams();

  // The constructor stores keyword/location with whitespace already replaced
  // by "+". URLSearchParams would percent-encode that "+" as "%2B" (a literal
  // plus sign), so turn it back into a space and let URLSearchParams emit the
  // "+" itself.
  const plusToSpace = (text) => text.replace(/\+/g, " ");

  if (this.keyword) params.append("keywords", plusToSpace(this.keyword));
  if (this.location) params.append("location", plusToSpace(this.location));

  // Resolve each filter once, keeping the original parameter order.
  const optionalFilters = [
    ["f_TPR", this.getDateSincePosted()],
    ["f_SB2", this.getSalary()],
    ["f_E", this.getExperienceLevel()],
    ["f_WT", this.getRemoteFilter()],
    ["f_JT", this.getJobType()],
  ];
  for (const [name, value] of optionalFilters) {
    if (value) params.append(name, value);
  }

  params.append("start", start + this.getPage());

  if (this.sortBy === "recent") params.append("sortBy", "DD");
  else if (this.sortBy === "relevant") params.append("sortBy", "R");

  return base + params.toString();
};
|
||||
|
||||
/**
 * Fetch job listings batch by batch until the source runs dry, the optional
 * `this.limit` is reached, or too many consecutive requests fail.
 *
 * Behaviour:
 * - Results are looked up in, and stored to, the module-level cache. The key
 *   combines the first-page URL with the limit, so the same search with a
 *   different limit is not served a result set truncated for another limit.
 * - Arrays handed to callers are shallow copies, so pushing to or sorting a
 *   returned array cannot alter what the cache holds (the job objects
 *   themselves are still shared).
 * - After a failed batch the same offset is retried with exponential backoff;
 *   three failures in a row stop the loop and whatever was collected so far
 *   is returned (and cached if non-empty).
 *
 * @returns {Promise<Array>} Collected jobs, at most `this.limit` when the
 *   limit is truthy.
 * @throws Re-throws anything raised outside the per-batch retry loop after
 *   logging it.
 */
Query.prototype.getJobs = async function () {
  const BATCH_SIZE = 25;
  const MAX_CONSECUTIVE_ERRORS = 3;

  let allJobs = [];
  let start = 0;
  let hasMore = true;
  let consecutiveErrors = 0;

  try {
    // Check cache first
    const cacheKey = `${this.url(0)}#limit=${this.limit || 0}`;
    const cachedJobs = cache.get(cacheKey);
    if (cachedJobs) {
      console.log("Returning cached results");
      return [...cachedJobs];
    }

    while (hasMore) {
      try {
        const jobs = await this.fetchJobBatch(start);

        if (!jobs || jobs.length === 0) {
          hasMore = false;
          break;
        }

        allJobs.push(...jobs);
        console.log(`Fetched ${jobs.length} jobs. Total: ${allJobs.length}`);

        if (this.limit && allJobs.length >= this.limit) {
          allJobs = allJobs.slice(0, this.limit);
          break;
        }

        // Reset error counter on successful fetch
        consecutiveErrors = 0;
        start += BATCH_SIZE;

        // Add reasonable delay between requests
        await delay(2000 + Math.random() * 1000);
      } catch (error) {
        consecutiveErrors++;
        console.error(
          `Error fetching batch (attempt ${consecutiveErrors}):`,
          error.message
        );

        if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
          console.log("Max consecutive errors reached. Stopping.");
          break;
        }

        // Exponential backoff
        await delay(Math.pow(2, consecutiveErrors) * 1000);
      }
    }

    // Cache results if we got any
    if (allJobs.length > 0) {
      cache.set(cacheKey, [...allJobs]);
    }

    return allJobs;
  } catch (error) {
    console.error("Fatal error in job fetching:", error);
    throw error;
  }
};
|
||||
|
||||
/**
 * Request one batch of search results and parse it into job objects.
 *
 * Any status other than 200 is treated as a failure (axios rejects because of
 * `validateStatus`). A 429 response is re-thrown as an Error with the message
 * "Rate limit reached"; the underlying axios error is kept on `cause` and the
 * status on `status` so callers can still inspect the response.
 *
 * @param {number} start - Result offset passed through to `this.url`.
 * @returns {Promise<Array>} Whatever `parseJobList` produces from the body.
 * @throws {Error} "Rate limit reached" on HTTP 429; otherwise the original
 *   request error.
 */
Query.prototype.fetchJobBatch = async function (start) {
  // Keep Referer on the same host as the request so it agrees with the
  // "Sec-Fetch-Site: same-origin" header when a non-default host is used.
  const host = this.host || "www.linkedin.com";

  const headers = {
    "User-Agent": randomUseragent.getRandom(),
    Accept: "application/json, text/javascript, */*; q=0.01",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    Referer: `https://${host}/jobs`,
    "X-Requested-With": "XMLHttpRequest",
    Connection: "keep-alive",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Cache-Control": "no-cache",
    Pragma: "no-cache",
  };

  try {
    const response = await axios.get(this.url(start), {
      headers,
      validateStatus: (status) => status === 200,
      timeout: 10000,
    });

    return parseJobList(response.data);
  } catch (error) {
    if (error?.response?.status === 429) {
      const rateLimitError = new Error("Rate limit reached", { cause: error });
      rateLimitError.status = 429;
      throw rateLimitError;
    }
    throw error;
  }
};
|
||||
|
||||
/**
 * Extract job cards from a search-results HTML fragment.
 *
 * Every `<li>` is treated as one card. Cards lacking a position or a company
 * are dropped. A card that throws while being read is logged and dropped; a
 * failure to load the markup at all is logged and yields an empty list.
 *
 * @param {string} jobData - HTML returned by the search endpoint.
 * @returns {Array<{position: string, company: string, location: string,
 *   date: string, salary: string, jobUrl: string, companyLogo: string,
 *   agoTime: string}>} Parsed jobs. `salary` is "Not specified" when the card
 *   has none; missing attributes (`date`, `jobUrl`, `companyLogo`) are "".
 */
function parseJobList(jobData) {
  try {
    const $ = cheerio.load(jobData);

    return $("li")
      .map((index, element) => {
        try {
          const job = $(element);
          const textOf = (selector) => job.find(selector).text().trim();
          const attrOf = (selector, name) =>
            job.find(selector).attr(name) || "";

          const position = textOf(".base-search-card__title");
          const company = textOf(".base-search-card__subtitle");

          // Only return job if we have at least position and company
          if (!position || !company) {
            return null;
          }

          return {
            position,
            company,
            location: textOf(".job-search-card__location"),
            // `attr` yields undefined when the attribute is absent; normalise
            // to "" like the other optional fields.
            date: attrOf("time", "datetime"),
            salary:
              textOf(".job-search-card__salary-info").replace(/\s+/g, " ") ||
              "Not specified",
            jobUrl: attrOf(".base-card__full-link", "href"),
            companyLogo: attrOf(".artdeco-entity-image", "data-delayed-url"),
            agoTime: textOf(".job-search-card__listdate"),
          };
        } catch (err) {
          console.warn(`Error parsing job at index ${index}:`, err.message);
          return null;
        }
      })
      .get()
      .filter(Boolean);
  } catch (error) {
    console.error("Error parsing job list:", error);
    return [];
  }
}
|
||||
|
||||
// Export additional utilities for testing and monitoring
|
||||
// Expose the cache class itself (e.g. so tests can create their own instance).
module.exports.JobCache = JobCache;
|
||||
// Delegates to JobCache#clear on the shared module-level cache. That method
// removes only entries older than the TTL; fresh entries stay cached.
module.exports.clearCache = () => cache.clear();
|
||||
// Number of entries currently held in the shared cache's Map. Expired entries
// that have not yet been read or swept are still counted.
module.exports.getCacheSize = () => cache.cache.size;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "linkedin-jobs-api",
|
||||
"version": "1.0.5",
|
||||
"version": "1.0.6",
|
||||
"description": "advanced node.js package for getting job listings from LinkedIn",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
|
||||
Reference in New Issue
Block a user