diff --git a/index.js b/index.js index f67178c..27eca98 100644 --- a/index.js +++ b/index.js @@ -2,16 +2,56 @@ const cheerio = require("cheerio"); const axios = require("axios"); const randomUseragent = require("random-useragent"); +// Utility functions +const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + +// Cache implementation +class JobCache { + constructor() { + this.cache = new Map(); + this.TTL = 1000 * 60 * 60; // 1 hour + } + + set(key, value) { + this.cache.set(key, { + data: value, + timestamp: Date.now(), + }); + } + + get(key) { + const item = this.cache.get(key); + if (!item) return null; + if (Date.now() - item.timestamp > this.TTL) { + this.cache.delete(key); + return null; + } + return item.data; + } + + clear() { + const now = Date.now(); + for (const [key, value] of this.cache.entries()) { + if (now - value.timestamp > this.TTL) { + this.cache.delete(key); + } + } + } +} + +const cache = new JobCache(); + +// Main query function module.exports.query = (queryObject) => { const query = new Query(queryObject); - console.log(query.url(0)); return query.getJobs(); }; +// Query constructor function Query(queryObj) { this.host = queryObj.host || "www.linkedin.com"; - this.keyword = queryObj.keyword?.trim().replace(" ", "+") || ""; - this.location = queryObj.location?.trim().replace(" ", "+") || ""; + this.keyword = queryObj.keyword?.trim().replace(/\s+/g, "+") || ""; + this.location = queryObj.location?.trim().replace(/\s+/g, "+") || ""; this.dateSincePosted = queryObj.dateSincePosted || ""; this.jobType = queryObj.jobType || ""; this.remoteFilter = queryObj.remoteFilter || ""; @@ -22,13 +62,14 @@ function Query(queryObj) { this.page = Number(queryObj.page) || 0; } +// Query prototype methods Query.prototype.getDateSincePosted = function () { const dateRange = { "past month": "r2592000", "past week": "r604800", "24hr": "r86400", }; - return dateRange[this.dateSincePosted.toLowerCase()] ?? ""; + return dateRange[this.dateSincePosted.toLowerCase()] || ""; }; Query.prototype.getExperienceLevel = function () { @@ -40,8 +81,9 @@ Query.prototype.getExperienceLevel = function () { director: "5", executive: "6", }; - return experienceRange[this.experienceLevel.toLowerCase()] ?? ""; + return experienceRange[this.experienceLevel.toLowerCase()] || ""; }; + Query.prototype.getJobType = function () { const jobTypeRange = { "full time": "F", @@ -53,8 +95,9 @@ Query.prototype.getJobType = function () { volunteer: "V", internship: "I", }; - return jobTypeRange[this.jobType.toLowerCase()] ?? ""; + return jobTypeRange[this.jobType.toLowerCase()] || ""; }; + Query.prototype.getRemoteFilter = function () { const remoteFilterRange = { "on-site": "1", @@ -62,8 +105,9 @@ Query.prototype.getRemoteFilter = function () { remote: "2", hybrid: "3", }; - return remoteFilterRange[this.remoteFilter.toLowerCase()] ?? ""; + return remoteFilterRange[this.remoteFilter.toLowerCase()] || ""; }; + Query.prototype.getSalary = function () { const salaryRange = { 40000: "1", @@ -72,113 +116,193 @@ Query.prototype.getSalary = function () { 100000: "4", 120000: "5", }; - return salaryRange[this.salary.toLowerCase()] ?? ""; + return salaryRange[this.salary] || ""; }; Query.prototype.getPage = function () { return this.page * 25; }; + Query.prototype.url = function (start) { let query = `https://${this.host}/jobs-guest/jobs/api/seeMoreJobPostings/search?`; - if (this.keyword !== "") query += `keywords=${this.keyword}`; - if (this.location !== "") query += `&location=${this.location}`; - if (this.getDateSincePosted() !== "") - query += `&f_TPR=${this.getDateSincePosted()}`; - if (this.getSalary() !== "") query += `&f_SB2=${this.getSalary()}`; - if (this.getExperienceLevel() !== "") - query += `&f_E=${this.getExperienceLevel()}`; - if (this.getRemoteFilter() !== "") query += `&f_WT=${this.getRemoteFilter()}`; - if (this.getJobType() !== "") query += `&f_JT=${this.getJobType()}`; - if (this.getPage() !== "") query += `&start=${start + this.getPage()};`; - if (this.sortBy == "recent" || this.sortBy == "relevant") { - let sortMethod = "R"; - if (this.sortBy == "recent") sortMethod = "DD"; - query += `&sortBy=${sortMethod}`; - } - return encodeURI(query); + + const params = new URLSearchParams(); + + if (this.keyword) params.append("keywords", this.keyword); + if (this.location) params.append("location", this.location); + if (this.getDateSincePosted()) + params.append("f_TPR", this.getDateSincePosted()); + if (this.getSalary()) params.append("f_SB2", this.getSalary()); + if (this.getExperienceLevel()) + params.append("f_E", this.getExperienceLevel()); + if (this.getRemoteFilter()) params.append("f_WT", this.getRemoteFilter()); + if (this.getJobType()) params.append("f_JT", this.getJobType()); + + params.append("start", start + this.getPage()); + + if (this.sortBy === "recent") params.append("sortBy", "DD"); + else if (this.sortBy === "relevant") params.append("sortBy", "R"); + + return query + params.toString(); }; Query.prototype.getJobs = async function () { + let allJobs = []; + let start = 0; + const BATCH_SIZE = 25; + let hasMore = true; + let consecutiveErrors = 0; + const MAX_CONSECUTIVE_ERRORS = 3; + try { - let parsedJobs, - resultCount = 1, - start = 0, - jobLimit = this.limit, - allJobs = []; + // Check cache first + const cacheKey = this.url(0); + const cachedJobs = cache.get(cacheKey); + if (cachedJobs) { + console.log("Returning cached results"); + return cachedJobs; + } - while (resultCount > 0) { - const userAgent = randomUseragent.getRandom(); + while (hasMore) { + try { + const jobs = await this.fetchJobBatch(start); - const { data } = await axios.get(this.url(start), { - headers: { - "User-Agent": userAgent, - Accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", - "Accept-Encoding": "gzip, deflate, br", - Connection: "keep-alive", - "Upgrade-Insecure-Requests": "1", - }, - }); + if (!jobs || jobs.length === 0) { + hasMore = false; + break; + } - const $ = cheerio.load(data); - const jobs = $("li"); - resultCount = jobs.length; - console.log("I got ", jobs.length, " jobs"); + allJobs.push(...jobs); + console.log(`Fetched ${jobs.length} jobs. Total: ${allJobs.length}`); - parsedJobs = parseJobList(data); - allJobs.push(...parsedJobs); + if (this.limit && allJobs.length >= this.limit) { + allJobs = allJobs.slice(0, this.limit); + break; + } - start += 25; + // Reset error counter on successful fetch + consecutiveErrors = 0; + start += BATCH_SIZE; - if (jobLimit != 0 && allJobs.length > jobLimit) { - while (allJobs.length != jobLimit) allJobs.pop(); - return allJobs; + // Add reasonable delay between requests + await delay(2000 + Math.random() * 1000); + } catch (error) { + consecutiveErrors++; + console.error( + `Error fetching batch (attempt ${consecutiveErrors}):`, + error.message + ); + + if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) { + console.log("Max consecutive errors reached. Stopping."); + break; + } + + // Exponential backoff + await delay(Math.pow(2, consecutiveErrors) * 1000); } } + + // Cache results if we got any + if (allJobs.length > 0) { + cache.set(this.url(0), allJobs); + } + return allJobs; } catch (error) { - console.error(error); + console.error("Fatal error in job fetching:", error); + throw error; + } +}; + +Query.prototype.fetchJobBatch = async function (start) { + const headers = { + "User-Agent": randomUseragent.getRandom(), + Accept: "application/json, text/javascript, */*; q=0.01", + "Accept-Language": "en-US,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br", + Referer: "https://www.linkedin.com/jobs", + "X-Requested-With": "XMLHttpRequest", + Connection: "keep-alive", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "Cache-Control": "no-cache", + Pragma: "no-cache", + }; + + try { + const response = await axios.get(this.url(start), { + headers, + validateStatus: function (status) { + return status === 200; + }, + timeout: 10000, + }); + + return parseJobList(response.data); + } catch (error) { + if (error.response?.status === 429) { + throw new Error("Rate limit reached"); + } + throw error; } }; function parseJobList(jobData) { - const $ = cheerio.load(jobData); - const jobs = $("li"); + try { + const $ = cheerio.load(jobData); + const jobs = $("li"); - const jobObjects = jobs - .map((index, element) => { - const job = $(element); - const position = job.find(".base-search-card__title").text().trim() || ""; - const company = - job.find(".base-search-card__subtitle").text().trim() || ""; - const location = - job.find(".job-search-card__location").text().trim() || ""; - const date = job.find("time").attr("datetime") || ""; - const salary = - job - .find(".job-search-card__salary-info") - .text() - .trim() - .replace(/\n/g, "") - .replaceAll(" ", "") || ""; - const jobUrl = job.find(".base-card__full-link").attr("href") || ""; - const companyLogo = - job.find(".artdeco-entity-image").attr("data-delayed-url") || ""; - const agoTime = - job.find(".job-search-card__listdate").text().trim() || ""; - return { - position: position, - company: company, - companyLogo: companyLogo, - location: location, - date: date, - agoTime: agoTime, - salary: salary, - jobUrl: jobUrl, - }; - }) - .get(); + return jobs + .map((index, element) => { + try { + const job = $(element); + const position = job.find(".base-search-card__title").text().trim(); + const company = job.find(".base-search-card__subtitle").text().trim(); + const location = job.find(".job-search-card__location").text().trim(); + const dateElement = job.find("time"); + const date = dateElement.attr("datetime"); + const salary = job + .find(".job-search-card__salary-info") + .text() + .trim() + .replace(/\s+/g, " "); + const jobUrl = job.find(".base-card__full-link").attr("href"); + const companyLogo = job + .find(".artdeco-entity-image") + .attr("data-delayed-url"); + const agoTime = job.find(".job-search-card__listdate").text().trim(); - return jobObjects; + // Only return job if we have at least position and company + if (!position || !company) { + return null; + } + + return { + position, + company, + location, + date, + salary: salary || "Not specified", + jobUrl: jobUrl || "", + companyLogo: companyLogo || "", + agoTime: agoTime || "", + }; + } catch (err) { + console.warn(`Error parsing job at index ${index}:`, err.message); + return null; + } + }) + .get() + .filter(Boolean); + } catch (error) { + console.error("Error parsing job list:", error); + return []; + } } + +// Export additional utilities for testing and monitoring +module.exports.JobCache = JobCache; +module.exports.clearCache = () => cache.clear(); +module.exports.getCacheSize = () => cache.cache.size; diff --git a/package.json b/package.json index fb29c6e..95a7440 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "linkedin-jobs-api", - "version": "1.0.5", + "version": "1.0.6", "description": "advanced node.js package for getting job listings from LinkedIn", "main": "index.js", "scripts": {