From 35cb998ae5fb19653c562eaa2185925b120161fc Mon Sep 17 00:00:00 2001
From: Muhammad Azeem <37941410+AzeemSup@users.noreply.github.com>
Date: Thu, 31 Oct 2024 15:48:27 +0500
Subject: [PATCH] Improve URL handling for external links in index.js
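Add error checks for missing or malformed links in `getUrlInfo`, make domain extraction in `extractMainDomainFromUrl` return an empty string instead of throwing when no domain can be found, and set `rel="noopener noreferrer"` together with `target="_blank"` on external links. Log a warning or error whenever a URL cannot be processed, for easier debugging.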
Signed-off-by: Muhammad Azeem <37941410+AzeemSup@users.noreply.github.com>
---
lib/index.js | 116 ++++++++++++++++++++++++++-------------------------
1 file changed, 60 insertions(+), 56 deletions(-)
diff --git a/lib/index.js b/lib/index.js
index 4faee7a60..814e6faaa 100755
--- a/lib/index.js
+++ b/lib/index.js
@@ -141,74 +141,78 @@ exports.parseFromDirectory = contentPath => {
const mainUrl = 'remoteintech.company'
- function addTargetBlankAndExternalLinkIcons (el) {
- if (el.type === 'tag') {
- const anchorTagElements = el.children.filter(element => element.name === 'a')
- if (anchorTagElements.length > 0) {
- anchorTagElements.forEach(element => {
- const url = element.attribs.href
- const urlInfo = getUrlInfo(url)
-
- if (urlInfo.is_email || urlInfo.is_internal) {
- return
- }
-
- element.attribs.target = '_blank'
-
- $element = $( element )
- $element.append(' ')
- })
- }
-
- if (el.children && el.children.length) {
- el.children.forEach(element => {
- addTargetBlankAndExternalLinkIcons(element)
- })
- }
- }
+ /**
+  * Walks a parsed node and its descendants, marking external links.
+  *
+  * For every `a` element that is a direct child of a `tag` node, the href is
+  * classified with `getUrlInfo`. Links that have an href and are neither email
+  * nor internal get `target="_blank"` and `rel="noopener noreferrer"`, and the
+  * trailing `' '` content is appended through `$`.
+  *
+  * @param {*} el node to process; nodes whose `type` is not `'tag'` are ignored
+  */
+ function addTargetBlankAndExternalLinkIcons(el) {
+   // Nothing to do for non-element nodes.
+   if (el.type !== 'tag') {
+     return;
+   }
+
+   // Snapshot the anchors that sit directly under this element.
+   const directAnchors = el.children.filter(node => node.name === 'a');
+
+   for (const anchor of directAnchors) {
+     const href = anchor.attribs.href;
+     const { is_email, is_internal } = getUrlInfo(href);
+
+     // Skip anchors without an href, mail links and same-site links.
+     if (!href || is_email || is_internal) {
+       continue;
+     }
+
+     anchor.attribs.target = '_blank';
+     anchor.attribs.rel = 'noopener noreferrer';
+     $(anchor).append(' ');
+   }
+
+   // Descend into every child so nested anchors are handled as well.
+   for (const child of el.children) {
+     addTargetBlankAndExternalLinkIcons(child);
+   }
}
/**
* Getting info about the url. It includes checking isEmail of isInternal
* @param {*} url
*/
- function getUrlInfo (url) {
- const data = {}
-
- if (url.match(/^mailto:/)) { // checking url email or not
- data.is_email = true
- return data
- }
-
- const mainDomainFromGivenUrl = extractMainDomainFromUrl(url)
-
- // checking url is email or not
- if (mainDomainFromGivenUrl !== mainUrl) {
- data.is_internal = false
- return data
- } else {
- data.is_internal = true
- }
-
- return data
+ function getUrlInfo(url) {
+   // Both flags are always present so every return path has the same shape.
+   const data = { is_email: false, is_internal: false };
+
+   // Missing or non-string hrefs cannot be classified; report neither flag.
+   if (!url || typeof url !== 'string') {
+     return data;
+   }
+
+   // URI schemes are case-insensitive, so `MAILTO:` must be recognised too.
+   if (/^mailto:/i.test(url)) {
+     data.is_email = true;
+     return data;
+   }
+
+   try {
+     const mainDomainFromGivenUrl = extractMainDomainFromUrl(url);
+
+     // An empty result means no host was found in the link (e.g. `#section`
+     // or `/path`). Such links resolve against the current site, so they are
+     // internal rather than external.
+     data.is_internal =
+       mainDomainFromGivenUrl === '' || mainDomainFromGivenUrl === mainUrl;
+   } catch (error) {
+     // Best effort: an unexpected failure is logged and the link is treated
+     // as not internal, as before.
+     console.error("Error processing URL:", url, error);
+     data.is_internal = false;
+   }
+
+   return data;
}
/**
* Extracting main domain from the url
* @param {*} url
*/
- function extractMainDomainFromUrl (url) {
- const domainRe = /(https?:\/\/){0,1}((?:[\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
-
- const data = domainRe.exec(url)
-
- const splittedDomain = data[2].split('.')
-
- if (splittedDomain.length === 2) { // check extra subdomain is present or not
- return data[2]
- }
-
- return splittedDomain[splittedDomain.length - 2] + '.' + splittedDomain[splittedDomain.length - 1] // return only main domain address
+ function extractMainDomainFromUrl(url) {
+   try {
+     // Unanchored: matches the first domain-looking substring, with an
+     // optional http(s):// prefix. Capture group 2 is the full host name.
+     const domainRe = /(https?:\/\/)?(([\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
+     const data = domainRe.exec(url);
+
+     if (!data || !data[2]) {
+       console.warn("Invalid URL format:", url);
+       return ''; // Return empty if domain extraction fails
+     }
+
+     // Host names are case-insensitive, so normalise to lowercase; otherwise
+     // `Example.COM` and `example.com` would be reported as different domains.
+     const domainParts = data[2].toLowerCase().split('.');
+
+     // Keep only the last two labels (drops any subdomains). The regex
+     // guarantees at least two labels, so a bare two-label host is returned
+     // unchanged. Note: multi-part suffixes such as `co.uk` are not
+     // special-cased and collapse to the suffix itself.
+     return domainParts.slice(-2).join('.');
+   } catch (error) {
+     console.error("Error extracting main domain:", url, error);
+     return ''; // Safe fallback for unexpected input
+   }
}
let lastCompanyName = null;