Skip to content

Commit

Permalink
Improve URL handling for external links in index.js
Browse files Browse the repository at this point in the history
Improved URL handling with error checks for malformed links in `getUrlInfo`, refined domain extraction in `extractMainDomainFromUrl`, and ensured external link attributes apply correctly. Enhanced logging for easier debugging.

Signed-off-by: Muhammad Azeem <[email protected]>
  • Loading branch information
AzeemSup authored Oct 31, 2024
1 parent 9f13291 commit 35cb998
Showing 1 changed file with 60 additions and 56 deletions.
116 changes: 60 additions & 56 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,74 +141,78 @@ exports.parseFromDirectory = contentPath => {

// Main domain that getUrlInfo compares each link's extracted main domain against;
// a link whose main domain equals this value is flagged as internal.
const mainUrl = 'remoteintech.company'

/**
 * Walks a parsed element tree. For every direct child <a> of a tag node that is
 * neither a mailto link nor an internal link (as reported by getUrlInfo), sets
 * target="_blank" and appends the external-link icon markup; then recurses into
 * every child of the node.
 * @param {*} el node exposing `type` and `children` (the parser's node shape is defined outside this view)
 */
function addTargetBlankAndExternalLinkIcons (el) {
if (el.type === 'tag') {
// Only direct children named "a" are handled at this level; deeper anchors are reached by the recursion below.
const anchorTagElements = el.children.filter(element => element.name === 'a')
if (anchorTagElements.length > 0) {
anchorTagElements.forEach(element => {
const url = element.attribs.href
const urlInfo = getUrlInfo(url)

// Skip e-mail links and links that point back to the site itself.
if (urlInfo.is_email || urlInfo.is_internal) {
return
}

// NOTE(review): no rel attribute is set alongside target="_blank" in this version.
element.attribs.target = '_blank'

// NOTE(review): $element is assigned without const/let, so this creates an implicit
// global (and throws a ReferenceError in strict mode). `$` is defined outside this
// view - presumably a cheerio-style wrapper; confirm.
$element = $( element )
$element.append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>')
})
}

// Recurse into every child so anchors nested deeper in the tree are processed too.
if (el.children && el.children.length) {
el.children.forEach(element => {
addTargetBlankAndExternalLinkIcons(element)
})
}
}
/**
 * Marks external links inside a parsed element tree.
 *
 * For each direct child <a> of a tag node whose href is present and is neither
 * an e-mail link nor an internal link (according to getUrlInfo), the anchor gets
 * target="_blank", rel="noopener noreferrer" and an appended external-link icon.
 * Every child of the node is then visited recursively.
 *
 * @param {*} el node exposing `type` and `children`; non-tag nodes are ignored
 */
function addTargetBlankAndExternalLinkIcons(el) {
  // Text, comment and other non-tag nodes carry nothing to process.
  if (el.type !== 'tag') {
    return;
  }

  const links = el.children.filter(node => node.name === 'a');

  for (const link of links) {
    const href = link.attribs.href;
    const info = getUrlInfo(href);

    // Leave anchors without an href, mailto links and internal links untouched.
    if (!href || info.is_email || info.is_internal) {
      continue;
    }

    link.attribs.target = '_blank';
    link.attribs.rel = 'noopener noreferrer';
    $(link).append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>');
  }

  // Descend into every child so that nested anchors are handled as well.
  for (const child of el.children) {
    addTargetBlankAndExternalLinkIcons(child);
  }
}

/**
 * Gets info about the url: whether it is an e-mail (mailto:) link or an
 * internal link, i.e. one whose main domain equals `mainUrl`.
 * Returns an object carrying either `is_email: true` or a boolean `is_internal`.
 * NOTE(review): `url.match` is called directly, so a non-string url (for
 * example an anchor without an href) throws a TypeError here.
 * @param {*} url href value to classify
 */
function getUrlInfo (url) {
const data = {}

if (url.match(/^mailto:/)) { // e-mail link: report it and skip the domain check
data.is_email = true
return data
}

const mainDomainFromGivenUrl = extractMainDomainFromUrl(url)

// checking whether the url is internal (its main domain equals mainUrl) or not
if (mainDomainFromGivenUrl !== mainUrl) {
data.is_internal = false
return data
} else {
data.is_internal = true
}

return data
/**
 * Classifies a link target as an e-mail link and/or an internal link.
 *
 * - `mailto:` links (scheme matched case-insensitively) are e-mail links.
 * - Path-, query- and fragment-only references ("/a/b.md", "./x", "../x",
 *   "?q=1", "#top") always resolve against the current site, so they are
 *   internal by definition and never reach the domain extraction.
 * - Anything else is internal only when its main domain equals `mainUrl`
 *   (compared case-insensitively, because host names are case-insensitive).
 *
 * @param {*} url href value of an anchor; non-string and blank values are
 *   tolerated and classified as neither e-mail nor internal
 * @returns {{is_email: boolean, is_internal: boolean}} both flags are always present
 */
function getUrlInfo(url) {
  // Always hand back both flags so callers never read `undefined`.
  const data = { is_email: false, is_internal: false };

  if (typeof url !== 'string') {
    return data;
  }

  const href = url.trim();
  if (href === '') {
    return data;
  }

  // URI schemes are case-insensitive, so "MAILTO:" is an e-mail link as well.
  if (/^mailto:/i.test(href)) {
    data.is_email = true;
    return data;
  }

  // A single leading "/" (but not the protocol-relative "//"), "./", "../",
  // "?" or "#" marks a relative reference: it has no host of its own, and a
  // dotted file name in it must not be mistaken for a domain.
  if (/^(?:\/(?!\/)|\.{1,2}\/|[?#])/.test(href)) {
    data.is_internal = true;
    return data;
  }

  try {
    const mainDomainFromGivenUrl = extractMainDomainFromUrl(href);
    data.is_internal =
      typeof mainDomainFromGivenUrl === 'string' &&
      mainDomainFromGivenUrl.toLowerCase() === mainUrl.toLowerCase();
  } catch (error) {
    // Best effort: a link that cannot be analysed is treated as external.
    console.error("Error processing URL:", url, error);
    data.is_internal = false;
  }

  return data;
}

/**
 * Extracts the main domain (the last two dot-separated labels of the first
 * host-like match) from the url, e.g. "www.example.com" -> "example.com".
 * NOTE(review): the result of `exec` is used without a null check, so a url
 * containing no dotted host-like text throws a TypeError on `data[2]`.
 * @param {*} url url (with or without an http/https scheme) to inspect
 */
function extractMainDomainFromUrl (url) {
const domainRe = /(https?:\/\/){0,1}((?:[\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls

// Group 2 of the match holds the dotted host text.
const data = domainRe.exec(url)

const splittedDomain = data[2].split('.')

if (splittedDomain.length === 2) { // check extra subdomain is present or not
return data[2]
}

return splittedDomain[splittedDomain.length - 2] + '.' + splittedDomain[splittedDomain.length - 1] // return only main domain address
/**
 * Extracts the main domain (last two dot-separated labels of the host) from a
 * URL, lower-cased: "https://Blog.Example.com/x" -> "example.com".
 *
 * Accepted forms are "scheme://host...", protocol-relative "//host..." and a
 * bare "host...". The match is anchored at the start of the (trimmed) input,
 * so dotted path segments of relative URLs such as "/docs/readme.md" are not
 * mistaken for hosts, and an optional "user:password@" part is skipped so the
 * real host is the one reported.
 *
 * @param {*} url value to inspect; non-strings are rejected rather than coerced
 * @returns {string} the main domain, or '' when no host can be found
 */
function extractMainDomainFromUrl(url) {
  if (typeof url !== 'string') {
    console.warn("Invalid URL format:", url);
    return '';
  }

  // Adapted from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
  // [scheme:]//  (optional)  ->  userinfo@  (optional)  ->  captured host
  const hostRe = /^(?:(?:[a-z][a-z\d+.-]*:)?\/\/)?(?:[^\/?#@]*@)?((?:[\w-]+\.)+\w{2,})/i;
  const match = hostRe.exec(url.trim());

  if (!match) {
    console.warn("Invalid URL format:", url);
    return ''; // Return empty if domain extraction fails
  }

  // Host names are case-insensitive; normalise so callers can compare with ===.
  return match[1].toLowerCase().split('.').slice(-2).join('.');
}

let lastCompanyName = null;
Expand Down

0 comments on commit 35cb998

Please sign in to comment.