Tuesday, August 7, 2012

SQL statement to get domain name from URL

Given below is a SQL statement to extract domain name from URLs stored in a table
CREATE TABLE #tmpurl (url varchar(1000))

INSERT INTO #tmpurl (url)VALUES ('http://yahoo.com/')
INSERT INTO #tmpurl (url)VALUES('http://yahoo.com/test')
INSERT INTO #tmpurl (url)VALUES('http://www.yahoo.com/test')
INSERT INTO #tmpurl (url)VALUES('https://www.yahoo.com/test')
INSERT INTO #tmpurl (url)VALUES('https://www.yahoo.com/test?test=a')
INSERT INTO #tmpurl (url)VALUES('https://www.yahoo.com?test=a')

SELECT 
url,
substring(
substring(url,charindex('://',url)+3,len(url)),
0,
case when charindex('/',substring(url,charindex('://',url)+3,len(url)))>0  
then charindex('/',substring(url,charindex('://',url)+3,len(url)))
when charindex('?',substring(url,charindex('://',url)+3,len(url)))>0  
then charindex('?',substring(url,charindex('://',url)+3,len(url)))
else
len(url)
end
)
FROM #tmpurl

1 comment:

Unknown said...

Try this instead...

ALTER FUNCTION dbo.spExtractDomainFromURL ( @strURL NVARCHAR(1000) )
RETURNS NVARCHAR(100)
--Posted at http://stackoverflow.com/a/20808097/391101
AS
BEGIN
--Strip Protocol
SELECT @strURL = SUBSTRING(@strURL, CHARINDEX('://', @strURL) + 3, 999)

-- Strip www subdomain
IF LEFT(@strURL, 4) = 'www.'
SELECT @strURL = SUBSTRING(@strURL, 5, 999)

-- Strip Path
IF CHARINDEX('/', @strURL) > 0
SELECT @strURL = LEFT(@strURL, CHARINDEX('/', @strURL) - 1)

--Unless you iterate through a list of TLDs, you can't differentiate between subdomain.example.com and example.com.au
--I decided against this because it's slower, and the TLD list requires maintenance

RETURN @strURL
END