There's nothing more fun for a C/C++ coder than playing with strings. This is what we do best, and here is a bit of string playing for you! There's also a new PHP version to extract the domain from a URL.
I would not really call this easy! Many would think this is simple to do. You just extract the last 2 pieces of the hostname and, bang! You have a domain name for use. Well … what do you do about .co.uk or .net.ec? These act as top-level domains, hence the domain name will be formed by the last 3 pieces of the hostname.
If you want to make it really difficult to parse the domain name from the host name, you start to play with Whois and DNS lookups, which will waste any performance your application used to have. This code needs nothing but itself. Not even an internet connection :)
If you try to decide whether the last two pieces form a TLD (such as .co.uk) just by their lengths, you might be wrong. To have properly working code you actually need a predefined list of the ones that are officially established. So I did some research and found these on this site.
You will notice I commented it, but I will also explain what it does in a few lines here.
I needed this to play with some Whois queries and, as I did not find any code online to do it, here it is. Might come in handy to some C++ coders.
//-----------------------------------------------
// Url2Domain
//
// Extracts the registrable domain name from a URL or a bare host name,
// using only the built-in list of second level TLDs below (no network
// access).
//
//   szUrl   - URL ("scheme://host/path...") or plain host name.
//
// Returns a newly allocated, lowercased string (created with _tcsdup)
// that the caller must release with free(), or 0 when szUrl is NULL or
// empty or when an allocation fails.
//
// Result rules:
//   - host with fewer than 2 dots        -> the host itself
//   - last two labels are a listed SLTLD  -> last three labels
//   - otherwise                           -> last two labels
//
// Limitations: the host is only cut at '/', '?' or '#', and only when
// "://" is present; a port or user-info part is therefore kept as is.
//-----------------------------------------------
inline LPTSTR Url2Domain(LPCTSTR szUrl){
    //-----------------------------------------------
    // Reject missing or empty input
    if(!szUrl || !*szUrl) return 0;

    // Private, writable copy of the input. szBuffer always keeps the
    // start of the allocation so it can be freed, while szHost is
    // moved forward inside it.
    LPTSTR szBuffer=_tcsdup(szUrl);
    if(!szBuffer) return 0;
    LPTSTR szHost=szBuffer;

    //-----------------------------------------------
    // If a scheme separator is present, the host starts right after it
    // and ends at the first '/', '?' or '#'.
    LPTSTR SchemeMark=_tcsstr(szHost,_T("://"));
    if(SchemeMark){
        szHost=SchemeMark+3;
        LPTSTR HostEnd=szHost;
        while(*HostEnd && !_tcschr(_T("/?#"),*HostEnd)) HostEnd++;
        *HostEnd=_T('\0');
    }

    //-----------------------------------------------
    // Lowercase the host name and count its dots. A size_t counter is
    // used so that very long inputs can not wrap it around.
    size_t Dots=0;
    for(LPTSTR Walk=szHost;*Walk;Walk++){
        if(*Walk==_T('.')) Dots++;
        // _totlower is the generic-text mapping that matches _istupper
        // in both ANSI and UNICODE builds.
        if(_istupper(*Walk)) *Walk=(TCHAR)_totlower(*Walk);
    }

    //-----------------------------------------------
    // With fewer than 2 dots the host already is the domain name
    if(Dots<2){
        LPTSTR Host=_tcsdup(szHost);
        free(szBuffer);
        return Host;
    }

    // A domain name never needs more than its last 3 labels, so the
    // leading labels are skipped until exactly 2 dots remain.
    for(size_t Extras=Dots-2;Extras>0;Extras--)
        szHost=_tcschr(szHost,_T('.'))+1;

    //-----------------------------------------------
    // Comma separated list of second level TLDs. It is static const so
    // the table is not copied onto the stack on every call.
    static const TCHAR szSLTLDs[]=
_T("com.ac,edu.ac,gov.ac,net.ac,mil.ac,org.ac,com.ae,n")
_T("et.ae,org.ae,gov.ae,ac.ae,co.ae,sch.ae,pro.ae,com.")
_T("ai,org.ai,edu.ai,gov.ai,com.ar,net.ar,org.ar,gov.a")
_T("r,mil.ar,edu.ar,int.ar,co.at,ac.at,or.at,gv.at,pri")
_T("v.at,com.au,gov.au,org.au,edu.au,id.au,oz.au,info.")
_T("au,net.au,asn.au,csiro.au,telememo.au,conf.au,otc.")
_T("au,id.au,com.az,net.az,org.az,com.bb,net.bb,org.bb")
_T(",ac.be,belgie.be,dns.be,fgov.be,com.bh,gov.bh,net.")
_T("bh,edu.bh,org.bh,com.bm,edu.bm,gov.bm,org.bm,net.b")
_T("m,adm.br,adv.br,agr.br,am.br,arq.br,art.br,ato.br,")
_T("bio.br,bmd.br,cim.br,cng.br,cnt.br,com.br,coop.br,")
_T("ecn.br,edu.br,eng.br,esp.br,etc.br,eti.br,far.br,f")
_T("m.br,fnd.br,fot.br,fst.br,g12.br,ggf.br,gov.br,imb")
_T(".br,ind.br,inf.br,jor.br,lel.br,mat.br,med.br,mil.")
_T("br,mus.br,net.br,nom.br,not.br,ntr.br,odo.br,org.b")
_T("r,ppg.br,pro.br,psc.br,psi.br,qsl.br,rec.br,slg.br")
_T(",srv.br,tmp.br,trd.br,tur.br,tv.br,vet.br,zlg.br,c")
_T("om.bs,net.bs,org.bs,ab.ca,bc.ca,mb.ca,nb.ca,nf.ca,")
_T("nl.ca,ns.ca,nt.ca,nu.ca,on.ca,pe.ca,qc.ca,sk.ca,yk")
_T(".ca,gc.ca,co.ck,net.ck,org.ck,edu.ck,gov.ck,com.cn")
_T(",edu.cn,gov.cn,net.cn,org.cn,ac.cn,ah.cn,bj.cn,cq.")
_T("cn,gd.cn,gs.cn,gx.cn,gz.cn,hb.cn,he.cn,hi.cn,hk.cn")
_T(",hl.cn,hn.cn,jl.cn,js.cn,ln.cn,mo.cn,nm.cn,nx.cn,q")
_T("h.cn,sc.cn,sn.cn,sh.cn,sx.cn,tj.cn,tw.cn,xj.cn,xz.")
_T("cn,yn.cn,zj.cn,arts.co,com.co,edu.co,firm.co,gov.c")
_T("o,info.co,int.co,nom.co,mil.co,org.co,rec.co,store")
_T(".co,web.co,ac.cr,co.cr,ed.cr,fi.cr,go.cr,or.cr,sa.")
_T("cr,com.cu,net.cu,org.cu,ac.cy,com.cy,gov.cy,net.cy")
_T(",org.cy,co.dk,art.do,com.do,edu.do,gov.do,gob.do,o")
_T("rg.do,mil.do,net.do,sld.do,web.do,com.dz,org.dz,ne")
_T("t.dz,gov.dz,edu.dz,ass.dz,pol.dz,art.dz,com.ec,k12")
_T(".ec,edu.ec,fin.ec,med.ec,gov.ec,mil.ec,org.ec,net.")
_T("ec,com.ee,pri.ee,fie.ee,org.ee,med.ee,com.eg,edu.e")
_T("g,eun.eg,gov.eg,net.eg,org.eg,sci.eg,com.er,net.er")
_T(",org.er,edu.er,mil.er,gov.er,ind.er,com.es,org.es,")
_T("gob.es,edu.es,nom.es,com.et,gov.et,org.et,edu.et,n")
_T("et.et,biz.et,name.et,info.et,ac.fj,com.fj,gov.fj,i")
_T("d.fj,org.fj,school.fj,com.fk,ac.fk,gov.fk,net.fk,n")
_T("om.fk,org.fk,asso.fr,nom.fr,barreau.fr,com.fr,prd.")
_T("fr,presse.fr,tm.fr,aeroport.fr,assedic.fr,avocat.f")
_T("r,avoues.fr,cci.fr,chambagri.fr,chirurgiens-dentis")
_T("tes.fr,experts-comptables.fr,geometre-expert.fr,go")
_T("uv.fr,greta.fr,huissier-justice.fr,medecin.fr,nota")
_T("ires.fr,pharmacien.fr,port.fr,veterinaire.fr,com.g")
_T("e,edu.ge,gov.ge,mil.ge,net.ge,org.ge,pvt.ge,co.gg,")
_T("org.gg,sch.gg,ac.gg,gov.gg,ltd.gg,ind.gg,net.gg,al")
_T("derney.gg,guernsey.gg,sark.gg,com.gr,edu.gr,gov.gr")
_T(",net.gr,org.gr,com.gt,edu.gt,net.gt,gob.gt,org.gt,")
_T("mil.gt,ind.gt,com.gu,edu.gu,net.gu,org.gu,gov.gu,m")
_T("il.gu,com.hk,net.hk,org.hk,idv.hk,gov.hk,edu.hk,co")
_T(".hu,2000.hu,erotika.hu,jogasz.hu,sex.hu,video.hu,i")
_T("nfo.hu,agrar.hu,film.hu,konyvelo.hu,shop.hu,org.hu")
_T(",bolt.hu,forum.hu,lakas.hu,suli.hu,priv.hu,casino.")
_T("hu,games.hu,media.hu,szex.hu,sport.hu,city.hu,hote")
_T("l.hu,news.hu,tozsde.hu,tm.hu,erotica.hu,ingatlan.h")
_T("u,reklam.hu,utazas.hu,ac.id,co.id,go.id,mil.id,net")
_T(".id,or.id,co.il,net.il,org.il,ac.il,gov.il,k12.il,")
_T("muni.il,idf.il,co.im,net.im,org.im,ac.im,lkd.co.im")
_T(",gov.im,nic.im,plc.co.im,co.in,net.in,ac.in,ernet.")
_T("in,gov.in,nic.in,res.in,gen.in,firm.in,mil.in,org.")
_T("in,ind.in,ac.ir,co.ir,gov.ir,id.ir,net.ir,org.ir,s")
_T("ch.ir,ac.je,co.je,net.je,org.je,gov.je,ind.je,jers")
_T("ey.je,ltd.je,sch.je,com.jo,org.jo,net.jo,gov.jo,ed")
_T("u.jo,mil.jo,ad.jp,ac.jp,co.jp,go.jp,or.jp,ne.jp,gr")
_T(".jp,ed.jp,lg.jp,net.jp,org.jp,gov.jp,hokkaido.jp,a")
_T("omori.jp,iwate.jp,miyagi.jp,akita.jp,yamagata.jp,f")
_T("ukushima.jp,ibaraki.jp,tochigi.jp,gunma.jp,saitama")
_T(".jp,chiba.jp,tokyo.jp,kanagawa.jp,niigata.jp,toyam")
_T("a.jp,ishikawa.jp,fukui.jp,yamanashi.jp,nagano.jp,g")
_T("ifu.jp,shizuoka.jp,aichi.jp,mie.jp,shiga.jp,kyoto.")
_T("jp,osaka.jp,hyogo.jp,nara.jp,wakayama.jp,tottori.j")
_T("p,shimane.jp,okayama.jp,hiroshima.jp,yamaguchi.jp,")
_T("tokushima.jp,kagawa.jp,ehime.jp,kochi.jp,fukuoka.j")
_T("p,saga.jp,nagasaki.jp,kumamoto.jp,oita.jp,miyazaki")
_T(".jp,kagoshima.jp,okinawa.jp,sapporo.jp,sendai.jp,y")
_T("okohama.jp,kawasaki.jp,nagoya.jp,kobe.jp,kitakyush")
_T("u.jp,utsunomiya.jp,kanazawa.jp,takamatsu.jp,matsuy")
_T("ama.jp,com.kh,net.kh,org.kh,per.kh,edu.kh,gov.kh,m")
_T("il.kh,ac.kr,co.kr,go.kr,ne.kr,or.kr,pe.kr,re.kr,se")
_T("oul.kr,kyonggi.kr,com.kw,net.kw,org.kw,edu.kw,gov.")
_T("kw,com.la,net.la,org.la,com.lb,org.lb,net.lb,edu.l")
_T("b,gov.lb,mil.lb,com.lc,edu.lc,gov.lc,net.lc,org.lc")
_T(",com.lv,net.lv,org.lv,edu.lv,gov.lv,mil.lv,id.lv,a")
_T("sn.lv,conf.lv,com.ly,net.ly,org.ly,co.ma,net.ma,or")
_T("g.ma,press.ma,ac.ma,com.mk,com.mm,net.mm,org.mm,ed")
_T("u.mm,gov.mm,com.mn,org.mn,edu.mn,gov.mn,museum.mn,")
_T("com.mo,net.mo,org.mo,edu.mo,gov.mo,com.mt,net.mt,o")
_T("rg.mt,edu.mt,tm.mt,uu.mt,com.mx,net.mx,org.mx,gob.")
_T("mx,edu.mx,com.my,org.my,gov.my,edu.my,net.my,com.n")
_T("a,org.na,net.na,alt.na,edu.na,cul.na,unam.na,telec")
_T("om.na,com.nc,net.nc,org.nc,ac.ng,edu.ng,sch.ng,com")
_T(".ng,gov.ng,org.ng,net.ng,gob.ni,com.ni,net.ni,edu.")
_T("ni,nom.ni,org.ni,com.np,net.np,org.np,gov.np,edu.n")
_T("p,ac.nz,co.nz,cri.nz,gen.nz,geek.nz,govt.nz,iwi.nz")
_T(",maori.nz,mil.nz,net.nz,org.nz,school.nz,com.om,co")
_T(".om,edu.om,ac.om,gov.om,net.om,org.om,mod.om,museu")
_T("m.om,biz.om,pro.om,med.om,com.pa,net.pa,org.pa,edu")
_T(".pa,ac.pa,gob.pa,sld.pa,edu.pe,gob.pe,nom.pe,mil.p")
_T("e,org.pe,com.pe,net.pe,com.pg,net.pg,ac.pg,com.ph,")
_T("net.ph,org.ph,mil.ph,ngo.ph,aid.pl,agro.pl,atm.pl,")
_T("auto.pl,biz.pl,com.pl,edu.pl,gmina.pl,gsm.pl,info.")
_T("pl,mail.pl,miasta.pl,media.pl,mil.pl,net.pl,nieruc")
_T("homosci.pl,nom.pl,org.pl,pc.pl,powiat.pl,priv.pl,r")
_T("ealestate.pl,rel.pl,sex.pl,shop.pl,sklep.pl,sos.pl")
_T(",szkola.pl,targi.pl,tm.pl,tourism.pl,travel.pl,tur")
_T("ystyka.pl,com.pk,net.pk,edu.pk,org.pk,fam.pk,biz.p")
_T("k,web.pk,gov.pk,gob.pk,gok.pk,gon.pk,gop.pk,gos.pk")
_T(",edu.ps,gov.ps,plo.ps,sec.ps,com.pt,edu.pt,gov.pt,")
_T("int.pt,net.pt,nome.pt,org.pt,publ.pt,com.py,net.py")
_T(",org.py,edu.py,com.qa,net.qa,org.qa,edu.qa,gov.qa,")
_T("asso.re,com.re,nom.re,com.ro,org.ro,tm.ro,nt.ro,no")
_T("m.ro,info.ro,rec.ro,arts.ro,firm.ro,store.ro,www.r")
_T("o,com.ru,net.ru,org.ru,gov.ru,pp.ru,com.sa,edu.sa,")
_T("sch.sa,med.sa,gov.sa,net.sa,org.sa,pub.sa,com.sb,n")
_T("et.sb,org.sb,edu.sb,gov.sb,com.sd,net.sd,org.sd,ed")
_T("u.sd,sch.sd,med.sd,gov.sd,tm.se,press.se,parti.se,")
_T("brand.se,fh.se,fhsk.se,fhv.se,komforb.se,kommunalf")
_T("orbund.se,komvux.se,lanarb.se,lanbib.se,naturbruks")
_T("gymn.se,sshn.se,org.se,pp.se,com.sg,net.sg,org.sg,")
_T("edu.sg,gov.sg,per.sg,com.sh,net.sh,org.sh,edu.sh,g")
_T("ov.sh,mil.sh,gov.st,saotome.st,principe.st,consula")
_T("do.st,embaixada.st,org.st,edu.st,net.st,com.st,sto")
_T("re.st,mil.st,co.st,com.sv,org.sv,edu.sv,gob.sv,red")
_T(".sv,com.sy,net.sy,org.sy,gov.sy,ac.th,co.th,go.th,")
_T("net.th,or.th,com.tn,net.tn,org.tn,edunet.tn,gov.tn")
_T(",ens.tn,fin.tn,nat.tn,ind.tn,info.tn,intl.tn,rnrt.")
_T("tn,rnu.tn,rns.tn,tourism.tn,com.tr,net.tr,org.tr,e")
_T("du.tr,gov.tr,mil.tr,bbs.tr,k12.tr,gen.tr,co.tt,com")
_T(".tt,org.tt,net.tt,biz.tt,info.tt,pro.tt,int.tt,coo")
_T("p.tt,jobs.tt,mobi.tt,travel.tt,museum.tt,aero.tt,n")
_T("ame.tt,gov.tt,edu.tt,nic.tt,us.tt,uk.tt,ca.tt,eu.t")
_T("t,es.tt,fr.tt,it.tt,se.tt,dk.tt,be.tt,de.tt,at.tt,")
_T("au.tt,co.tv,com.tw,net.tw,org.tw,edu.tw,idv.tw,gov")
_T(".tw,com.ua,net.ua,org.ua,edu.ua,gov.ua,ac.ug,co.ug")
_T(",or.ug,go.ug,co.uk,me.uk,org.uk,edu.uk,ltd.uk,plc.")
_T("uk,net.uk,sch.uk,nic.uk,ac.uk,gov.uk,nhs.uk,police")
_T(".uk,mod.uk,dni.us,fed.us,com.uy,edu.uy,net.uy,org.")
_T("uy,gub.uy,mil.uy,com.ve,net.ve,org.ve,co.ve,edu.ve")
_T(",gov.ve,mil.ve,arts.ve,bib.ve,firm.ve,info.ve,int.")
_T("ve,nom.ve,rec.ve,store.ve,tec.ve,web.ve,co.vi,net.")
_T("vi,org.vi,com.vn,biz.vn,edu.vn,gov.vn,net.vn,org.v")
_T("n,int.vn,ac.vn,pro.vn,info.vn,health.vn,name.vn,co")
_T("m.vu,edu.vu,net.vu,org.vu,de.vu,ch.vu,fr.vu,com.ws")
_T(",net.ws,org.ws,gov.ws,edu.ws,ac.yu,co.yu,edu.yu,or")
_T("g.yu,com.ye,net.ye,org.ye,gov.ye,edu.ye,mil.ye,ac.")
_T("za,alt.za,bourse.za,city.za,co.za,edu.za,gov.za,la")
_T("w.za,mil.za,net.za,ngo.za,nom.za,org.za,school.za,")
_T("tm.za,web.za,co.zw,ac.zw,org.zw,gov.zw,eu.org,au.c")
_T("om,br.com,cn.com,de.com,de.net,eu.com,gb.com,gb.ne")
_T("t,hu.com,no.com,qc.com,ru.com,sa.com,se.com,uk.com")
_T(",uk.net,us.com,uy.com,za.com,dk.org,tel.no,fax.nr,")
_T("mob.nr,mobil.nr,mobile.nr,tel.nr,tlf.nr,e164.arpa")
    ; // Ending Second Level TLDs

    //-----------------------------------------------
    // szHost now holds exactly 3 labels; everything after its first
    // dot is the candidate second level TLD (for example "co.uk").
    LPCTSTR L2TLD=_tcschr(szHost,_T('.'))+1;
    size_t L2Len=_tcslen(L2TLD);

    // The candidate only counts when it equals a WHOLE list entry:
    // the match must start at the beginning of the list or right after
    // a comma, and end at a comma or at the end of the list. Every
    // occurrence is tried, because an earlier partial hit (such as
    // "rg.pl" inside "org.pl") must not hide a later exact entry.
    // A candidate containing a comma could span two entries, so it is
    // never accepted.
    int IsSLTLD=0;
    if(!_tcschr(L2TLD,_T(','))){
        const TCHAR *Pos=_tcsstr(szSLTLDs,L2TLD);
        while(Pos){
            int StartsEntry=(Pos==szSLTLDs) || (Pos[-1]==_T(','));
            int EndsEntry=(Pos[L2Len]==_T(',')) || (Pos[L2Len]==_T('\0'));
            if(StartsEntry && EndsEntry){
                IsSLTLD=1;
                break;
            }
            Pos=_tcsstr(Pos+1,L2TLD);
        }
    }

    // Listed SLTLD: the domain is all 3 labels ("example.co.uk").
    // Otherwise the last 2 labels are the domain ("example.com").
    LPTSTR Host=_tcsdup(IsSLTLD ? (LPCTSTR)szHost : L2TLD);
    free(szBuffer);
    return Host;
}
… it might not be the fastest but it will suit any application that needs this conversion and it's UNICODE compatible. ;)
Post Feedback