2010-06-18 102 views
0

我目前正在对 rTorrent 的源代码做一些修改,想知道如何从 URL 中提取域名。我有以下代码:

// Formats the string held by rawArgs as "URL: <text>." and returns it.
//
// rawArgs: object whose as_string() value is the URL text.
// Returns: the formatted text, converted to the return type.
//
// The result is assembled in a std::string instead of a fixed 50-byte
// snprintf buffer, which silently truncated any URL longer than 43
// characters and lost the trailing '.'.
torrent::Object
apply_to_domain(const torrent::Object& rawArgs) {

  const std::string& url = rawArgs.as_string();

  std::string result = "URL: ";
  // Append through c_str() so the text still ends at the first NUL byte,
  // exactly as the former "%s" conversion did.
  result += url.c_str();
  result += '.';

  return result;
}

我需要从 URL 中提取域名。源代码中包含 regex.h,但我不确定能否使用它,还是需要换用其他正则表达式库。

Link to regex.h

回答

4

话虽如此,这个“正则表达式”实现唯一能处理的就是通配符 *。(顺便说一句,我只是假定它是通配符,因为它是唯一被识别的字符,而且注释似乎也有此暗示,但我并没有实际验证。)

请使用真正的正则表达式库,例如 Boost.Regex。

1

// 这是一个临时拼凑的整串模式匹配实现。等 TR1 的正则表达式
// 得到广泛支持后,应以其取而代之。它只适用于
// 较短的字符串。

这无法用于提取域名。请改用 Boost 或 VSCRT 的 TR1。

0

在Windows中:

#include <winsock2.h> 
#include <windows.h> 
#include <algorithm> 
#include <cctype> 
#include <cstdlib> 
#include <cstring> 
#include <iostream> 
#include <locale> 
#include <string> 
#include <vector> 
#pragma comment(lib,"ws2_32.lib") 
using namespace std; 

// Text of the downloaded page: get_Website() appends to it and
// extract_URL() lower-cases and scans it.
string website_HTML; 
// Default-constructed locale, passed to tolower() in extract_URL().
locale local; 

//*************************** 
// Forward declarations; both functions are defined after main().
void get_Website(char *url); 
void extract_URL(); 
//*************************** 


// Entry point: fetches the page for a fixed host via get_Website() and then
// runs extract_URL() on the downloaded text.
int main() 
{ 
    // A writable array rather than a pointer to the literal: a string literal
    // does not convert to char* in standard C++ (C++11 onwards), and
    // get_Website() takes a non-const char*.
    char url[] = "www.bbc.com"; 
    get_Website(url); 
    extract_URL(); 

    return 0; 
} 



//*************************** 
void get_Website(char *url) 
{ 
     WSADATA wsaData; 
     SOCKET Socket; 
     SOCKADDR_IN SockAddr; 


     int lineCount=0; 
     int rowCount=0; 

     struct hostent *host; 
     char *get_http= new char[256]; 

     memset(get_http,' ', sizeof(get_http)); 
     strcpy(get_http,"GET/HTTP/1.1\r\nHost: "); 
     strcat(get_http,url); 
     strcat(get_http,"\r\nConnection: close\r\n\r\n"); 

     if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) 
     { 
      cout << "WSAStartup failed.\n"; 
      exit(0); 
     } 

     Socket=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP); 
     host = gethostbyname(url); 

     SockAddr.sin_port=htons(80); 
     SockAddr.sin_family=AF_INET; 
     SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr); 

     cout << "Connecting to ["<< url<<"]...\n"; 
     if(connect(Socket,(SOCKADDR*)(&SockAddr),sizeof(SockAddr)) != 0) 
     { 
      cout << "Could not connect\n"; 
      exit(0); 
     } 
     cout << "Connected. (success!)\n"; 
     std::cout << std::flush; 
     send(Socket,get_http, strlen(get_http),0);  
     char buffer[10000]; 
     int nDataLength; 
     int i = 0; 

     while ((nDataLength = recv(Socket,buffer,10000,0)) > 0) 
     {  


      while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') 
      {  
       website_HTML+=buffer[i];     
       i += 1; 

      } 

     } 
     cout<<"\n"<<i<<" bytes downloaded \n\n"; 
     closesocket(Socket); 
     WSACleanup(); 
     delete[] get_http; 
} 


// Lower-cases the page held in the global website_HTML, collects every
// substring that starts with "http:" and prints the collected list followed
// by its size.
//
// Each match is taken from the start of "http:" up to (not including) the
// next double quote, looking at most 256 characters past the prefix. A
// progress spinner is written to std::cout for every scanned position.
void extract_URL() 
{ 
    for (size_t i=0; i<website_HTML.length(); ++i) website_HTML[i]= tolower(website_HTML[i],local); 

    const std::string to_find = "http:"; 
    std::vector<std::string> extracted_website_URL; 

    std::cout << "\nExtracting url.. "; 
    // Written as "pos + needle <= length" so that a page shorter than the
    // needle gives zero iterations instead of wrapping the unsigned bound,
    // and so that a match at the very end of the page is still seen.
    for (size_t pos = 0; pos + to_find.length() <= website_HTML.length(); ++pos) 
    { 
     if (website_HTML.compare(pos, to_find.length(), to_find) == 0) 
     { 
      // Bounded look-ahead window, then cut at the first double quote
      // (find() returning npos keeps the whole window).
      const std::string window = website_HTML.substr(pos, to_find.length()+256); 
      extracted_website_URL.push_back(window.substr(0, window.find('"'))); 
     } 

     std::cout << "\b\\" << std::flush;  
     std::cout << "\b|" << std::flush;   
     std::cout << "\b/" << std::flush;   
     std::cout << "\b-" << std::flush; 
    } 

    for (size_t k = 0; k < extracted_website_URL.size(); ++k) std::cout<<extracted_website_URL[k] <<" \n"; 
    std::cout<<"\n"<<extracted_website_URL.size()<<" URL's extracted "; 
    std::cout<<"\n\n"; 

}