Skip to content

Instantly share code, notes, and snippets.

@wesinator
Last active January 30, 2023 14:05
Show Gist options
  • Save wesinator/97f97270c86c863b6eaf00ebae783ceb to your computer and use it in GitHub Desktop.
Save wesinator/97f97270c86c863b6eaf00ebae783ceb to your computer and use it in GitHub Desktop.
Remove line numbers from code snippets of old cnblogs.com posts
// https://web.archive.org/web/20150421052738/https://www.cnblogs.com/wz19860913/archive/2010/04/29/1723586.html
// Strip the teal-coloured <span> elements that old cnblogs.com posts use for
// line numbers inside code snippets.
//
// getElementsByTagName() returns a *live* HTMLCollection: removing a span while
// iterating shifts the remaining items down and the next one gets skipped.
// Take a static snapshot first so every span is visited exactly once.
var spans = Array.from(document.getElementsByTagName('span'));
for (var span of spans) {
  // The inline style attribute is compared verbatim; both spellings of the
  // same teal colour are matched.
  const style = span.getAttribute('style');
  if (style === "color: #008080" || style === "color: rgba(0, 128, 128, 1)") {
    span.remove();
  }
}
/*
https://web.archive.org/web/20150421052738/https://www.cnblogs.com/wz19860913/archive/2010/04/29/1723586.html
那片土在蓝天上
燃烧的翅膀
[转]利用Winsock实现HTTP的GET请求
网络爬虫需要从指定的URL通过HTTP协议来获得HTML文件信息,以此从一个URL爬到另一个URL。在Windows平台上,这往往通过WinINet接口实现。
  但是,如果对HTTP协议熟悉的话,也可以通过Winsock接口实现。代码如下。
*/
#pragma warning (disable:4996)
#define DEFAULT_URL "http://www.google.com"
// Starts up Winsock, requesting version <highVer>.<lowVer>.
// Returns non-zero when WSAStartup reports success (a zero status), and zero
// otherwise. The WSADATA details filled in by WSAStartup are discarded.
BOOL WinsockStartup(BYTE highVer, BYTE lowVer)
{
    WSADATA startupInfo;
    const int status = WSAStartup(MAKEWORD(highVer, lowVer), &startupInfo);
    return status == 0;
}
int SendData(SOCKET s, char * data)
{
return send(s, data, strlen(data), 0);
}
// Extracts the host part of a URL.
//
// An optional leading "http://" is skipped; the host is everything from there
// up to (not including) the first '/', or to the end of the string when there
// is no '/'.
//
// Parameters:
//   pszURL      - NUL-terminated URL, e.g. "http://www.google.com/index.html".
//   pszHostName - output buffer. It must be able to hold the host plus a
//                 terminating NUL; no bounds checking is done here.
//
// The result is always NUL-terminated, so the caller no longer has to
// zero-fill the output buffer beforehand.
void ParseTheURL(const char * pszURL, char * pszHostName)
{
    const char * pHostStart = pszURL;
    if (strstr(pszURL, "http://") == pszURL)
    {
        pHostStart += 7;    // strlen("http://")
    }

    const char * pHostEnd = strchr(pHostStart, '/');
    const size_t hostLen = pHostEnd
        ? static_cast<size_t>(pHostEnd - pHostStart)
        : strlen(pHostStart);

    memcpy(pszHostName, pHostStart, hostLen);
    pszHostName[hostLen] = '\0';
}
// Interactive demo: reads a URL from stdin, performs an HTTP GET against it
// over a raw Winsock TCP connection to port 80, and prints the response
// header and body to stdout.
//
// Returns 0 on success and -1 when Winsock start-up, name resolution, or the
// connection fails. Every exit path waits for one line of input first
// (cin.getline), which keeps the console window open.
int _tmain()
{
    char szURL[256] = { 0 };        // URL typed by the user (also reused as scratch for the "pause" reads)
    char szHostName[256] = { 0 };   // host name extracted from szURL
    char szPortName[] = "80";       // port number, passed to getaddrinfo as the service name

    if (!WinsockStartup(2, 2))
    {
        _tcprintf(TEXT("初始化Windows Sockets失败!"));
        cin.getline(szURL, 255);
        return -1;
    }

    // Resolution hints: IPv4, TCP stream sockets.
    addrinfo aiHints = { 0 };
    addrinfo * aiList = NULL;
    aiHints.ai_family = AF_INET;
    aiHints.ai_socktype = SOCK_STREAM;
    aiHints.ai_protocol = IPPROTO_TCP;

    cout<<"输入URL:";
    cin.getline(szURL, 255);
    if (strcmp(szURL, "") == 0)
    {
        // Empty input: fall back to the default URL and echo it.
        strcpy(szURL, DEFAULT_URL);
        cout<<DEFAULT_URL<<endl;
    }

    ParseTheURL(szURL, szHostName);

    // Pass the hints that were filled in above (they were previously built
    // but never handed to getaddrinfo).
    if (getaddrinfo(szHostName, szPortName, &aiHints, &aiList) != 0)
    {
        _tcprintf_s(TEXT("getaddrinfo失败:%d"), WSAGetLastError());
        WSACleanup();
        cin.getline(szURL, 255);
        return -1;
    }

    // Try each resolved address in turn until one connects.
    SOCKET s = INVALID_SOCKET;
    for (addrinfo * aiPtr = aiList; aiPtr != NULL; aiPtr = aiPtr->ai_next)
    {
        // Create the socket from the entry being tried, not the list head.
        s = socket(aiPtr->ai_family, aiPtr->ai_socktype, aiPtr->ai_protocol);
        if (s == INVALID_SOCKET)
        {
            _tcprintf_s(TEXT("socket创建失败:%d"), WSAGetLastError());
            continue;
        }
        if (connect(s, aiPtr->ai_addr, static_cast<int>(aiPtr->ai_addrlen)) == SOCKET_ERROR)
        {
            // Capture the error before closesocket() can overwrite it.
            const int connectError = WSAGetLastError();
            closesocket(s);
            s = INVALID_SOCKET;
            _tcprintf_s(TEXT("connect失败:%d"), connectError);
            continue;
        }
        break;
    }
    freeaddrinfo(aiList);

    if (s == INVALID_SOCKET)
    {
        WSACleanup();
        cin.getline(szURL, 255);
        return -1;
    }

    // Request target in origin-form: the part of the URL from the first '/'
    // after the host, or "/" when the URL has no path. Sending the raw input
    // would produce a malformed request line when no scheme was typed.
    const char * pszPath = szURL;
    if (strstr(szURL, "http://") == szURL)
    {
        pszPath += 7;   // strlen("http://")
    }
    pszPath = strchr(pszPath, '/');
    if (pszPath == NULL)
    {
        pszPath = "/";
    }

    char requestData[512] = { 0 };
    sprintf(requestData, "GET %s HTTP/1.1\r\n", pszPath);
    SendData(s, requestData);
    sprintf(requestData, "Host:%s\r\n", szHostName);
    SendData(s, requestData);
    SendData(s, "Accept: */*\r\n");
    SendData(s, "User-Agent: Mozilla/4.0(compatible; MSIE 5.00; Windows NT)\r\n");
    SendData(s, "Connection:Close\r\n");
    //SendData(s, "Connection:Keep-Alive\r\n");
    SendData(s, "\r\n");    // a single blank line terminates the request header

    char buffer[1024] = { 0 };
    int l = 0;
    int chars = 0;          // number of non-CR/LF characters seen on the current line
    BOOL done = FALSE;

    // Print the HTTP response header, one byte at a time, stopping after the
    // first empty line.
    while (!done)
    {
        l = recv(s, buffer, 1, 0);
        if (l <= 0)
        {
            break;          // error or connection closed: nothing new in buffer
        }
        switch (*buffer)
        {
        case '\r':
            break;
        case '\n':
            if (chars == 0)
                done = TRUE;    // empty line => end of header
            chars = 0;          // start of a new line
            break;
        default:
            ++chars;
            break;
        }
        printf("%c", *buffer);
    }

    // Receive and print the body until the peer closes the connection.
    int sum = 0;
    while ((l = recv(s, buffer, sizeof(buffer) - 1, 0)) > 0)
    {
        sum += l;
        buffer[l] = 0;
        // Never use received data as the format string itself.
        printf("%s", buffer);
    }

    // Print the body size. Original author's note: it was observed to equal the
    // Content-Length value in the response header, which can be used to check
    // that the whole body was received.
    printf("\n\n大小 = %d字节\n", sum);

    closesocket(s);
    WSACleanup();
    cin.getline(szURL, 255);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment