C/C++ 实现URL路径拆分

URL路径拆分: 例如传入 http://www.baidu.com/index.php,将其拆分为主机名 www.baidu.com 和路径 /index.php。

#include <Windows.h>
#include <iostream>

/**
 * Splits a URL into its host part and its path part.
 *
 * A leading "http://" or "https://" (case-sensitive) is skipped when present;
 * otherwise the host is taken to start at the first character. The host is
 * everything up to the first '/' (or the end of the string); the path is the
 * remainder, or "/" when nothing follows the host.
 *
 * @param szUrl   NUL-terminated URL to split.
 * @param szHost  Output buffer for the host; must hold at least
 *                strlen(szUrl) + 1 bytes, since no size is passed in.
 * @param szPath  Output buffer for the path; must hold at least
 *                strlen(szUrl) + 1 bytes (and never fewer than 2).
 * @return 0 on success, -1 if any argument is a null pointer.
 */
int ParseUrl(char szUrl[], char szHost[], char szPath[])
{
	if (szUrl == NULL || szHost == NULL || szPath == NULL)
		return -1;

	// Skip the scheme prefix if there is one.
	int iStart = 0;
	if (strncmp(szUrl, "http://", 7) == 0)
		iStart = 7;
	else if (strncmp(szUrl, "https://", 8) == 0)
		iStart = 8;

	// The host runs until the first '/' or the end of the string.
	int iLen = 0;
	while (szUrl[iStart + iLen] != '\0' && szUrl[iStart + iLen] != '/')
	{ iLen++; }

	// Terminate explicitly so the result does not depend on the caller
	// having zero-filled the output buffers beforehand.
	memcpy(szHost, szUrl + iStart, iLen);
	szHost[iLen] = '\0';

	const char *pszRest = szUrl + iStart + iLen;
	size_t nRest = strlen(pszRest);
	if (nRest == 0)
	{
		szPath[0] = '/';
		szPath[1] = '\0';
	}
	else
	{
		// +1 copies the terminating NUL along with the path.
		memcpy(szPath, pszRest, nRest + 1);
	}
	return 0;
}

// Demo driver: splits a fixed sample URL and prints the two parts.
int main(int argc,char *argv [])
{
	// Output buffers are zero-filled before being handed to ParseUrl.
	char szHost[1024] = { 0 };
	char szPath[2048] = { 0 };
	char szUrl[] = "http://www.baidu.com/index.html";

	// Print the results only when the split reports success (0).
	if (ParseUrl(szUrl, szHost, szPath) == 0)
	{
		printf("主机: %s \n", szHost);
		printf("路径: %s \n", szPath);
	}

	// Keep the console window open until a key is pressed.
	system("pause");
	return 0;
}

HTTP 文件下载

#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Spide(const char *pszUrl, const char *pszFile)
{
	char szHost[256] = {0};
	char *ptr = (char *)pszUrl;

	// 判断开头是否为http://如果不是则返回-1
	if (_strnicmp(ptr, "http://", 7) != 0) { return -1; }

	ptr = ptr + 7;
	int index = 0;

	while (index < 255 && *ptr && *ptr != '/')
	{
		szHost[index++] = *ptr++;
	}
	szHost[index] = '\0';

	//printf("去掉http后的域名地址: %s \n", szHost);

	char *buffer = new char[1024 * 8];
	index = sprintf(buffer,
		"GET %s HTTP/1.1\r\n"
		"Host: %s\r\n"
		"User-Agent: IE or Chrome\r\n"
		"Accept-Type: */*\r\n"
		"Connection: Close\r\n\r\n",
		ptr, szHost);

		//printf("构建好的请求头:\n %s \n", buffer);

		// ------------------------------------------------------------

		SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);

		SOCKADDR_IN addr;
		addr.sin_addr.S_un.S_addr = 0;
		addr.sin_port = htons(0);
		addr.sin_family = AF_INET;

		index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
		hostent *p = ::gethostbyname(szHost);

		if (p) {
			ULONG ai = *(ULONG*)p->h_addr_list[0];
			addr.sin_addr.S_un.S_addr = ai;
			addr.sin_port = htons(80);
			index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
			if (index == NOERROR) {
				index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
				FILE *pf = fopen(pszFile, "wb");
				do {
					index = recv(fd, buffer, 8191, 0);
					if (index <= 0) {
						break;
					}
					buffer[index] = '\0';
					fwrite(buffer, 1, index, pf);
					printf("%s", buffer);
				} while (TRUE);
				fclose(pf);
			}
		}
		closesocket(fd);
		delete[] buffer;
		return 0;
}


int main(int argc,char *argv[])
{
	WSADATA wsaData;
	WSAStartup(0x0202, &wsaData);

	Spide("http://cn.bing.com/","index.html");

	system("pause");
	return 0;
}

实现HTTP页面下载功能

#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Curl_Get(const char *pszUrl)
{
	char szHost[256] = { 0 };
	char *ptr = (char *)pszUrl;

	// 判断开头是否为http:// 或者 https:// 如果不是则返回-1
	if (_strnicmp(ptr, "http://", 7) == 0)
		ptr = ptr + 7;
	else if (_strnicmp(ptr, "https://", 8) == 0)
		ptr = ptr + 8;
	else
		return -1;

	int index = 0;
	while (index < 255 && *ptr && *ptr != '/')
		szHost[index++] = *ptr++;
	szHost[index] = '\0';

	char *buffer = new char[1024 * 8];
	index = sprintf(buffer,
		"GET %s HTTP/1.1 \r\n"
		"Host: %s \r\n"
		"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0 \r\n"
		"Accept-Type: */* \r\n"
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 \r\n"
		"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 \r\n"
		"Connection: Close \r\n\r\n",
		ptr, szHost);
	printf("%s \n", buffer);

	SOCKADDR_IN addr;
	SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
	addr.sin_addr.S_un.S_addr = 0;
	addr.sin_port = htons(0);
	addr.sin_family = AF_INET;
	index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
	hostent *p = gethostbyname(szHost);

	if (p)
	{
		ULONG ai = *(ULONG*)p->h_addr_list[0];
		addr.sin_addr.S_un.S_addr = ai;
		addr.sin_port = htons(80);

		index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
		if (index == NOERROR)
		{
			index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
			do
			{
				index = recv(fd, buffer, 8191, 0);
				if (index <= 0) { break; }
				buffer[index] = '\0';
				printf("%s \n", buffer);
			} while (TRUE);
		}
	}
	closesocket(fd);
	return 0;
}

// Demo driver: initialises Winsock, fetches one page and prints it,
// then releases Winsock again.
int main(int argc, char *argv[])
{
	// Request Winsock 2.2; nothing below can work if this fails.
	WSADATA wsaData;
	if (WSAStartup(0x0202, &wsaData) != 0)
	{
		printf("WSAStartup failed\n");
		return 1;
	}

	Curl_Get("http://cn.bing.com/");

	// Balance the successful WSAStartup call.
	WSACleanup();

	system("pause");
	return 0;
}

原文链接: https://www.cnblogs.com/LyShark/p/12921581.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍

    C/C++ 实现URL路径拆分

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/197617

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年2月12日 下午7:37
下一篇 2023年2月12日 下午7:37

相关推荐