C/C++ 实现URL路径拆分

URL路径拆分:例如传入 http://www.baidu.com/index.php,将其拆分为主机名 www.baidu.com 和路径 /index.php。

#include <Windows.h>
#include <iostream>

// Splits a URL into its host part and its path part.
//
// Parameters:
//   szUrl  - NUL-terminated URL. A leading "http://" or "https://"
//            (matched case-sensitively) is skipped when present.
//   szHost - output buffer; receives everything between the scheme and the
//            first '/' as a NUL-terminated string.
//   szPath - output buffer; receives the remainder starting at that '/', or
//            the single-character path "/" when the URL has no path part.
//
// Both output buffers are always NUL-terminated by this function, so the
// caller does not need to zero them beforehand. Each must be large enough
// for the text it receives plus the terminator (at most strlen(szUrl) + 1
// bytes, and at least 2 bytes for szPath); no bounds are checked here.
//
// Returns 0 in all cases.
int ParseUrl(char szUrl[], char szHost[], char szPath[])
{
    // Length of the scheme prefix to skip, if any.
    int iStart = 0;
    if (strncmp(szUrl, "http://", 7) == 0)
        iStart = 7;
    else if (strncmp(szUrl, "https://", 8) == 0)
        iStart = 8;

    // The host runs up to the first '/' or the end of the string.
    const char *pHost = szUrl + iStart;
    size_t hostLen = 0;
    while (pHost[hostLen] != '\0' && pHost[hostLen] != '/')
        hostLen++;

    memcpy(szHost, pHost, hostLen);
    szHost[hostLen] = '\0';

    // Whatever follows the host is the path; default to "/" when absent.
    const char *pPath = pHost + hostLen;
    const size_t pathLen = strlen(pPath);
    if (pathLen == 0)
    {
        szPath[0] = '/';
        szPath[1] = '\0';
    }
    else
    {
        memcpy(szPath, pPath, pathLen + 1);   // +1 copies the terminator too
    }
    return 0;
}

// Demo entry point: splits a sample URL with ParseUrl and prints the
// resulting host and path, then waits for a key press.
int main(int argc, char *argv[])
{
    // Output buffers are zero-filled before being handed to ParseUrl.
    char szPath[2048] = { 0 };
    char szHost[1024] = { 0 };
    char szUrl[] = "http://www.baidu.com/index.html";

    const int status = ParseUrl(szUrl, szHost, szPath);
    if (status == 0)
    {
        printf("主机: %s \n", szHost);
        printf("路径: %s \n", szPath);
    }

    system("pause");
    return 0;
}

HTTP 文件下载

#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Spide(const char *pszUrl, const char *pszFile)
{
    char szHost[256] = {0};
    char *ptr = (char *)pszUrl;

    // 判断开头是否为http://如果不是则返回-1
    if (_strnicmp(ptr, "http://", 7) != 0) { return -1; }

    ptr = ptr + 7;
    int index = 0;

    while (index < 255 && *ptr && *ptr != '/')
    {
        szHost[index++] = *ptr++;
    }
    szHost[index] = '\0';

    //printf("去掉http后的域名地址: %s \n", szHost);

    char *buffer = new char[1024 * 8];
    index = sprintf(buffer,
        "GET %s HTTP/1.1\r\n"
        "Host: %s\r\n"
        "User-Agent: IE or Chrome\r\n"
        "Accept-Type: */*\r\n"
        "Connection: Close\r\n\r\n",
        ptr, szHost);

        //printf("构建好的请求头:\n %s \n", buffer);

        // ------------------------------------------------------------

        SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);

        SOCKADDR_IN addr;
        addr.sin_addr.S_un.S_addr = 0;
        addr.sin_port = htons(0);
        addr.sin_family = AF_INET;

        index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
        hostent *p = ::gethostbyname(szHost);

        if (p) {
            ULONG ai = *(ULONG*)p->h_addr_list[0];
            addr.sin_addr.S_un.S_addr = ai;
            addr.sin_port = htons(80);
            index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
            if (index == NOERROR) {
                index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
                FILE *pf = fopen(pszFile, "wb");
                do {
                    index = recv(fd, buffer, 8191, 0);
                    if (index <= 0) {
                        break;
                    }
                    buffer[index] = '\0';
                    fwrite(buffer, 1, index, pf);
                    printf("%s", buffer);
                } while (TRUE);
                fclose(pf);
            }
        }
        closesocket(fd);
        delete[] buffer;
        return 0;
}


int main(int argc,char *argv[])
{
    WSADATA wsaData;
    WSAStartup(0x0202, &wsaData);

    Spide("http://cn.bing.com/","index.html");

    system("pause");
    return 0;
}

实现HTTP页面下载功能

#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Curl_Get(const char *pszUrl)
{
    char szHost[256] = { 0 };
    char *ptr = (char *)pszUrl;

    // 判断开头是否为http:// 或者 https:// 如果不是则返回-1
    if (_strnicmp(ptr, "http://", 7) == 0)
        ptr = ptr + 7;
    else if (_strnicmp(ptr, "https://", 8) == 0)
        ptr = ptr + 8;
    else
        return -1;

    int index = 0;
    while (index < 255 && *ptr && *ptr != '/')
        szHost[index++] = *ptr++;
    szHost[index] = '\0';

    char *buffer = new char[1024 * 8];
    index = sprintf(buffer,
        "GET %s HTTP/1.1 \r\n"
        "Host: %s \r\n"
        "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0 \r\n"
        "Accept-Type: */* \r\n"
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 \r\n"
        "Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 \r\n"
        "Connection: Close \r\n\r\n",
        ptr, szHost);
    printf("%s \n", buffer);

    SOCKADDR_IN addr;
    SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
    addr.sin_addr.S_un.S_addr = 0;
    addr.sin_port = htons(0);
    addr.sin_family = AF_INET;
    index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
    hostent *p = gethostbyname(szHost);

    if (p)
    {
        ULONG ai = *(ULONG*)p->h_addr_list[0];
        addr.sin_addr.S_un.S_addr = ai;
        addr.sin_port = htons(80);

        index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
        if (index == NOERROR)
        {
            index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
            do
            {
                index = recv(fd, buffer, 8191, 0);
                if (index <= 0) { break; }
                buffer[index] = '\0';
                printf("%s \n", buffer);
            } while (TRUE);
        }
    }
    closesocket(fd);
    return 0;
}

// Demo entry point: initialises Winsock 2.2, fetches a sample page with
// Curl_Get (which prints it to stdout), releases Winsock and waits for a
// key press. Returns 1 when Winsock cannot be initialised, otherwise 0.
int main(int argc, char *argv[])
{
    WSADATA wsaData;
    // WSAStartup returns 0 on success; no socket call is valid otherwise.
    if (WSAStartup(0x0202, &wsaData) != 0)
    {
        fprintf(stderr, "WSAStartup failed\n");
        return 1;
    }

    Curl_Get("http://cn.bing.com/");

    WSACleanup();

    system("pause");
    return 0;
}

原文链接: https://www.cnblogs.com/LyShark/p/12921581.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍;

也有高质量的技术群,里面有嵌入式、搜广推等BAT大佬

    C/C++ 实现URL路径拆分

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/349681

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年3月2日 上午5:49
下一篇 2023年3月2日 上午5:49

相关推荐