用Ji框架进行HTML/XML解析的过程
更新:HHH   时间:2023-1-7


有如下的HTML/XML文件:


<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html lang="zh-CN">

<head>

<meta name="Content-Type" content="text/html;charset=utf-8" />

    <meta name="Referrer" content="unsafe-url" />

        <meta content="True" name="HandheldFriendly" />

            <meta name="theme-color" content="#333344" />

                

                <meta name="detectify-verification" content="d0264f228155c7a1f72c3d91c17ce8fb" />

                    <meta name="alexaVerifyID" content="OFc8dmwZo7ttU4UCnDh2rKDtLlY" />

                        <meta name="baidu-site-verification" content="D00WizvYyr" />

                            <meta name="msvalidate.01" content="D9B08FEA08E3DA402BF07ABAB61D77DE" />

                                <meta property="wb:webmaster" content="f2f4cb229bda06a4" />

                                    <meta name="google-site-verification" content="LM_cJR94XJIqcYJeOCscGVMWdaRUvmyz6cVOqkFplaU" />

                                        

                                        <title>V2EX  我收藏的主题</title>

<link rel="dns-prefetch" href="//static.v2ex.com" />

    <link rel="dns-prefetch" href="//cdn.v2ex.com" />

        <link rel="dns-prefetch" href="//cdn.v2ex.co" />

            <link rel="dns-prefetch" href="//i.v2ex.co" />

                

                <link rel="stylesheet" type="text/css" media="screen" href="/css/basic.css?v=228347:1493519627:3.9.7.5" />

                    

                    <link rel="stylesheet" type="text/css" media="screen" href="/static/css/style.css?v=b1d9e9c39b16b1c91b7e03ac0c2d412c" />

                        <link rel="stylesheet" type="text/css" media="screen" href="/css/desktop.css?v=3.9.7.5" />

                            <link rel="stylesheet" href="//v2ex.assets.uxengine.net/js/highlight/styles/tomorrow.css" type="text/css" />

                                <script type="text/javascript" src="//v2ex.assets.uxengine.net/js/highlight/highlight.pack.js"></script>

<link rel="icon" sizes="192x192" href="/static/img/v2ex_192.png" />

    <link rel="shortcut icon" href="/static/img/icon_rayps_64.png" type="image/png" />

        <link rel="stylesheet" type="text/css" href="/static/css/font-awesome.min.css?v=295235b28b6e649d99539a9d32b95d30" />

            <script src="/static/js/jquery.js?v=8fc25e27d42774aeae6edbc0a18b72aa" type="text/javascript"></script>

<script src="/static/js/jquery-ui.js?v=ba23883b51f5f372d28755e199785526" type="text/javascript"></script>

<script src="//v2ex.assets.uxengine.net/static/js/jquery.autosize.js?v=1.18.9" type="text/javascript"></script>

<link href="/static/css/jquery.textcomplete.css?v=5a041d39010ded8724744170cea6ce8d" rel="stylesheet" />

    <script src="/static/js/lscache.min.js?v=bf403ab76d287d394375662defac76c3" type="text/javascript"></script>

<script src="/static/js/v2ex.js?v=d36f4bbec51ee88f7f8dfbb70e66ac66" type="text/javascript"></script>

<link href="/static/js/select2/select2.css?v=2621fe97ae1aabca8661d60000147412" rel="stylesheet" />

    <script src="/static/js/select2/select2.min.js?v=3225a95b13ab51f570e2544751ee8320" type="text/javascript"></script>

<link href="/static/js/selectboxit/selectboxit.css?v=5dc55d3860ef80ef1875d6800a5fbfa3" rel="stylesheet" >

    <script src="/static/js/selectboxit/selectboxit.min.js?v=379ece65af74a99ef6cd7ca21f8beb6e" type="text/javascript"></script>

<meta name="description" content="" />

    

        </head>

<body>

<div id="Top">

<div class="content">

<div style="padding-top: 6px;">

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>

<td width="110" align="left"><a href="/" name="top" title="way to explore"><img src="//v2ex.assets.uxengine.net/site/logo@2x.png?m=1346064962" border="0" align="default" alt="V2EX" width="94" height="30" /></a></td>

<td width="auto" align="left">

<div id="Search"><form action="https://www.google.com" onsubmit="return dispatch()" target="_blank"><div style="width: 276px; height: 28px; background-size: 276px 28px; background-image: url('/static/img/qbar_light@2x.png'); background-repeat: no-repeat; display: inline-block;"><input type="text" maxlength="40" name="q" id="q" value="" /></div></form></div>

</td>

<td width="570" align="right" style="padding-top: 2px;"><a href="/" class="top">首页</a>&nbsp;&nbsp;&nbsp;<a href="/member/duwei1" class="top">duwei1</a>&nbsp;&nbsp;&nbsp;<a href="/settings" class="top">设置</a>&nbsp;&nbsp;&nbsp;<a href="#;" onclick="if (confirm('确定要从 V2EX 登出?')) { location.href= '/signout?once=29225'; }" class="top">登出</a></td>

</tr>

</table>

</div>

</div>

</div>

<div id="Wrapper">

<div class="content">


<div id="Leftbar"></div>

<div id="Rightbar">

<div class="sep20"></div>



<div class="box">

<div class="cell">

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>

<td width="48" valign="top"><a href="/member/duwei1"><img src="//v2ex.assets.uxengine.net/gravatar/2067bb823aa712eb75ab19b6b150d658?s=48&d=retro" class="avatar" border="0" align="default" style="max-width: 48px; max-height: 48px;" /></a></td>

<td width="10" valign="top"></td>

<td width="auto" align="left"><span class="bigger"><a href="/member/duwei1">duwei1</a></span>


</td>

</tr>

</table>

<div class="sep10"></div>

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>

<td width="33%" align="center"><a href="/my/nodes" class="dark" style="display: block;"><span class="bigger">0</span><div class="sep3"></div><span class="fade">节点收藏</span></a></td>

<td width="34%" style="border-left: 1px solid rgba(100, 100, 100, 0.4); border-right: 1px solid rgba(100, 100, 100, 0.4);" align="center"><a href="/my/topics" class="dark" style="display: block;"><span class="bigger">5</span><div class="sep3"></div><span class="fade">主题收藏</span></a></td>

<td width="33%" align="center"><a href="/my/following" class="dark" style="display: block;"><span class="bigger">0</span><div class="sep3"></div><span class="fade">特别关注</span></a></td>

</tr>

</table>

</div>

<div class="cell">

<div style="width: 250px; background-color: #f0f0f0; height: 3px; display: inline-block; vertical-align: middle;"><div style="width: 42px; background-color: #ccc; height: 3px; display: inline-block;"></div></div>

</div>


<div class="cell" style="padding: 5px;">

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>

<td width="32"><a href="/new"><img src="/static/img/flat_compose.png?v=7d21f0767aeba06f1dec21485cf5d2f1" width="32" border="0" /></a></td>

<td width="10"></td>

<td width="auto" valign="middle" align="left"><a href="/new">创作新主题</a></td>

</tr>

</table>

</div>

<div class="inner"><div class="fr" id="money"><a href="/balance" class="balance_area" style="">0 <img src="//v2ex.assets.uxengine.net/static/img/bronze.png" alt="B" align="absmiddle" border="0" /></a></div><a href="/notifications" class="fade">0 条未读提醒</a></div>


<div class="dock_area">

<div class="inner"><span class="chevron">&nbsp;&nbsp;</span> <a href="/balance">在你开始发帖之前,请先领取初始资本</a></div>

</div>


</div>



<div class="sep20"></div>



<div class="box">

<div class="inner" align="center">

<a href="https://shimo.im/doc/G3ckHEVF3f4qANHk" target="_blank"><img src="//v2ex.assets.uxengine.net/assets/sidebar/shimo_20170315_1.png" border="0" width="250" height="250" alt="石墨文档" /></a>

</div>

<div class="sidebar_compliance"><a href="/advertise" target="_blank">广告</a></div>

</div>



<div class="sep20"></div>


</div>

<div id="Main">

<div class="sep20"></div>

<div class="box">

<div class="header"><a href="/">V2EX</a> <span class="chevron">&nbsp;&nbsp;</span> 我收藏的主题<div class="fr f12"><span class="snow">主题总数&nbsp;</span> <strong class="gray">5</strong></div></div>




<div class="cell item" style="">

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>


<td width="48" valign="top" align="center"><a href="/member/Diss"><img src="//v2ex.assets.uxengine.net/avatar/e4a7/67a3/94330_normal.png?m=1422432421" class="avatar" border="0" align="default" /></a></td>

<td width="10"></td>


<td width="auto" valign="middle"><span class="item_title"><a href="/t/358175#reply1">腾讯云采购节 腾讯云 7.9 官网折扣折上折</a></span>

<div class="sep5"></div>

<span class="small fade"><div class="votes"></div><a class="node" href="/go/cloud">云计算</a> &nbsp;•&nbsp; <strong><a href="/member/Diss">Diss</a></strong> &nbsp;•&nbsp; 5 分钟前 &nbsp;•&nbsp; 最后回复来自 <strong><a href="/member/ik">ik</a></strong></span>

</td>

<td width="70" align="right" valign="middle">


<a href="/t/358175#reply1" class="count_livid">1</a>


</td>

</tr>

</table>

</div>




<div class="cell item" style="">

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>


<td width="48" valign="top" align="center"><a href="/member/lishunli"><img src="//v2ex.assets.uxengine.net/gravatar/bf971b40c441ad731d7a3eb147473c69?s=48&d=retro" class="avatar" border="0" align="default" /></a></td>

<td width="10"></td>


<td width="auto" valign="middle"><span class="item_title"><a href="/t/358153#reply25">vultr 一到晚上丢包就太严重了</a></span>

<div class="sep5"></div>

<span class="small fade"><div class="votes"></div><a class="node" href="/go/cloud">云计算</a> &nbsp;•&nbsp; <strong><a href="/member/lishunli">lishunli</a></strong> &nbsp;•&nbsp; 4 分钟前 &nbsp;•&nbsp; 最后回复来自 <strong><a href="/member/watara">watara</a></strong></span>

</td>

<td width="70" align="right" valign="middle">


<a href="/t/358153#reply25" class="count_livid">25</a>


</td>

</tr>

</table>

</div>


<div class="cell item" style="">

<table cellpadding="0" cellspacing="0" border="0" width="100%">

<tr>


<td width="48" valign="top" align="center"><a href="/member/lbc307"><img src="//v2ex.assets.uxengine.net/gravatar/8a1fb736bbefe5c627301b5e289a2828?s=48&d=retro" class="avatar" border="0" align="default" /></a></td>

<td width="10"></td>


<td width="auto" valign="middle"><span class="item_title"><a href="/t/358125#reply30">新手,我在自学编程当中遇到的一些问题。</a></span>

<div class="sep5"></div>

<span class="small fade"><div class="votes"></div><a class="node" href="/go/java">Java</a> &nbsp;•&nbsp; <strong><a href="/member/lbc307">lbc307</a></strong> &nbsp;•&nbsp; 8 分钟前 &nbsp;•&nbsp; 最后回复来自 <strong><a href="/member/humor66">humor66</a></strong></span>

</td>

<td width="70" align="right" valign="middle">


<a href="/t/358125#reply30" class="count_livid">30</a>


</td>

</tr>

</table>

</div>


</div>


</div>



</div>

<div class="c"></div>

<div class="sep20"></div>

</div>

<div id="Bottom">

<div class="content">

<div class="inner">

<div class="sep10"></div>

<div class="fr">

<a href="https://www.digitalocean.com/?refcode=1b51f1a7651d" target="_blank"><img src="//v2ex.assets.uxengine.net/assets/logos/do_blue.png" width="60" border="0" alt="DigitalOcean" /></a>

</div>

<strong><a href="/about" class="dark" target="_self">关于</a> &nbsp; <span class="snow">·</span> &nbsp; <a href="/faq" class="dark" target="_self">FAQ</a> &nbsp; <span class="snow">·</span> &nbsp; <a href="/p/7v9TEc53" class="dark" target="_self">API</a> &nbsp; <span class="snow">·</span> &nbsp; <a href="/mission" class="dark" target="_self">我们的愿景</a> &nbsp; <span class="snow">·</span> &nbsp; <a href="/advertise" class="dark" target="_self">广告投放</a> &nbsp; <span class="snow">·</span> &nbsp; <a href="/advertise/2016.html" class="dark" target="_self">鸣谢</a> &nbsp; <span class="snow">·</span> &nbsp; 1078 人在线</strong> &nbsp; <span class="fade">最高记录 2466</span> &nbsp; <span class="snow">·</span> &nbsp; <a href="/select/language"><img src="/static/img/lang_zhcn_32.png" align="absmiddle" border="0" width="20" alt="" /></a>

<div class="sep20"></div>

创意工作者们的社区

<div class="sep5"></div>

World is powered by solitude

<div class="sep20"></div>

<span class="small fade">VERSION: 3.9.7.5 · 35ms · UTC 02:45 · PVG 10:45 · LAX 19:45 · JFK 22:45<br /> Do have faith in what you're doing.</span>

<div class="sep20"></div>

<span class="f12 gray"><a href="http://www.miibeian.gov.cn/" target="_blank" rel="nofollow">ICP16043287-1</a></span>

<div class="sep10"></div>

</div>

</div>

</div>


<script>

(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){

(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),

m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)

})(window,document,'script','//www.google-analytics.com/analytics.js','ga');


ga('create', 'UA-11940834-2', 'v2ex.com');

ga('send', 'pageview');


</script>



<script src="/static/js/jquery.textcomplete.min.js?v=43bfb325d9b6b784e680aa9eaef91925" type="text/javascript"></script>


</body>

</html>

该页面在浏览器中的显示效果截图见文末附件。


解析的过程如下:


   /**

     获取我的收藏帖子列表


     */

    class func getFavoriteList(_ page:Int = 1, completionHandler: @escaping (V2ValueResponse<([TopicListModel],Int)>) -> Void){

        //先获取请求地址,如: https://www.v2ex.com/my/topics?p=1

        

        Alamofire.request(V2EXURL+"my/topics?p=\(page)", headers: MOBILE_CLIENT_HEADERS).responseJiHtml { (response) -> Void in

            var resultArray:[TopicListModel] = []

            var maxPage = 1

            if let jiHtml = response.result.value {

                // 先找到 class="cell item" 的这个节点

                

                if let aRootNode = jiHtml.xPath("//*[@class='cell item']"){

                    for aNode in aRootNode {

                        let topic = TopicListModel(favoritesRootNode:aNode)

                        resultArray.append(topic);

                    }

                }

                //更新通知数量

                V2User.sharedInstance.getNotificationsCount(jiHtml.rootNode!)


                //获取最大页码 只有第一页需要获取maxPage

                if page <= 1

                    ,let aRootNode = jiHtml.xPath("//*[@class='page_normal']")?.last

                    , let page = aRootNode.content

                    , let pageInt = Int(page)

                {

                    maxPage = pageInt

                }

            }


            let t = V2ValueResponse<([TopicListModel],Int)>(value:(resultArray,maxPage), success: response.result.isSuccess)

            completionHandler(t);

        }

    }


 //////////////////////////////


    // Excerpt from getFavoriteList: select every element whose class attribute
    // is exactly "cell item"; each match is one row of the favorites list.
    if let aRootNode = jiHtml.xPath("//*[@class='cell item']"){

                    for aNode in aRootNode {

                        // Build a model from the row node; all field extraction
                        // happens inside init(favoritesRootNode:).
                        let topic = TopicListModel(favoritesRootNode:aNode)

                        resultArray.append(topic);

                    }

                }


这部分是核心:通过 XPath 找到所有 class 为 "cell item" 的节点,每个节点对应列表中的一条收藏主题,再逐个交给模型的初始化方法解析。


/// Builds a topic model from one row of the favorites list.
///
/// All XPath queries are relative to `favoritesRootNode`, which is expected to
/// be a node matched by `//*[@class='cell item']`. Inside it the row is a
/// one-row table: td[1] holds the avatar, td[3] the title span followed by the
/// metadata span, td[4] the reply-count link. Any field whose node is missing
/// is simply left nil.
///
/// - Parameter favoritesRootNode: the "cell item" node for a single topic.
init(favoritesRootNode:JiNode) {
    super.init()

    self.avata = favoritesRootNode.xPath("./table/tr/td[1]/a[1]/img[@class='avatar']").first?["src"]
    // span[2] is the metadata span: its first link is the node (category),
    // strong[1] wraps the author and strong[2] the last replier.
    self.nodeName = favoritesRootNode.xPath("./table/tr/td[3]/span[2]/a[1]").first?.content
    self.userName = favoritesRootNode.xPath("./table/tr/td[3]/span[2]/strong[1]/a").first?.content

    // The query matches the first link of every span in td[3]; `.first` picks
    // the earliest one in document order, which is the title link.
    let node = favoritesRootNode.xPath("./table/tr/td[3]/span/a[1]").first
    self.topicTitle = node?.content
    self.setupTitleLayout()

    // Reduce an href such as "/t/358175#reply1" to the bare id "358175".
    // The stripped value is always written back, so an href without a
    // "#fragment" (e.g. "/t/358175") no longer keeps its "/t/" prefix.
    var topicId = node?["href"]
    if var id = topicId {
        if let range = id.range(of: "/t/") {
            id.replaceSubrange(range, with: "")
        }
        if let range = id.range(of: "#") {
            id = id.substring(to: range.lowerBound)
        }
        topicId = id
    }
    self.topicId = topicId

    // The metadata span's text is four segments joined by "•". Splitting on an
    // empty string never splits, so the bullet must be the separator here.
    // NOTE(review): index 3 is the fourth segment, which in the sample page is
    // the "last reply from ..." text rather than the relative time — confirm
    // this is the intended value for `date`.
    if let meta = favoritesRootNode.xPath("./table/tr/td[3]/span[2]").first?.content {
        let array = meta.components(separatedBy: "•")
        if array.count == 4 {
            self.date = array[3].trimmingCharacters(in: .whitespaces)
        }
    }

    self.lastReplyUserName = favoritesRootNode.xPath("./table/tr/td[3]/span[2]/strong[2]/a[1]").first?.content

    self.replies = favoritesRootNode.xPath("./table/tr/td[4]/a[1]").first?.content
}


这个方法的作用是从每个 cell 节点中解析出模型的各个字段。例如模型中的 userName 字段:通过 favoritesRootNode.xPath("./table/tr/td[3]/span[2]/strong[1]/a").first?.content 取到发帖人的名字(如 lishunli),再赋值给 userName。


解析并赋值给模型之后的效果截图见文末附件。


附件:http://down.51cto.com/data/2366623
返回web开发教程...