python爬取Font Awesome字体库

2018-12-29 | python beautifulsoup 爬虫

我最近在做一个 Java 后台系统，新增弹窗里需要从 icon 字体库中选择图标。如果手工逐个复制这么多 icon 图标，会非常浪费时间。所以我打算用 Python 写一个爬虫，抓取 Font Awesome 官方网站上的图标信息，并保存到数据库中。

先获取section标签的ID属性值

from urllib.request import urlopen
from bs4 import BeautifulSoup
import pymysql

# Open the database connection (local demo credentials).
db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='pydemo')
# Obtain a cursor for executing SQL statements.
cursor = db.cursor()

url = "http://www.fontawesome.com.cn/faicons/"
# Request the page; the context manager closes the HTTP response once the
# document has been parsed.
with urlopen(url) as html:
    # Parse the page content.
    soup = BeautifulSoup(html, "lxml")

# All <section> elements inside the element with id="icons".
icons = soup.select("#icons section")

# Collect (and print) the id attribute of every section, in page order.
arr = []
for section in icons:
    arr.append(section['id'])
    print(section['id'])

输出多个ID属性值

new
web-application
accessibility
hand
transportation
gender
file-type
spinner
form-control
payment
chart
currency
text-editor
directional
video-player
brand
medical

id 为 new 的 section 中的图标与其他 section 中的内容重复，所以要删除列表中的第一个元素（即 new）。

# Discard the leading entry (the "new" section id) and show what remains.
del arr[0]
print(arr)

打印结果：

['web-application', 'accessibility', 'hand', 'transportation', 'gender', 'file-type', 'spinner', 'form-control', 'payment', 'chart', 'currency', 'text-editor', 'directional', 'video-player', 'brand', 'medical']

遍历获取序号和元素的id属性的值

# Number the section ids starting from 1. enumerate() supplies the position
# directly, so no list search is needed and duplicate ids cannot be
# mis-numbered. The loop variable stays `i` because the nested lookup that
# continues this loop refers to it.
for number, i in enumerate(arr, start=1):
    print ("序号：%s 值：%s" % (number, i))

打印结果：

序号：1 值：web-application
序号：2 值：accessibility
序号：3 值：hand
序号：4 值：transportation
序号：5 值：gender
序号：6 值：file-type
序号：7 值：spinner
序号：8 值：form-control
序号：9 值：payment
序号：10 值：chart
序号：11 值：currency
序号：12 值：text-editor
序号：13 值：directional
序号：14 值：video-player
序号：15 值：brand
序号：16 值：medical

遍历获取图标名称信息

    # Visit every element whose id equals the current section id, then every
    # link inside it; each link is expected to describe one icon.
    for h2 in soup.find_all(id=i):
        for a in h2.find_all("a"):
            try:
                # The icon name is the node right after the link's first <span>.
                name = a.span.next_sibling
                print("[*] 图标名称：", name)
                # The CSS classes of the link's <i> tag, e.g. "fa fa-address-book".
                icon = ' '.join(a.i['class'])
                print("[*] 图标icon：", icon)
            except (AttributeError, TypeError, KeyError):
                # Not an icon link: <span> is missing (AttributeError), <i> is
                # missing (TypeError) or <i> has no class (KeyError). Skip it.
                pass

打印结果

[*] 图标名称： address-book
[*] 图标icon： fa fa-address-book
[*] 图标名称： address-book-o
[*] 图标icon： fa fa-address-book-o
[*] 图标名称： address-card
[*] 图标icon： fa fa-address-card
[*] 图标名称： address-card-o
[*] 图标icon： fa fa-address-card-o
[*] 图标名称： adjust

插入数据库

               # SQL语句
                sql = 'INSERT INTO sys_icon(name,icon) values(%s,%s)'
                try:
                    # Execute the parameterized INSERT; values are passed
                    # separately so the driver handles quoting.
                    cursor.execute(sql, (name, icon))
                    # Commit so the row is persisted.
                    db.commit()
                except pymysql.MySQLError as err:
                    # Roll back the failed statement and report it instead of
                    # silently swallowing every exception.
                    db.rollback()
                    print("[!] 插入失败：", name, err)

完整代码如下：

# -*- coding: utf-8 -*-
#---------------------------------------
#   程序：爬取Font Awesome字体库
#   版本：0.0.1
#   作者：周玉龙
#   日期：2018-12-29 周六 下午9:35
#   语言：Python 3.6
#   操作：输入网址后就获取icon，然后存到mysql数据库
#---------------------------------------

from urllib.request import urlopen
from bs4 import BeautifulSoup
import pymysql

# Open the database connection (local demo credentials).
db = pymysql.connect(host='localhost', user='root', password='root', port=3306, db='pydemo')
# Obtain a cursor for executing SQL statements.
cursor = db.cursor()

# Parameterized INSERT; hoisted because it is identical for every icon.
sql = 'INSERT INTO sys_icon(name,icon) values(%s,%s)'

try:
    url = "http://www.fontawesome.com.cn/faicons/"
    # Request the page; the context manager closes the HTTP response once the
    # document has been parsed.
    with urlopen(url) as html:
        # Parse the page content.
        soup = BeautifulSoup(html, "lxml")

    # All <section> elements inside the element with id="icons".
    icons = soup.select("#icons section")

    # Collect (and print) the id attribute of every section, in page order.
    arr = []
    for section in icons:
        arr.append(section['id'])
        print(section['id'])

    # Remove the first id ("new" per the article), whose icons repeat those
    # listed in the other sections.
    arr.pop(0)
    print(arr)

    # enumerate() gives the 1-based position without searching the list.
    for number, i in enumerate(arr, start=1):

        print ("序号：%s 值：%s" % (number, i))

        for h2 in soup.find_all(id=i):
            for a in h2.find_all("a"):
                try:
                    # The icon name is the node right after the link's first <span>.
                    name = a.span.next_sibling
                    print("[*] 图标名称：", name)
                    # The CSS classes of the link's <i> tag, e.g. "fa fa-address-book".
                    icon = ' '.join(a.i['class'])
                    print("[*] 图标icon：", icon)
                except (AttributeError, TypeError, KeyError):
                    # Not an icon link: <span> is missing (AttributeError),
                    # <i> is missing (TypeError) or <i> has no class
                    # (KeyError). Skip it.
                    continue

                try:
                    # Execute the parameterized INSERT.
                    cursor.execute(sql, (name, icon))
                    # Commit so the row is persisted.
                    db.commit()
                except pymysql.MySQLError as err:
                    # Roll back the failed statement and report it instead of
                    # silently swallowing every exception.
                    db.rollback()
                    print("[!] 插入失败：", name, err)
finally:
    # Always release the cursor and the connection, even if scraping fails.
    cursor.close()
    db.close()