工具向——脚本下载裘宗燕python课件

python基础

浏览数:171

2019-8-25

AD:资源代下载服务
  • 裘宗燕的《数据结构与算法:Python语言描述》个人觉得写得还是不错的,于是在网上找了下课件,发现不好打包下载,就写了个简陋的脚本来减少重复劳动。

网站:
http://www.math.pku.edu.cn/teachers/qiuzy/ds_python/courseware/index.htm

Python语言描述

  • 用 python3 运行脚本(需要先安装 beautifulsoup4),脚本会在当前目录下创建文件夹“裘宗燕python”来存储下载的文件。

另外,关于python数据结构与算法,口碑较好的书有(……都是英文版):

  1. Data Structures and Algorithms in Python pdf下载链接
  2. Problem Solving with Algorithms and Data Structures using Python 在线阅读链接
# -*- coding: utf-8 -*-
"""
Created on Sat Jul  9 15:28:39 2016

@author: 树中湖
"""
import os
import re
import urllib.request
from bs4 import BeautifulSoup

# Fetch the courseware index page and collect, for every table row, the first
# left-aligned cell. The later stages read the section title and the download
# links from those cells.
url = 'http://www.math.pku.edu.cn/teachers/qiuzy/ds_python/courseware/index.htm'
# Close the HTTP response deterministically once the page has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
nodes_tr = soup.find_all('tr')
nodes = []
for node in nodes_tr:
    # Rows without a left-aligned cell are skipped explicitly; a bare
    # `except: pass` here would also hide unrelated errors and Ctrl-C.
    cell = node.find('td', align="left")
    if cell is not None:
        nodes.append(cell)
# urls:      section title -> list of <a> tags pointing at .py / .pdf files
# code_subs: section title -> text found in front of the '代码文件' marker,
#            used later as a file name for the generically labelled code link
urls = {}
url_head = 'http://www.math.pku.edu.cn/teachers/qiuzy/ds_python/courseware/'
code_subs = {}

# Matches hrefs that END in ".py" or ".pdf". The pattern is an alternation
# group, not a character class: "[(py)(pdf)]" would accept any href that has
# a dot followed by a single one of the characters ( ) p y d f.
# Compiled once because it is reused for every cell.
file_link = re.compile(r"\.(py|pdf)$")

for node in nodes:
    # The cell text is a comma-separated description; its first field is
    # used as the section title.
    parts = node.get_text().split(',')
    key = parts[0]
    urls[key] = node.find_all('a', href=file_link)
    for part in parts:
        marker = part.find('代码文件')
        if marker != -1:
            # Keep whatever precedes the marker in this field; if several
            # fields contain the marker, the last one wins.
            code_subs[key] = part[:marker]

# Everything is stored under one folder in the current directory.
# exist_ok lets the script be re-run without failing on the existing folder.
os.makedirs('裘宗燕python', exist_ok=True)
os.chdir('裘宗燕python')

from multiprocessing.dummy import Pool, freeze_support
func = urllib.request.urlretrieve

# multiprocessing.dummy.Pool is backed by threads; up to four downloads of a
# section run concurrently.
with Pool(4) as pool:
    for key in urls:
        # Paths are built explicitly instead of chdir-ing into each section
        # folder: the working directory is process-wide state shared with the
        # worker threads, and an exception mid-loop would otherwise leave the
        # process inside a section folder.
        os.makedirs(key, exist_ok=True)
        toCrawl = []
        for link in urls[key]:
            url_temp = link.attrs['href']
            text_temp = link.getText()
            if text_temp == '代码文件':
                # Prefer the recorded prefix for the generic "code file"
                # label; fall back to the label itself when none was recorded
                # instead of raising KeyError.
                text_temp = code_subs.get(key, text_temp)
            target = os.path.join(key, url_temp)
            # The href may contain a sub-path; make sure its folder exists
            # before urlretrieve tries to write there.
            os.makedirs(os.path.dirname(target), exist_ok=True)
            toCrawl.append((url_head + url_temp, target))
            # Save the link's markup in a small text file named after the
            # link text. The message below reports that this file has been
            # written; the download itself happens in starmap afterwards.
            with open(os.path.join(key, '%s.txt' % text_temp), 'w',
                      encoding='utf-8') as f:
                f.write(str(link))
                print('%s' % text_temp, 'is done!')
        # Blocks until every file of this section has been retrieved.
        pool.starmap(func, toCrawl)

作者:treelake