1 2 3 4 5 6 7 8 9 10 11 12 13 14
import re

import requests

# Directory that serves both the index pages and the PDFs they link to
# with relative ("./...") hrefs.
_BASE_URL = "http://www.cnnic.net.cn/hlwfzyj/hlwxzbg/"

# Matches anchors of the form  <a href='./NAME.pdf' ... target="_blank">
# and captures NAME.  The dots are escaped so they only match literal dots,
# NAME may not contain a single quote (so it cannot run past the end of the
# href value), and the gap before ``target`` may not contain ">" (so the
# match cannot spill into a following tag).
_PDF_LINK_RE = re.compile(
    r"""<a href='\./([^']*?)\.pdf'[^>]*?target="_blank">"""
)


def input_url(i, timeout=10):
    """Return the absolute URLs of the PDFs linked from one index page.

    Page 0 is ``index.htm``; any other value of ``i`` selects
    ``index_<i>.htm`` in the same directory.

    Args:
        i: Index of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up.  Without a timeout a stalled connection blocks forever.

    Returns:
        A list of absolute PDF URLs in the order they appear on the page.
        The list is empty when the page contains no matching links.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    if i == 0:
        page_url = _BASE_URL + "index.htm"
    else:
        page_url = _BASE_URL + "index_" + str(i) + ".htm"

    # NOTE: the HTTP status code is deliberately not checked here; an error
    # page is scanned like any other page, matching the original behavior.
    res = requests.get(page_url, timeout=timeout)
    page = res.content.decode("utf-8")

    return [_BASE_URL + name + ".pdf" for name in _PDF_LINK_RE.findall(page)]
|