"""Crawl the GZCC campus-news listing and print each article's details.

For every news entry found on a listing page the script prints the
publication time, the title, the article URL and the article's click
count (fetched from a separate counter endpoint).
"""
import re

import requests
from bs4 import BeautifulSoup

url = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'

# Template for the listing pages after the first one (page numbers start at 2).
PAGE_URL = "http://news.gzcc.cn/html/xiaoyuanxinwen/{}.html"
# Counter endpoint; ``{}`` is replaced by the article id.
COUNT_API = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'

# Number of entries the pagination arithmetic assumes per listing page.
ITEMS_PER_PAGE = 10
# Seconds to wait for any single HTTP request before giving up, so a stalled
# server cannot hang the crawl forever.
REQUEST_TIMEOUT = 10

# Article id: the file name (minus ".html") after the last "/" that follows
# an underscore in the article URL.
_NEWS_ID_RE = re.compile(r'_.*/(.*)\.html')
# Trailing fragment of the counter response, expected to look like
# ``html('<digits>');``.
_HITS_RE = re.compile(r"html\('(\d+)'\)")


def _fetch_soup(page_url):
    """Download *page_url*, decode it as UTF-8 and return the parsed soup."""
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def get(gzcc):
    """Return the click count (int) of the article at URL *gzcc*.

    The article id is extracted from the URL and sent to the counter
    endpoint; the count is read from the last ``.``-separated fragment of
    the response text.

    Raises:
        ValueError: if no article id can be extracted from *gzcc*, or the
            counter response does not end with the expected
            ``html('<digits>')`` fragment.
    """
    id_match = _NEWS_ID_RE.search(gzcc)
    if id_match is None:
        raise ValueError('cannot extract news id from URL: {!r}'.format(gzcc))
    dj = id_match.group(1)

    reply = requests.get(COUNT_API.format(dj), timeout=REQUEST_TIMEOUT).text
    # Only the final ".html('N');" call in the reply is used as the count.
    hits_match = _HITS_RE.match(reply.split('.')[-1])
    if hits_match is None:
        raise ValueError('unexpected counter response: {!r}'.format(reply))
    return int(hits_match.group(1))


def sss(label):
    """Print time, title, article URL and click count for each news entry.

    *label* is an iterable of parsed ``<li>`` elements; elements that do
    not contain a ``.news-list-title`` node are skipped.
    """
    for news in label:
        titles = news.select('.news-list-title')
        if not titles:
            continue  # not a news entry (e.g. a navigation <li>)

        title = titles[0].text  # title
        info = news.select('.news-list-info')[0]
        published = info.contents[0].text  # publication time
        url1 = news.select('a')[0]['href']  # article URL
        cs = get(url1)  # click count
        # Print the article's own link, not the listing-page URL.
        print(published, title, url1, cs)


# First listing page.
soup = _fetch_soup(url)
li = soup.select('li')
sss(li)

# The '.a1' element holds the total entry count followed by the '条' suffix.
total = int(soup.select('.a1')[0].text.rstrip('条'))
# Ceiling division: an exact multiple of ITEMS_PER_PAGE must not add a page.
pages = (total + ITEMS_PER_PAGE - 1) // ITEMS_PER_PAGE

for page_no in range(2, pages + 1):
    pagelist = _fetch_soup(PAGE_URL.format(page_no)).select('li')
    sss(pagelist)
    # NOTE(review): the crawl stops after the first additional page (page 2);
    # presumably a deliberate limit on requests -- remove the break to crawl
    # every page.
    break