-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcrawler.py
More file actions
28 lines (20 loc) · 686 Bytes
/
crawler.py
File metadata and controls
28 lines (20 loc) · 686 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import requests
from bs4 import BeautifulSoup
import os
import models
def get_page(url, writeto=None, title=None):
    """Fetch *url* and optionally save the response text to disk.

    The text is written only when the response status code is 200 and
    ``writeto`` is truthy; otherwise no file is touched.

    Args:
        url: Address requested with ``requests.get``.
        writeto: Directory to save the page text in. Nothing is written
            when this is falsy.
        title: Optional file name for the saved page; any ``/`` in it is
            replaced with ``-``. When falsy, the last path segment of
            ``url`` is used as the file name instead.

    Returns:
        The response object returned by ``requests.get``, whatever its
        status code.
    """
    res = requests.get(url)
    if res.status_code == 200 and writeto:
        if title:
            filename = title.replace('/', '-')
        else:
            # Drop trailing slashes first: otherwise a URL ending in '/'
            # yields an empty name and open() would target the directory.
            filename = url.rstrip('/').split('/')[-1]
        # The context manager closes the file even if the write fails, and
        # an explicit encoding avoids locale-dependent UnicodeEncodeError.
        with open(os.path.join(writeto, filename), mode="w",
                  encoding="utf-8") as f:
            f.write(res.text)
    return res
def get_page_soup(url, writeto=None, title=None):
    """Fetch *url* via ``get_page`` and return its text parsed as HTML.

    ``writeto`` and ``title`` are forwarded unchanged to ``get_page``.
    The response text is parsed with BeautifulSoup's ``'html.parser'``
    backend regardless of the response status code.
    """
    response = get_page(url, writeto=writeto, title=title)
    return BeautifulSoup(response.text, 'html.parser')