Skip to content
This repository was archived by the owner on Jan 25, 2020. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified app.db
Binary file not shown.
22 changes: 21 additions & 1 deletion app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,24 @@
# The routes module is imported at the bottom and not at the top of the script
# as it is always done. The bottom import is a workaround to circular imports,
# a common problem with Flask applications.
from app import routes, models
from app import models
import atexit
from app.parser import parse_anekdot
from apscheduler.schedulers.background import BackgroundScheduler

def scheduler_parser():
    """Job callback for the scheduler: run the joke parser once."""
    parse_anekdot()

# Scheduler configuration and start-up.
# The run interval is read from config.Config.PARSE_TIME_HOURS.

scheduler = BackgroundScheduler()
scheduler.add_job(
    func=scheduler_parser,
    trigger="interval",
    hours=Config.PARSE_TIME_HOURS,
)
scheduler.start()

# Stop the scheduler when the interpreter exits.
atexit.register(scheduler.shutdown)

2 changes: 1 addition & 1 deletion app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class Joke(db.Model):
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))

def __repr__(self):
    """Return a debug representation showing the joke's id and text."""
    return f'<Joke id: {self.id}>, joke_text: {self.joke_text}'

def get_file_path(self):
'''
Expand Down
29 changes: 29 additions & 0 deletions app/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Parses jokes from anekdotitut.ru and adds
# them to the database.
import urllib.request
from urllib.parse import quote
from urllib.parse import unquote
from bs4 import BeautifulSoup
import re
from app.models import Joke
from app import db


def parse_anekdot(pages=range(1, 10), user_id=999):
    """
    Collect jokes from anekdotitut.ru and add the new ones to the DB.

    Downloads each ``pro_armyanskoe_radio<N>.php`` page, extracts every
    element inside ``<body>`` that has class ``noselect`` and an id
    matching ``anekdot<digits>``, and stores its text as a ``Joke``
    unless a row with the same ``joke_text`` already exists.

    :param pages: iterable of page numbers to fetch (default: 1 through 9).
    :param user_id: value stored in ``Joke.user_id`` for every parsed joke.
        NOTE(review): 999 looks like a placeholder "parser" account --
        confirm that such a user exists.
    :raises urllib.error.URLError: if a page cannot be downloaded.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    # Compiled once here instead of on every page.
    joke_id_pattern = re.compile(r'anekdot\d+')

    for page in pages:
        url = f'https://anekdotitut.ru/pro_armyanskoe_radio{page}.php'
        # Close the HTTP response as soon as the page has been parsed.
        with urllib.request.urlopen(url) as html_doc:
            soup_doc = BeautifulSoup(html_doc, 'html.parser')

        if soup_doc.body is None:
            # No <body> on this page, so there is nothing to search.
            continue

        jokes = soup_doc.body.find_all(class_='noselect', id=joke_id_pattern)
        for joke in jokes:
            # Skip jokes that are already stored.
            if Joke.query.filter_by(joke_text=joke.text).first() is None:
                db.session.add(Joke(joke_text=joke.text, user_id=user_id))

    # Persist everything collected during this run.
    db.session.commit()
9 changes: 0 additions & 9 deletions app/routes.py

This file was deleted.

3 changes: 3 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ class Config(object):
ADMINS = ['your-email@example.com']

STATIC_ROOT = '/static/'

# Interval, in hours, between scheduled parser runs
PARSE_TIME_HOURS = 40
29 changes: 23 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
# core
Flask ~= 1.1.1

# database
Flask-SQLAlchemy ~= 2.4.0
flask-migrate ~= 2.5.2
alembic==1.0.11
Click==7.0
dominate==2.4.0
Flask==1.1.1
Flask-Bootstrap==3.3.7.1
Flask-Login==0.4.1
Flask-Migrate==2.5.2
Flask-SQLAlchemy==2.4.0
Flask-WTF==0.14.2
itsdangerous==1.1.0
Jinja2==2.10.1
Mako==1.1.0
MarkupSafe==1.1.1
pydub==0.23.1
python-dateutil==2.8.0
python-editor==1.0.4
six==1.12.0
SQLAlchemy==1.3.7
visitor==0.1.3
Werkzeug==0.15.5
WTForms==2.2.1
beautifulsoup4==4.7.1
APScheduler==3.6.1