Refactor, rename and fix pydantic errors

Vladan Popovic 2020-09-04 18:22:49 +02:00
parent d9b04b9468
commit 60a944cf5e
8 changed files with 105 additions and 74 deletions

View File

@@ -1,6 +1,13 @@
-kafka_servers:
-  - "localhost:9992"
-kafka_topic: "sample"
+kafka:
+  servers:
+    - "localhost:9992"
+  topic: "sample"
+postgres:
+  dbhost: "localhost"
+  dbport: 5432
+  dbname: "chweb"
+  dbuser: "vladan"
+  dbpass: ""
 sites:
   - url: "https://example.com"
     regex: "domain"

View File

@@ -1,6 +1,6 @@
 [metadata]
-name = webstat
-summary = Tool for collecting website stats.
+name = chweb
+summary = Tool for checking websites and collecting the results.
 description-file = README.rst
 author = Vladan Popovic
 author-email = vladanovic@gmail.com
@@ -19,8 +19,8 @@ where=src
 [options.entry_points]
 console_scripts =
-    wstat_collect = webstat.cmd:collect
-    wstat_consume = webstat.cmd:consume
+    chweb_collect = chweb.cmd:collect
+    chweb_consume = chweb.cmd:consume
 
 [bdist_wheel]
 universal = 1

src/chweb/cmd.py Normal file (+51)
View File

@@ -0,0 +1,51 @@
"""
A module containing all console script functions.
"""
import argparse
import asyncio

import yaml

from chweb.collector import Collector
from chweb.consumer import Consumer
from chweb.models import Config


def configure() -> Config:
    """
    Gets the configuration and creates a Pydantic model from the parsed YAML.
    """
    parser = argparse.ArgumentParser(
        description='Website availability checker.')
    parser.add_argument('--config', type=str,
                        default="/etc/checker.yaml",
                        help=('The yaml config file. '
                              'Defaults to /etc/checker.yaml'))
    args = parser.parse_args()
    with open(args.config, 'r') as conf_file:
        config = yaml.load(conf_file, Loader=yaml.FullLoader)
    return Config(**config)


def run(Service):
    """
    Runs a service in an event loop.
    """
    loop = asyncio.get_event_loop()
    queue = asyncio.Queue()
    config = configure()
    service = Service(config, loop, queue)
    service.run()


def collect():
    """
    Main producer event loop.
    """
    run(Collector)


def consume():
    """
    Main consumer event loop.
    """
    run(Consumer)
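
Because configure() now returns a Pydantic Config rather than a raw dict, a malformed YAML file fails fast at startup instead of surfacing later as a KeyError deep in the collector. A small illustration, not part of the commit, assuming the chweb.models sketch above:

import yaml
from pydantic import ValidationError

from chweb.models import Config

# A config missing the kafka.topic, postgres and sites sections.
raw = yaml.load('kafka:\n  servers:\n    - "localhost:9992"\n',
                Loader=yaml.FullLoader)
try:
    Config(**raw)
except ValidationError as err:
    # Pydantic reports every missing or invalid field in one pass.
    print(err)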

View File

@@ -4,25 +4,28 @@ Checks status of web servers and sends them to a configured Kafka topic.
 import asyncio
 import json
 import re
-from typing import Any, Dict, List, Optional
+from typing import Optional
+from urllib.parse import urlparse
 
 import aiokafka  # type: ignore
 import requests
 
+from chweb.models import Config, Check
+
 
 class Collector:
     """
     A class that contains all methods needed to check the statuses of all
     websites present in the config.
     """
-    def __init__(self, config: Dict[str, Any],
+    def __init__(self, config: Config,
                  event_loop: asyncio.AbstractEventLoop,
                  queue: asyncio.Queue):
         self.config = config
         self.loop = event_loop
         self.queue = queue
 
-    async def get_status(self, url: str, regex: Optional[str]) -> Dict[str, Any]:
+    async def check(self, url: str, regex: Optional[str]) -> Check:
         """
         Checks the status of a website and optionally matches a regex on the
         response body.
@@ -36,15 +39,16 @@ class Collector:
         # be omitted from the config.
         if regex is not None:
             matches = re.search(regex, res.text) is not None
-        return {
-            'url': url,
-            'regex': regex,
-            'status': res.status_code,
-            'response_time': res.elapsed.microseconds,
-            'regex_matches': matches,
-        }
+        return Check(
+            domain=urlparse(res.url).netloc,
+            regex=regex,
+            response_time=res.elapsed.microseconds,
+            regex_matches=matches,
+            status=res.status_code,
+            url=res.url,
+        )
 
-    async def create_periodic_task(self, site):
+    async def check_forever(self, site):
         """
         A void function that gets the status of a site and sends it to an
         ``asyncio.Queue`` for further processing (sending to a Kafka topic).
@@ -52,9 +56,9 @@ class Collector:
         :param site: A site object from the config.
         """
         while True:
-            data = await self.get_status(site["url"], site.get("regex"))
+            data = await self.check(site.url, site.regex)
             self.queue.put_nowait(data)
-            await asyncio.sleep(site["check_interval"])
+            await asyncio.sleep(site.check_interval)
 
     async def produce(self):
         """
@@ -64,23 +68,23 @@ class Collector:
         """
         producer = aiokafka.AIOKafkaProducer(
             loop=self.loop,
-            bootstrap_servers=self.config["kafka_servers"])
+            bootstrap_servers=self.config.kafka.servers)
         await producer.start()
         try:
             while True:
-                status = await self.queue.get()
-                msg = bytes(json.dumps(status).encode("utf-8"))
-                await producer.send_and_wait(self.config["kafka_topic"], msg)
+                check = await self.queue.get()
+                msg = bytes(check.json().encode("utf-8"))
+                await producer.send_and_wait(self.config.kafka.topic, msg)
         finally:
             await producer.stop()
 
-    def tasks(self) -> List[asyncio.Task]:
+    def run(self):
         """
-        Creates a task for every site.
+        Runs all tasks in the event loop.
         """
         def create_task(site) -> asyncio.Task:
-            return self.loop.create_task(self.create_periodic_task(site))
-        tasks = list(map(create_task, self.config["sites"]))
+            return self.loop.create_task(self.check_forever(site))
+        tasks = list(map(create_task, self.config.sites))
         tasks.append(self.loop.create_task(self.produce()))
-        return tasks
+        self.loop.run_until_complete(asyncio.gather(*tasks))
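
The move from an ad-hoc dict to the Check model also changes the wire format: produce() now sends check.json() and the consumer (next diff) rebuilds the model from the payload. A small round-trip sketch under the assumed model definition above, with illustrative values:

import json

from chweb.models import Check

check = Check(domain="example.com", url="https://example.com",
              status=200, response_time=1234,
              regex="domain", regex_matches=True)
msg = bytes(check.json().encode("utf-8"))  # what produce() sends to Kafka
restored = Check(**json.loads(msg))        # what the consumer reconstructs
assert restored == check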

View File

@@ -3,11 +3,13 @@ Sample consumer.
 """
 import asyncio
 import json
-from typing import Any, Dict, List
+from typing import Any, Dict
 
 import aiokafka  # type: ignore
 import asyncpg  # type: ignore
 
+from chweb.models import Check
+
 
 class Consumer:
     def __init__(self, config: Dict[str, Any],
@@ -22,15 +24,15 @@ class Consumer:
         Consumes messages from a Kafka topic.
         """
         consumer = aiokafka.AIOKafkaConsumer(
-            self.config['kafka_topic'],
+            self.config.kafka.topic,
             loop=self.loop,
-            bootstrap_servers=self.config['kafka_servers'])
+            bootstrap_servers=self.config.kafka.servers)
         await consumer.start()
         try:
             # Consume messages
             async for msg in consumer:
-                self.queue.put_nowait(json.loads(msg.value))
+                self.queue.put_nowait(Check(**json.loads(msg.value)))
         finally:
             # Will leave consumer group; perform autocommit if enabled.
             await consumer.stop()
@@ -49,11 +51,12 @@ class Consumer:
         finally:
             print("EXITED!")
 
-    def tasks(self) -> List[asyncio.Task]:
+    def run(self):
         """
-        Creates tasks for reading from the Kafka topic and writing in
-        PostgreSQL.
+        Runs all tasks in the event loop.
         """
-        kafka_consumer = self.loop.create_task(self.consume())
-        psql_writer = self.loop.create_task(self.write())
-        return [kafka_consumer, psql_writer]
+        tasks = [
+            self.loop.create_task(self.consume()),
+            self.loop.create_task(self.write()),
+        ]
+        self.loop.run_until_complete(asyncio.gather(*tasks))

View File

@@ -1,34 +0,0 @@
"""
A module containing all console script functions.
"""
import asyncio

import yaml

from webstat.collector import Collector
from webstat.consumer import Consumer


def run(Service):
    """
    A factory kinda that runs both services in an event loop.
    """
    loop = asyncio.get_event_loop()
    queue = asyncio.Queue()
    with open('config.yaml', 'r') as conf_file:
        config = yaml.load(conf_file, Loader=yaml.FullLoader)
    tasks = Service(config, loop, queue).tasks()
    loop.run_until_complete(asyncio.gather(*tasks))


def collect():
    """
    Main producer event loop.
    """
    run(Collector)


def consume():
    """
    Main consumer event loop.
    """
    run(Consumer)

View File

@@ -8,7 +8,7 @@ deps =
     pytest-cov
     pytest-mock
 commands =
-    pytest --cov=webstat --cov-append --cov-report=term-missing {posargs}
+    pytest --cov=chweb --cov-append --cov-report=term-missing {posargs}
 
 [testenv:lint]
 deps = pylint