Refactor, rename and fix pydantic errors

This commit is contained in:
Vladan Popovic 2020-09-04 18:22:49 +02:00
parent d9b04b9468
commit 60a944cf5e
8 changed files with 105 additions and 74 deletions

src/chweb/cmd.py Normal file

@@ -0,0 +1,51 @@
"""
A module containing all console script functions.
"""
import argparse
import asyncio
import yaml
from chweb.collector import Collector
from chweb.consumer import Consumer
from chweb.models import Config
def configure() -> Config:
"""
Gets the configuration and creates a Pydantic model from the parsed YAML.
"""
parser = argparse.ArgumentParser(
        description='Website availability checker.')
parser.add_argument('--config', type=str,
default="/etc/checker.yaml",
help=('The yaml config file. '
'Defaults to /etc/checker.yaml'))
args = parser.parse_args()
with open(args.config, 'r') as conf_file:
config = yaml.load(conf_file, Loader=yaml.FullLoader)
return Config(**config)
def run(Service):
"""
Runs a service in an event loop.
"""
loop = asyncio.get_event_loop()
queue = asyncio.Queue()
config = configure()
service = Service(config, loop, queue)
service.run()
def collect():
"""
Main producer event loop.
"""
run(Collector)
def consume():
"""
Main consumer event loop.
"""
run(Consumer)
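For context, the module docstring says these are console script functions, so they are presumably exposed as command-line entry points. A minimal sketch of how that could be wired up with setuptools follows; the package metadata and the chweb-collect / chweb-consume command names are assumptions, not something defined in this commit.

# setup.py (sketch): exposes the two event loops as console scripts.
# Package name, version and command names are illustrative only.
from setuptools import find_packages, setup

setup(
    name="chweb",
    version="0.0.1",
    package_dir={"": "src"},
    packages=find_packages("src"),
    entry_points={
        "console_scripts": [
            "chweb-collect = chweb.cmd:collect",
            "chweb-consume = chweb.cmd:consume",
        ],
    },
)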

src/chweb/collector.py Normal file

@@ -0,0 +1,90 @@
"""
Checks status of web servers and sends them to a configured Kafka topic.
"""
import asyncio
import re
from typing import Optional
from urllib.parse import urlparse
import aiokafka # type: ignore
import requests
from chweb.models import Config, Check
class Collector:
"""
A class that contains all methods needed to check the statuses of all
websites present in the config.
"""
def __init__(self, config: Config,
event_loop: asyncio.AbstractEventLoop,
queue: asyncio.Queue):
self.config = config
self.loop = event_loop
self.queue = queue
async def check(self, url: str, regex: Optional[str]) -> Check:
"""
Checks the status of a website and optionally matches a regex on the
response body.
:param url: The URL of the site that needs to be checked.
:param regex: An optional regex to match on the response body.
        :returns: A ``Check`` object ready to be sent to the queue for further processing.
"""
res = await self.loop.run_in_executor(None, requests.get, url)
        # matches stays None because the regex can be omitted from the config.
        matches = None
if regex is not None:
matches = re.search(regex, res.text) is not None
return Check(
domain=urlparse(res.url).netloc,
regex=regex,
            # Use the full elapsed time; .microseconds is only the sub-second remainder.
            response_time=int(res.elapsed.total_seconds() * 1_000_000),
regex_matches=matches,
status=res.status_code,
url=res.url,
)
async def check_forever(self, site):
"""
        Periodically checks the status of a site and puts the result on an
        ``asyncio.Queue`` for further processing (sending to a Kafka topic).
:param site: A site object from the config.
"""
while True:
data = await self.check(site.url, site.regex)
self.queue.put_nowait(data)
await asyncio.sleep(site.check_interval)
async def produce(self):
"""
Creates and starts an ``aiokafka.AIOKafkaProducer`` and runs a loop that
reads from the ``queue`` and sends the messages to the topic from the
``config``.
"""
producer = aiokafka.AIOKafkaProducer(
loop=self.loop,
bootstrap_servers=self.config.kafka.servers)
await producer.start()
try:
while True:
check = await self.queue.get()
                msg = check.json().encode("utf-8")
await producer.send_and_wait(self.config.kafka.topic, msg)
finally:
await producer.stop()
def run(self):
"""
Runs all tasks in the event loop.
"""
def create_task(site) -> asyncio.Task:
return self.loop.create_task(self.check_forever(site))
tasks = list(map(create_task, self.config.sites))
tasks.append(self.loop.create_task(self.produce()))
self.loop.run_until_complete(asyncio.gather(*tasks))
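As a rough illustration of the ``Collector`` API above, a single check can be exercised outside of ``run()`` roughly like this; the URL and regex are placeholders, and no Kafka broker is needed because ``produce()`` is never started.

# Sketch: run one Collector.check() call by hand, using the default Config
# from chweb.models. The URL and regex below are placeholder values.
import asyncio

from chweb.collector import Collector
from chweb.models import Config

loop = asyncio.get_event_loop()
collector = Collector(Config(), loop, asyncio.Queue())
check = loop.run_until_complete(
    collector.check("https://example.com", r"Example Domain"))
print(check.json())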

src/chweb/consumer.py Normal file

@@ -0,0 +1,62 @@
"""
Sample consumer.
"""
import asyncio
import json
import aiokafka # type: ignore
import asyncpg # type: ignore
from chweb.models import Check, Config
class Consumer:
    """
    Reads check results from a Kafka topic and processes them further
    (eventually writing them to PostgreSQL).
    """
    def __init__(self, config: Config,
                 event_loop: asyncio.AbstractEventLoop,
                 queue: asyncio.Queue):
        self.config = config
        self.loop = event_loop
        self.queue = queue
async def consume(self):
"""
Consumes messages from a Kafka topic.
"""
consumer = aiokafka.AIOKafkaConsumer(
self.config.kafka.topic,
loop=self.loop,
bootstrap_servers=self.config.kafka.servers)
await consumer.start()
try:
# Consume messages
async for msg in consumer:
self.queue.put_nowait(Check(**json.loads(msg.value)))
finally:
# Will leave consumer group; perform autocommit if enabled.
await consumer.stop()
    async def save(self, pool, data):
        """
        Placeholder for persisting a check result via an ``asyncpg`` pool.
        """
        async with pool.acquire() as conn:
            # asyncpg runs queries directly on the connection, without cursors.
            await conn.execute("SELECT 1")
    async def write(self):
        """
        Reads check results from the internal queue and prints them for now.
        """
        try:
            while True:
                status = await self.queue.get()
                print(status)
        finally:
            print("EXITED!")
def run(self):
"""
Runs all tasks in the event loop.
"""
tasks = [
self.loop.create_task(self.consume()),
self.loop.create_task(self.write()),
]
self.loop.run_until_complete(asyncio.gather(*tasks))
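The ``save`` coroutine above is still a stub. A possible shape for the real thing, built from the ``PostgresConfig`` values and the ``Check`` fields, could look roughly like the sketch below; the ``checks`` table and its column names are assumptions, nothing in this commit creates them.

# Sketch only: persists one Check via asyncpg, assuming a "checks" table
# with matching columns already exists in the configured database.
import asyncpg  # type: ignore

from chweb.models import Check, PostgresConfig


async def save(pg: PostgresConfig, check: Check):
    pool = await asyncpg.create_pool(host=pg.dbhost, port=pg.dbport,
                                     database=pg.dbname, user=pg.dbuser,
                                     password=pg.dbpass)
    async with pool.acquire() as conn:
        await conn.execute(
            "INSERT INTO checks "
            "(domain, regex, regex_matches, request_time, response_time, "
            " status, url) VALUES ($1, $2, $3, $4, $5, $6, $7)",
            check.domain, check.regex, check.regex_matches,
            check.request_time, check.response_time, check.status, check.url)
    await pool.close()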

src/chweb/models.py Normal file

@@ -0,0 +1,62 @@
# pylint: disable=too-few-public-methods
"""
Schemas that are used in all modules. This module contains classes for:
- Configuring the ``chweb.collector.Collector``.
- Configuring the ``chweb.consumer.Consumer``.
- The schema for the stats being sent in the Kafka topic.
"""
from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel, Field
class Check(BaseModel):
"""
Information for a website check request.
"""
domain: str = ""
regex: Optional[str] = None
regex_matches: Optional[bool] = None
    # default_factory gives each Check its own timestamp at creation time.
    request_time: datetime = Field(default_factory=datetime.now)
response_time: int = 0
status: int = 0
url: str = ""
class KafkaConfig(BaseModel):
"""
Kafka broker configuration.
"""
servers: List[str] = ["localhost:9992"]
topic: str = "sample"
class PostgresConfig(BaseModel):
"""
PostgreSQL server configuration.
"""
dbhost: str = "localhost"
dbport: int = 5432
dbname: str = "chweb"
dbuser: str = "vladan"
dbpass: str = ""
class SiteConfig(BaseModel):
"""
Single website configuration.
"""
url: str = "https://example.com"
regex: str = "domain"
check_interval: int = 5
class Config(BaseModel):
"""
    Main application configuration, shared by the checker and the Kafka
    consumer / Postgres writer to keep deployment simple.
"""
kafka: KafkaConfig = KafkaConfig()
postgres: PostgresConfig = PostgresConfig()
sites: List[SiteConfig] = []
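For reference, the ``Config`` model above describes the shape of the ``/etc/checker.yaml`` file that ``chweb.cmd.configure`` parses. The sketch below mirrors that parsing in Python; the broker addresses and the site entry are invented examples, while the topic and Postgres values simply echo the model defaults.

# Sketch of the expected config shape; kafka servers and the site entry
# are made-up example values, not defaults shipped with this commit.
import yaml

from chweb.models import Config

RAW = """
kafka:
  servers: ["kafka1:9092", "kafka2:9092"]
  topic: "sample"
postgres:
  dbhost: "localhost"
  dbport: 5432
  dbname: "chweb"
  dbuser: "vladan"
  dbpass: ""
sites:
  - url: "https://example.com"
    regex: "domain"
    check_interval: 5
"""

config = Config(**yaml.safe_load(RAW))
print(config.kafka.topic, [site.url for site in config.sites])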