Refactor, rename and fix pydantic errors
This commit is contained in:
parent
d9b04b9468
commit
60a944cf5e
8 changed files with 105 additions and 74 deletions
13
config.yaml
13
config.yaml
|
@ -1,6 +1,13 @@
|
|||
kafka_servers:
|
||||
- "localhost:9992"
|
||||
kafka_topic: "sample"
|
||||
kafka:
|
||||
servers:
|
||||
- "localhost:9992"
|
||||
topic: "sample"
|
||||
postgres:
|
||||
dbhost: "localhost"
|
||||
dbport: 5432
|
||||
dbname: "chweb"
|
||||
dbuser: "vladan"
|
||||
dbpass: ""
|
||||
sites:
|
||||
- url: "https://example.com"
|
||||
regex: "domain"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[metadata]
|
||||
name = webstat
|
||||
summary = Tool for collecting website stats.
|
||||
name = chweb
|
||||
summary = Tool for checking websites and collecting the results.
|
||||
description-file = README.rst
|
||||
author = Vladan Popovic
|
||||
author-email = vladanovic@gmail.com
|
||||
|
@ -19,8 +19,8 @@ where=src
|
|||
|
||||
[options.entry_points]
|
||||
console_scripts =
|
||||
wstat_collect = webstat.cmd:collect
|
||||
wstat_consume = webstat.cmd:consume
|
||||
chweb_collect = chweb.cmd:collect
|
||||
chweb_consume = chweb.cmd:consume
|
||||
|
||||
[bdist_wheel]
|
||||
universal = 1
|
||||
|
|
51
src/chweb/cmd.py
Normal file
51
src/chweb/cmd.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
"""
|
||||
A module containing all console script functions.
|
||||
"""
|
||||
import argparse
|
||||
import asyncio
|
||||
import yaml
|
||||
|
||||
from chweb.collector import Collector
|
||||
from chweb.consumer import Consumer
|
||||
from chweb.models import Config
|
||||
|
||||
|
||||
def configure() -> Config:
    """
    Parse command-line arguments, load the YAML config file and validate it.

    Returns:
        Config: a Pydantic model built from the parsed YAML mapping.

    Raises:
        OSError: if the config file cannot be opened.
        pydantic.ValidationError: if the YAML does not match the schema.
    """
    parser = argparse.ArgumentParser(
        description='Website availability checker.')  # fixed typo: "availibility"
    parser.add_argument('--config', type=str,
                        default="/etc/checker.yaml",
                        help=('The yaml config file. '
                              'Defaults to /etc/checker.yaml'))
    args = parser.parse_args()
    # safe_load refuses arbitrary Python object construction from YAML;
    # equivalent to FullLoader for a plain scalar/list/dict config file.
    with open(args.config, 'r', encoding='utf-8') as conf_file:
        config = yaml.safe_load(conf_file)
    return Config(**config)
|
||||
|
||||
|
||||
def run(Service):
    """
    Instantiate *Service* with the parsed config, the default event loop
    and a fresh queue, then hand control over to its ``run`` method.
    """
    config = configure()
    event_loop = asyncio.get_event_loop()
    work_queue = asyncio.Queue()
    Service(config, event_loop, work_queue).run()
|
||||
|
||||
|
||||
def collect():
    """
    Console-script entry point for the producer (collector) service.
    """
    run(Collector)
|
||||
|
||||
|
||||
def consume():
    """
    Console-script entry point for the consumer service.
    """
    run(Consumer)
|
|
@ -4,25 +4,28 @@ Checks status of web servers and sends them to a configured Kafka topic.
|
|||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiokafka # type: ignore
|
||||
import requests
|
||||
|
||||
from chweb.models import Config, Check
|
||||
|
||||
|
||||
class Collector:
|
||||
"""
|
||||
A class that contains all methods needed to check the statuses of all
|
||||
websites present in the config.
|
||||
"""
|
||||
def __init__(self, config: Dict[str, Any],
|
||||
def __init__(self, config: Config,
|
||||
event_loop: asyncio.AbstractEventLoop,
|
||||
queue: asyncio.Queue):
|
||||
self.config = config
|
||||
self.loop = event_loop
|
||||
self.queue = queue
|
||||
|
||||
async def get_status(self, url: str, regex: Optional[str]) -> Dict[str, Any]:
|
||||
async def check(self, url: str, regex: Optional[str]) -> Check:
|
||||
"""
|
||||
Checks the status of a website and optionally matches a regex on the
|
||||
response body.
|
||||
|
@ -36,15 +39,16 @@ class Collector:
|
|||
# be omitted from the config.
|
||||
if regex is not None:
|
||||
matches = re.search(regex, res.text) is not None
|
||||
return {
|
||||
'url': url,
|
||||
'regex': regex,
|
||||
'status': res.status_code,
|
||||
'response_time': res.elapsed.microseconds,
|
||||
'regex_matches': matches,
|
||||
}
|
||||
return Check(
|
||||
domain=urlparse(res.url).netloc,
|
||||
regex=regex,
|
||||
response_time=res.elapsed.microseconds,
|
||||
regex_matches=matches,
|
||||
status=res.status_code,
|
||||
url=res.url,
|
||||
)
|
||||
|
||||
async def create_periodic_task(self, site):
|
||||
async def check_forever(self, site):
|
||||
"""
|
||||
A void function that gets the status of a site and sends it to an
|
||||
``asyncio.Queue`` for further processing (sending to a Kafka topic).
|
||||
|
@ -52,9 +56,9 @@ class Collector:
|
|||
:param site: A site object from the config.
|
||||
"""
|
||||
while True:
|
||||
data = await self.get_status(site["url"], site.get("regex"))
|
||||
data = await self.check(site.url, site.regex)
|
||||
self.queue.put_nowait(data)
|
||||
await asyncio.sleep(site["check_interval"])
|
||||
await asyncio.sleep(site.check_interval)
|
||||
|
||||
async def produce(self):
|
||||
"""
|
||||
|
@ -64,23 +68,23 @@ class Collector:
|
|||
"""
|
||||
producer = aiokafka.AIOKafkaProducer(
|
||||
loop=self.loop,
|
||||
bootstrap_servers=self.config["kafka_servers"])
|
||||
bootstrap_servers=self.config.kafka.servers)
|
||||
|
||||
await producer.start()
|
||||
try:
|
||||
while True:
|
||||
status = await self.queue.get()
|
||||
msg = bytes(json.dumps(status).encode("utf-8"))
|
||||
await producer.send_and_wait(self.config["kafka_topic"], msg)
|
||||
check = await self.queue.get()
|
||||
msg = bytes(check.json().encode("utf-8"))
|
||||
await producer.send_and_wait(self.config.kafka.topic, msg)
|
||||
finally:
|
||||
await producer.stop()
|
||||
|
||||
def tasks(self) -> List[asyncio.Task]:
|
||||
def run(self):
|
||||
"""
|
||||
Creates a task for every site.
|
||||
Runs all tasks in the event loop.
|
||||
"""
|
||||
def create_task(site) -> asyncio.Task:
|
||||
return self.loop.create_task(self.create_periodic_task(site))
|
||||
tasks = list(map(create_task, self.config["sites"]))
|
||||
return self.loop.create_task(self.check_forever(site))
|
||||
tasks = list(map(create_task, self.config.sites))
|
||||
tasks.append(self.loop.create_task(self.produce()))
|
||||
return tasks
|
||||
self.loop.run_until_complete(asyncio.gather(*tasks))
|
|
@ -3,11 +3,13 @@ Sample consumer.
|
|||
"""
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict
|
||||
|
||||
import aiokafka # type: ignore
|
||||
import asyncpg # type: ignore
|
||||
|
||||
from chweb.models import Check
|
||||
|
||||
|
||||
class Consumer:
|
||||
def __init__(self, config: Dict[str, Any],
|
||||
|
@ -22,15 +24,15 @@ class Consumer:
|
|||
Consumes messages from a Kafka topic.
|
||||
"""
|
||||
consumer = aiokafka.AIOKafkaConsumer(
|
||||
self.config['kafka_topic'],
|
||||
self.config.kafka.topic,
|
||||
loop=self.loop,
|
||||
bootstrap_servers=self.config['kafka_servers'])
|
||||
bootstrap_servers=self.config.kafka.servers)
|
||||
|
||||
await consumer.start()
|
||||
try:
|
||||
# Consume messages
|
||||
async for msg in consumer:
|
||||
self.queue.put_nowait(json.loads(msg.value))
|
||||
self.queue.put_nowait(Check(**json.loads(msg.value)))
|
||||
finally:
|
||||
# Will leave consumer group; perform autocommit if enabled.
|
||||
await consumer.stop()
|
||||
|
@ -49,11 +51,12 @@ class Consumer:
|
|||
finally:
|
||||
print("EXITED!")
|
||||
|
||||
def tasks(self) -> List[asyncio.Task]:
|
||||
def run(self):
|
||||
"""
|
||||
Creates tasks for reading from the Kafka topic and writing in
|
||||
PostgreSQL.
|
||||
Runs all tasks in the event loop.
|
||||
"""
|
||||
kafka_consumer = self.loop.create_task(self.consume())
|
||||
psql_writer = self.loop.create_task(self.write())
|
||||
return [kafka_consumer, psql_writer]
|
||||
tasks = [
|
||||
self.loop.create_task(self.consume()),
|
||||
self.loop.create_task(self.write()),
|
||||
]
|
||||
self.loop.run_until_complete(asyncio.gather(*tasks))
|
|
@ -1,34 +0,0 @@
|
|||
"""
|
||||
A module containing all console script functions.
|
||||
"""
|
||||
import asyncio
|
||||
import yaml
|
||||
|
||||
from webstat.collector import Collector
|
||||
from webstat.consumer import Consumer
|
||||
|
||||
|
||||
def run(Service):
    """
    Build a *Service* from the on-disk YAML config and drive its tasks
    to completion on the default asyncio event loop.
    """
    with open('config.yaml', 'r') as conf_file:
        config = yaml.load(conf_file, Loader=yaml.FullLoader)
    loop = asyncio.get_event_loop()
    service = Service(config, loop, asyncio.Queue())
    loop.run_until_complete(asyncio.gather(*service.tasks()))
|
||||
|
||||
|
||||
def collect():
    """
    Entry point that runs the producer (collector) event loop.
    """
    run(Collector)
|
||||
|
||||
|
||||
def consume():
    """
    Entry point that runs the consumer event loop.
    """
    run(Consumer)
|
2
tox.ini
2
tox.ini
|
@ -8,7 +8,7 @@ deps =
|
|||
pytest-cov
|
||||
pytest-mock
|
||||
commands =
|
||||
pytest --cov=webstat --cov-append --cov-report=term-missing {posargs}
|
||||
pytest --cov=chweb --cov-append --cov-report=term-missing {posargs}
|
||||
|
||||
[testenv:lint]
|
||||
deps = pylint
|
||||
|
|
Loading…
Add table
Reference in a new issue