Refactor, rename and fix pydantic errors
This commit is contained in:
parent
d9b04b9468
commit
60a944cf5e
8 changed files with 105 additions and 74 deletions
13
config.yaml
13
config.yaml
|
@ -1,6 +1,13 @@
|
||||||
kafka_servers:
|
kafka:
|
||||||
- "localhost:9992"
|
servers:
|
||||||
kafka_topic: "sample"
|
- "localhost:9992"
|
||||||
|
topic: "sample"
|
||||||
|
postgres:
|
||||||
|
dbhost: "localhost"
|
||||||
|
dbport: 5432
|
||||||
|
dbname: "chweb"
|
||||||
|
dbuser: "vladan"
|
||||||
|
dbpass: ""
|
||||||
sites:
|
sites:
|
||||||
- url: "https://example.com"
|
- url: "https://example.com"
|
||||||
regex: "domain"
|
regex: "domain"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[metadata]
|
[metadata]
|
||||||
name = webstat
|
name = chweb
|
||||||
summary = Tool for collecting website stats.
|
summary = Tool for checking websites and collecting the results.
|
||||||
description-file = README.rst
|
description-file = README.rst
|
||||||
author = Vladan Popovic
|
author = Vladan Popovic
|
||||||
author-email = vladanovic@gmail.com
|
author-email = vladanovic@gmail.com
|
||||||
|
@ -19,8 +19,8 @@ where=src
|
||||||
|
|
||||||
[options.entry_points]
|
[options.entry_points]
|
||||||
console_scripts =
|
console_scripts =
|
||||||
wstat_collect = webstat.cmd:collect
|
chweb_collect = chweb.cmd:collect
|
||||||
wstat_consume = webstat.cmd:consume
|
chweb_consume = chweb.cmd:consume
|
||||||
|
|
||||||
[bdist_wheel]
|
[bdist_wheel]
|
||||||
universal = 1
|
universal = 1
|
||||||
|
|
51
src/chweb/cmd.py
Normal file
51
src/chweb/cmd.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
"""
|
||||||
|
A module containing all console script functions.
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from chweb.collector import Collector
|
||||||
|
from chweb.consumer import Consumer
|
||||||
|
from chweb.models import Config
|
||||||
|
|
||||||
|
|
||||||
|
def configure() -> Config:
    """
    Parse command-line arguments, load the YAML config file and build a
    pydantic ``Config`` model from it.

    :return: A ``Config`` instance populated from the parsed YAML mapping.
    :raises OSError: If the config file cannot be opened.
    :raises pydantic.ValidationError: If the YAML does not match ``Config``.
    """
    parser = argparse.ArgumentParser(
        description='Website availability checker.')
    parser.add_argument('--config', type=str,
                        default="/etc/checker.yaml",
                        help=('The yaml config file. '
                              'Defaults to /etc/checker.yaml'))
    args = parser.parse_args()
    # safe_load is sufficient for a plain-scalar config file and, unlike
    # FullLoader, cannot construct arbitrary Python objects from the YAML.
    with open(args.config, 'r', encoding='utf-8') as conf_file:
        config = yaml.safe_load(conf_file)
    return Config(**config)
|
||||||
|
|
||||||
|
|
||||||
|
def run(Service):
    """
    Build the runtime dependencies (config, event loop, queue), construct
    the given ``Service`` with them and start it.

    :param Service: A service class (e.g. ``Collector`` or ``Consumer``)
        whose constructor takes ``(config, loop, queue)`` and which exposes
        a ``run()`` method that drives the event loop.
    """
    config = configure()
    event_loop = asyncio.get_event_loop()
    messages = asyncio.Queue()
    Service(config, event_loop, messages).run()
|
||||||
|
|
||||||
|
|
||||||
|
def collect():
    """
    Console-script entry point (``chweb_collect``): run the ``Collector``
    service, which checks the configured sites and produces the results
    to a Kafka topic.
    """
    run(Collector)
|
||||||
|
|
||||||
|
|
||||||
|
def consume():
    """
    Console-script entry point (``chweb_consume``): run the ``Consumer``
    service, which reads check results from the Kafka topic for further
    processing.
    """
    run(Consumer)
|
|
@ -4,25 +4,28 @@ Checks status of web servers and sends them to a configured Kafka topic.
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import aiokafka # type: ignore
|
import aiokafka # type: ignore
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from chweb.models import Config, Check
|
||||||
|
|
||||||
|
|
||||||
class Collector:
|
class Collector:
|
||||||
"""
|
"""
|
||||||
A class that contains all methods needed to check the statuses of all
|
A class that contains all methods needed to check the statuses of all
|
||||||
websites present in the config.
|
websites present in the config.
|
||||||
"""
|
"""
|
||||||
def __init__(self, config: Dict[str, Any],
|
def __init__(self, config: Config,
|
||||||
event_loop: asyncio.AbstractEventLoop,
|
event_loop: asyncio.AbstractEventLoop,
|
||||||
queue: asyncio.Queue):
|
queue: asyncio.Queue):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.loop = event_loop
|
self.loop = event_loop
|
||||||
self.queue = queue
|
self.queue = queue
|
||||||
|
|
||||||
async def get_status(self, url: str, regex: Optional[str]) -> Dict[str, Any]:
|
async def check(self, url: str, regex: Optional[str]) -> Check:
|
||||||
"""
|
"""
|
||||||
Checks the status of a website and optionally matches a regex on the
|
Checks the status of a website and optionally matches a regex on the
|
||||||
response body.
|
response body.
|
||||||
|
@ -36,15 +39,16 @@ class Collector:
|
||||||
# be ommited from the config.
|
# be omitted from the config.
|
||||||
if regex is not None:
|
if regex is not None:
|
||||||
matches = re.search(regex, res.text) is not None
|
matches = re.search(regex, res.text) is not None
|
||||||
return {
|
return Check(
|
||||||
'url': url,
|
domain=urlparse(res.url).netloc,
|
||||||
'regex': regex,
|
regex=regex,
|
||||||
'status': res.status_code,
|
response_time=res.elapsed.microseconds,
|
||||||
'response_time': res.elapsed.microseconds,
|
regex_matches=matches,
|
||||||
'regex_matches': matches,
|
status=res.status_code,
|
||||||
}
|
url=res.url,
|
||||||
|
)
|
||||||
|
|
||||||
async def create_periodic_task(self, site):
|
async def check_forever(self, site):
|
||||||
"""
|
"""
|
||||||
A void function that gets the status of a site and sends it to an
|
A void function that gets the status of a site and sends it to an
|
||||||
``asyncio.Queue`` for further processing (sending to a Kafka topic).
|
``asyncio.Queue`` for further processing (sending to a Kafka topic).
|
||||||
|
@ -52,9 +56,9 @@ class Collector:
|
||||||
:param site: A site object from the config.
|
:param site: A site object from the config.
|
||||||
"""
|
"""
|
||||||
while True:
|
while True:
|
||||||
data = await self.get_status(site["url"], site.get("regex"))
|
data = await self.check(site.url, site.regex)
|
||||||
self.queue.put_nowait(data)
|
self.queue.put_nowait(data)
|
||||||
await asyncio.sleep(site["check_interval"])
|
await asyncio.sleep(site.check_interval)
|
||||||
|
|
||||||
async def produce(self):
|
async def produce(self):
|
||||||
"""
|
"""
|
||||||
|
@ -64,23 +68,23 @@ class Collector:
|
||||||
"""
|
"""
|
||||||
producer = aiokafka.AIOKafkaProducer(
|
producer = aiokafka.AIOKafkaProducer(
|
||||||
loop=self.loop,
|
loop=self.loop,
|
||||||
bootstrap_servers=self.config["kafka_servers"])
|
bootstrap_servers=self.config.kafka.servers)
|
||||||
|
|
||||||
await producer.start()
|
await producer.start()
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
status = await self.queue.get()
|
check = await self.queue.get()
|
||||||
msg = bytes(json.dumps(status).encode("utf-8"))
|
msg = bytes(check.json().encode("utf-8"))
|
||||||
await producer.send_and_wait(self.config["kafka_topic"], msg)
|
await producer.send_and_wait(self.config.kafka.topic, msg)
|
||||||
finally:
|
finally:
|
||||||
await producer.stop()
|
await producer.stop()
|
||||||
|
|
||||||
def tasks(self) -> List[asyncio.Task]:
|
def run(self):
|
||||||
"""
|
"""
|
||||||
Creates a task for every site.
|
Runs all tasks in the event loop.
|
||||||
"""
|
"""
|
||||||
def create_task(site) -> asyncio.Task:
|
def create_task(site) -> asyncio.Task:
|
||||||
return self.loop.create_task(self.create_periodic_task(site))
|
return self.loop.create_task(self.check_forever(site))
|
||||||
tasks = list(map(create_task, self.config["sites"]))
|
tasks = list(map(create_task, self.config.sites))
|
||||||
tasks.append(self.loop.create_task(self.produce()))
|
tasks.append(self.loop.create_task(self.produce()))
|
||||||
return tasks
|
self.loop.run_until_complete(asyncio.gather(*tasks))
|
|
@ -3,11 +3,13 @@ Sample consumer.
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict
|
||||||
|
|
||||||
import aiokafka # type: ignore
|
import aiokafka # type: ignore
|
||||||
import asyncpg # type: ignore
|
import asyncpg # type: ignore
|
||||||
|
|
||||||
|
from chweb.models import Check
|
||||||
|
|
||||||
|
|
||||||
class Consumer:
|
class Consumer:
|
||||||
def __init__(self, config: Dict[str, Any],
|
def __init__(self, config: Dict[str, Any],
|
||||||
|
@ -22,15 +24,15 @@ class Consumer:
|
||||||
Consumes messages from a Kafka topic.
|
Consumes messages from a Kafka topic.
|
||||||
"""
|
"""
|
||||||
consumer = aiokafka.AIOKafkaConsumer(
|
consumer = aiokafka.AIOKafkaConsumer(
|
||||||
self.config['kafka_topic'],
|
self.config.kafka.topic,
|
||||||
loop=self.loop,
|
loop=self.loop,
|
||||||
bootstrap_servers=self.config['kafka_servers'])
|
bootstrap_servers=self.config.kafka.servers)
|
||||||
|
|
||||||
await consumer.start()
|
await consumer.start()
|
||||||
try:
|
try:
|
||||||
# Consume messages
|
# Consume messages
|
||||||
async for msg in consumer:
|
async for msg in consumer:
|
||||||
self.queue.put_nowait(json.loads(msg.value))
|
self.queue.put_nowait(Check(**json.loads(msg.value)))
|
||||||
finally:
|
finally:
|
||||||
# Will leave consumer group; perform autocommit if enabled.
|
# Will leave consumer group; perform autocommit if enabled.
|
||||||
await consumer.stop()
|
await consumer.stop()
|
||||||
|
@ -49,11 +51,12 @@ class Consumer:
|
||||||
finally:
|
finally:
|
||||||
print("EXITED!")
|
print("EXITED!")
|
||||||
|
|
||||||
def tasks(self) -> List[asyncio.Task]:
|
def run(self):
|
||||||
"""
|
"""
|
||||||
Creates tasks for reading from the Kafka topic and writing in
|
Runs all tasks in the event loop.
|
||||||
PostgreSQL.
|
|
||||||
"""
|
"""
|
||||||
kafka_consumer = self.loop.create_task(self.consume())
|
tasks = [
|
||||||
psql_writer = self.loop.create_task(self.write())
|
self.loop.create_task(self.consume()),
|
||||||
return [kafka_consumer, psql_writer]
|
self.loop.create_task(self.write()),
|
||||||
|
]
|
||||||
|
self.loop.run_until_complete(asyncio.gather(*tasks))
|
|
@ -1,34 +0,0 @@
|
||||||
"""
|
|
||||||
A module containing all console script functions.
|
|
||||||
"""
|
|
||||||
import asyncio
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
from webstat.collector import Collector
|
|
||||||
from webstat.consumer import Consumer
|
|
||||||
|
|
||||||
|
|
||||||
def run(Service):
|
|
||||||
"""
|
|
||||||
A factory kinda that runs both services in an event loop.
|
|
||||||
"""
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
queue = asyncio.Queue()
|
|
||||||
with open('config.yaml', 'r') as conf_file:
|
|
||||||
config = yaml.load(conf_file, Loader=yaml.FullLoader)
|
|
||||||
tasks = Service(config, loop, queue).tasks()
|
|
||||||
loop.run_until_complete(asyncio.gather(*tasks))
|
|
||||||
|
|
||||||
|
|
||||||
def collect():
|
|
||||||
"""
|
|
||||||
Main producer event loop.
|
|
||||||
"""
|
|
||||||
run(Collector)
|
|
||||||
|
|
||||||
|
|
||||||
def consume():
|
|
||||||
"""
|
|
||||||
Main consumer event loop.
|
|
||||||
"""
|
|
||||||
run(Consumer)
|
|
2
tox.ini
2
tox.ini
|
@ -8,7 +8,7 @@ deps =
|
||||||
pytest-cov
|
pytest-cov
|
||||||
pytest-mock
|
pytest-mock
|
||||||
commands =
|
commands =
|
||||||
pytest --cov=webstat --cov-append --cov-report=term-missing {posargs}
|
pytest --cov=chweb --cov-append --cov-report=term-missing {posargs}
|
||||||
|
|
||||||
[testenv:lint]
|
[testenv:lint]
|
||||||
deps = pylint
|
deps = pylint
|
||||||
|
|
Loading…
Reference in a new issue