From cd6a53be5d31df4d07516902ec1c4a413eefe65a Mon Sep 17 00:00:00 2001
From: sermuns <sermuns@lysator.liu.se>
Date: Fri, 25 Apr 2025 22:17:16 +0200
Subject: [PATCH] Done?

---
 fetch.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 main.py  | 77 +------------------------------------------------------
 2 files changed, 64 insertions(+), 76 deletions(-)
 create mode 100644 fetch.py

diff --git a/fetch.py b/fetch.py
new file mode 100644
index 0000000..34ac387
--- /dev/null
+++ b/fetch.py
@@ -0,0 +1,63 @@
+import aiohttp
+import asyncio
+import json
+from urllib.parse import urlparse
+
+BASE_URL = (
+    "http://milou.lysator.liu.se:49001/v1/projects/e511835c-a77f-414c-8817-26fb5d7b99b8"
+)
+
+
+async def get_all_builds(session):
+    url = f"{BASE_URL}/builds?limit=1000"
+    async with session.get(url) as response:
+        response.raise_for_status()
+        return await response.json()
+
+
+async def get_build_statistics(session, build_id):
+    url = f"{BASE_URL}/builds/{build_id}/statistics"
+    async with session.get(url) as response:
+        response.raise_for_status()
+        return await response.json()
+
+
+async def scrape_lcp_values():
+    async with aiohttp.ClientSession() as session:
+        builds = await get_all_builds(session)
+        lcp_values_by_branch = {}
+
+        tasks = [get_build_statistics(session, b["id"]) for b in builds]
+        statistics_list = await asyncio.gather(*tasks)
+
+        for build, statistics in zip(builds, statistics_list):
+            branch = build["branch"]
+            lcp_stat = next(
+                (
+                    s
+                    for s in statistics
+                    if s["name"] == "audit_largest-contentful-paint_median"
+                ),
+                None,
+            )
+            if not lcp_stat:
+                continue
+
+            path = urlparse(statistics[0]["url"]).path[1:] or "index"
+            entry = {
+                "lcpMillis": lcp_stat["value"],
+                "createdAt": build["createdAt"],
+                "buildId": build["id"],
+            }
+
+            lcp_values_by_branch.setdefault(branch, {}).setdefault(path, []).append(
+                entry
+            )
+
+        return lcp_values_by_branch
+
+
+if __name__ == "__main__":
+    data = asyncio.run(scrape_lcp_values())
+    with open("lcp_values_by_branch_and_url.json", "w") as f:
+        json.dump(data, f, indent=2)
diff --git a/main.py b/main.py
index c277190..6c3b70e 100644
--- a/main.py
+++ b/main.py
@@ -1,79 +1,8 @@
-import aiohttp
-import asyncio
 import json
-from urllib.parse import urlparse
 from flask import Flask, jsonify, render_template_string
 
 app = Flask(__name__)
 
-base_url = (
-    "http://milou.lysator.liu.se:49001/v1/projects/e511835c-a77f-414c-8817-26fb5d7b99b8"
-)
-
-
-async def get_all_builds(session):
-    url = f"{base_url}/builds?limit=1000"
-    async with session.get(url) as response:
-        response.raise_for_status()
-        return await response.json()
-
-
-async def get_build_statistics(session, build_id):
-    url = f"{base_url}/builds/{build_id}/statistics"
-    async with session.get(url) as response:
-        response.raise_for_status()
-        return await response.json()
-
-
-async def scrape_lcp_values():
-    async with aiohttp.ClientSession() as session:
-        builds = await get_all_builds(session)
-        lcp_values_by_branch = {}
-
-        tasks = []
-        for build in builds:
-            build_id = build["id"]
-            tasks.append(get_build_statistics(session, build_id))
-
-        statistics_list = await asyncio.gather(*tasks)
-
-        for build, statistics in zip(builds, statistics_list):
-            branch = build["branch"]
-
-            for stat in statistics:
-                lcp_stat = next(
-                    (
-                        s
-                        for s in statistics
-                        if s["name"] == "audit_largest-contentful-paint_median"
-                    ),
-                    None,
-                )
-
-                if not lcp_stat:
-                    continue
-
-                path = urlparse(stat["url"]).path[1:]
-
-                if path == "":
-                    path = "index"
-
-                lcp_entry = {
-                    "lcpMillis": lcp_stat["value"],
-                    "createdAt": build["createdAt"],
-                    "buildId": build["id"],
-                }
-
-                if branch not in lcp_values_by_branch:
-                    lcp_values_by_branch[branch] = {}
-
-                if path not in lcp_values_by_branch[branch]:
-                    lcp_values_by_branch[branch][path] = []
-
-                lcp_values_by_branch[branch][path].append(lcp_entry)
-
-        return lcp_values_by_branch
-
 
 @app.route("/<branch>/<path>", methods=["GET"])
 def serve_plot(branch, path):
@@ -223,8 +152,4 @@ def serve_branch(branch):
 
 
 if __name__ == "__main__":
-    lcp_values_by_branch = asyncio.run(scrape_lcp_values())
-    with open("lcp_values_by_branch_and_url.json", "w") as f:
-        json.dump(lcp_values_by_branch, f, indent=4)
-
-    app.run(debug=True, host="127.0.0.1", port=8000)
+    app.run(host="0.0.0.0", port=49002)
-- 
GitLab