Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
scrape-lhci-server
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Samuel Åkesson
scrape-lhci-server
Commits
cd6a53be
Commit
cd6a53be
authored
3 months ago
by
Samuel Åkesson
Browse files
Options
Downloads
Patches
Plain Diff
Done?
parent
d2d1d99b
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
fetch.py
+63
-0
63 additions, 0 deletions
fetch.py
main.py
+1
-76
1 addition, 76 deletions
main.py
with
64 additions
and
76 deletions
fetch.py
0 → 100644
+
63
−
0
View file @
cd6a53be
import
aiohttp
import
asyncio
import
json
from
urllib.parse
import
urlparse
# Root endpoint of the Lighthouse CI server project whose builds are scraped.
BASE_URL = (
    "http://milou.lysator.liu.se:49001/v1/projects/e511835c-a77f-414c-8817-26fb5d7b99b8"
)
async def get_all_builds(session):
    """Fetch the project's builds (up to 1000) and return the decoded JSON list.

    Raises for any non-success HTTP status via ``raise_for_status``.
    """
    builds_url = BASE_URL + "/builds?limit=1000"
    async with session.get(builds_url) as resp:
        resp.raise_for_status()
        payload = await resp.json()
    return payload
async def get_build_statistics(session, build_id):
    """Fetch the statistics recorded for one build and return the decoded JSON.

    Raises for any non-success HTTP status via ``raise_for_status``.
    """
    stats_url = "{}/builds/{}/statistics".format(BASE_URL, build_id)
    async with session.get(stats_url) as resp:
        resp.raise_for_status()
        return await resp.json()
async def scrape_lcp_values():
    """Collect median LCP values for every build, grouped by branch and URL path.

    Returns a dict shaped ``{branch: {path: [entry, ...]}}`` where each entry
    carries ``"lcpMillis"``, ``"createdAt"`` and ``"buildId"``.  Builds without
    an LCP median statistic are skipped.
    """
    async with aiohttp.ClientSession() as session:
        builds = await get_all_builds(session)
        lcp_values_by_branch = {}
        # Fire all statistics requests concurrently instead of awaiting them
        # one build at a time.
        tasks = [get_build_statistics(session, b["id"]) for b in builds]
        statistics_list = await asyncio.gather(*tasks)
        for build, statistics in zip(builds, statistics_list):
            branch = build["branch"]
            lcp_stat = next(
                (
                    s
                    for s in statistics
                    if s["name"] == "audit_largest-contentful-paint_median"
                ),
                None,
            )
            if not lcp_stat:
                continue
            # BUG FIX: derive the path from the LCP statistic itself rather
            # than from statistics[0] — the first statistic in the list may
            # describe a different URL than the one the LCP value belongs to.
            path = urlparse(lcp_stat["url"]).path[1:] or "index"
            entry = {
                "lcpMillis": lcp_stat["value"],
                "createdAt": build["createdAt"],
                "buildId": build["id"],
            }
            lcp_values_by_branch.setdefault(branch, {}).setdefault(path, []).append(
                entry
            )
        return lcp_values_by_branch
if __name__ == "__main__":
    # Scrape everything, then persist the grouped results for later serving.
    data = asyncio.run(scrape_lcp_values())
    # Explicit UTF-8 keeps the dump portable regardless of the platform's
    # default locale encoding (branch names / URLs may contain non-ASCII).
    with open("lcp_values_by_branch_and_url.json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
This diff is collapsed.
Click to expand it.
main.py
+
1
−
76
View file @
cd6a53be
import
aiohttp
import
asyncio
import
json
import
json
from
urllib.parse
import
urlparse
from
flask
import
Flask
,
jsonify
,
render_template_string
from
flask
import
Flask
,
jsonify
,
render_template_string
app
=
Flask
(
__name__
)
app
=
Flask
(
__name__
)
base_url
=
(
"
http://milou.lysator.liu.se:49001/v1/projects/e511835c-a77f-414c-8817-26fb5d7b99b8
"
)
async def get_all_builds(session):
    """Return the project's builds (up to 1000), decoded from JSON.

    Raises for any non-success HTTP status via ``raise_for_status``.
    """
    async with session.get(f"{base_url}/builds?limit=1000") as resp:
        resp.raise_for_status()
        return await resp.json()
async def get_build_statistics(session, build_id):
    """Return the statistics recorded for *build_id*, decoded from JSON.

    Raises for any non-success HTTP status via ``raise_for_status``.
    """
    endpoint = f"{base_url}/builds/{build_id}/statistics"
    async with session.get(endpoint) as resp:
        resp.raise_for_status()
        body = await resp.json()
    return body
async def scrape_lcp_values():
    """Collect median LCP values for all builds, grouped by branch and URL path.

    Returns ``{branch: {path: [{"lcpMillis", "createdAt", "buildId"}, ...]}}``.
    Builds that carry no LCP median statistic are skipped.
    """
    async with aiohttp.ClientSession() as session:
        builds = await get_all_builds(session)
        lcp_values_by_branch = {}
        # Kick off every statistics request at once and await them together.
        tasks = [get_build_statistics(session, build["id"]) for build in builds]
        statistics_list = await asyncio.gather(*tasks)
        for build, statistics in zip(builds, statistics_list):
            branch = build["branch"]
            # BUG FIX: the previous inner `for stat in statistics:` loop
            # re-found the same LCP statistic for every statistic and appended
            # the identical LCP value once per statistic, filing it under every
            # statistic's URL path.  Look the LCP statistic up once and use its
            # own URL, appending a single entry per build.
            lcp_stat = next(
                (
                    s
                    for s in statistics
                    if s["name"] == "audit_largest-contentful-paint_median"
                ),
                None,
            )
            if not lcp_stat:
                continue
            path = urlparse(lcp_stat["url"]).path[1:] or "index"
            lcp_entry = {
                "lcpMillis": lcp_stat["value"],
                "createdAt": build["createdAt"],
                "buildId": build["id"],
            }
            lcp_values_by_branch.setdefault(branch, {}).setdefault(path, []).append(
                lcp_entry
            )
        return lcp_values_by_branch
@app.route
(
"
/<branch>/<path>
"
,
methods
=
[
"
GET
"
])
@app.route
(
"
/<branch>/<path>
"
,
methods
=
[
"
GET
"
])
def
serve_plot
(
branch
,
path
):
def
serve_plot
(
branch
,
path
):
...
@@ -223,8 +152,4 @@ def serve_branch(branch):
...
@@ -223,8 +152,4 @@ def serve_branch(branch):
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
lcp_values_by_branch
=
asyncio
.
run
(
scrape_lcp_values
())
app
.
run
(
host
=
"
0.0.0.0
"
,
port
=
49002
)
with
open
(
"
lcp_values_by_branch_and_url.json
"
,
"
w
"
)
as
f
:
json
.
dump
(
lcp_values_by_branch
,
f
,
indent
=
4
)
app
.
run
(
debug
=
True
,
host
=
"
127.0.0.1
"
,
port
=
8000
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment