s = CDPSession(port=9222)
assert s._cdp is None and s.port == 9222
s2 = CDPSession(port=9223, profile_dir='/tmp/chrome')
assert s2.port == 9223 and s2.profile_dir == '/tmp/chrome'
assert '9222' in repr(s) and 'False' in repr(s)
assert isinstance(_default_profile_dir(), str)
assert _find_chrome('/usr/bin/chrome') == '/usr/bin/chrome'
CDP — network sniffing and replay
The cdp module wraps Chrome’s DevTools Protocol behind a synchronous interface. automation_browser() is the single entry point — it launches Chrome with a persistent debug profile so cookies and sessions survive across runs. For SSO or enterprise sites, log in once in the browser window; every subsequent call picks up that session automatically.
Use cdp when scrapling’s stealth mode is not enough: the site checks for enterprise SSO, reads cookies from a previous session, or ties requests to a specific browser fingerprint.
CDP.start_recording
async def start_recording(
path:NoneType=None, fps:int=25, quality:int=80, sid:NoneType=None
):
Start recording the active page to an mp4 file via ffmpeg
CDP.stop_recording
async def stop_recording(
sid:NoneType=None
):
Stop recording and finalize the mp4 file
PostCapture
def PostCapture(
args:VAR_POSITIONAL, kwargs:VAR_KEYWORD
):
dict subclass that also provides access to keys as attrs, and has a pretty markdown repr
CDPSession
def CDPSession(
port:int=9222, profile_dir:NoneType=None
):
Sync-facing CDP session. Lazily connects to Chrome on first use.
CDPSession.launch
def launch(
wait:int=10, chrome_path:NoneType=None
)->CDPSession:
Launch Chrome with remote debugging on self.port
setup_chrome_daemon
def setup_chrome_daemon(
port:int=9222, chrome_path:NoneType=None, profile_dir:NoneType=None, uninstall:bool=False
)->bool:
Install (or uninstall) a system service that starts Chrome with remote debugging at login
CDPSession.sniff
def sniff(
pattern:str='.*', timeout:int=30, url:NoneType=None, record:bool=False, record_path:NoneType=None,
posts_only:bool=False, screenshot_path:NoneType=None
)->list:
Sniff HTTP requests from Chrome. Passive (user browses) if url=None, active if url given.
CDPSession.replay
def replay(
cap:PostCapture, body:dict=None, headers:dict=None
)->dict:
Replay a captured request with optional body/header overrides. Returns {url, status, html, data}.
CDPSession.to_paginate_args
def to_paginate_args(
capture:PostCapture
)->dict:
Convert a PostCapture into kwargs ready to unpack into paginate_api()
CDPSession.goto
def goto(
url:str, timeout:int=30
)->None:
Navigate the active page to url
CDPSession.source
def source(
)->dict:
Return current page as {'html': …, 'url': …} — pass directly to to_md()
cap = PostCapture(url='https://api.danmurphys.com.au/apis/ui/Search/products',
method='POST', request_headers={'content-type': 'application/json', 'x-api-key': 'abc'},
request_body={'pageNumber': 1, 'pageSize': 48, 'query': 'wine'},
response_body={'Items': [{'Name': 'Test Wine'}]}, timestamp=time.time())
s = CDPSession(port=9222)
args = s.to_paginate_args(cap)
assert args['url'] == cap.url and args['method'] == 'POST'
assert args['payload'] == cap.request_body and args['headers'] == cap.request_headers
cap2 = PostCapture(url='https://t.com', method='POST', request_headers={},
request_body=[{'id': 1}], response_body=None, timestamp=time.time())
assert cap2.request_body == [{'id': 1}]
automation_browser
def automation_browser(
port:int=9222, profile_dir:NoneType=None
)->CDPSession:
CDPSession with a persistent debug profile — sessions and cookies survive across runs
assert setup_chrome_daemon(port=9222) is True
assert setup_chrome_daemon(port=9222, uninstall=True) is True
Integration Examples
The first example navigates to a site with a headless browser, captures the POST requests the page makes, then replays one to paginate through all results without keeping the browser open. The second shows the same workflow using scrapling’s fetch_all() for pages that do not need JavaScript.
# automation_browser starts Chrome with a persistent profile on port 9222
with automation_browser() as s:
caps = s.sniff(url='https://www.danmurphys.com.au/wine', pattern='*api.danmurphys.com.au/apis*', timeout=10,
posts_only=True, screenshot_path='dm_sniff.png')
print(f"Captured {len(caps)} POST(s):")
if caps:
all_items = paginate_api(
**s.to_paginate_args(caps[0]),
results_field='Items',
size_field='pageSize',
page_field='pageNumber',
max_pages=2
)
print(f"\nTotal products: {len(all_items)}")
print("Sample:", all_items[0]['Name'].strip() if all_items else 'none')
Captured 0 POST(s):
# Approach A: discover sargas from the page dropdown, fetch all in parallel
BASE = 'https://www.valmiki.iitk.ac.in/sloka'
first = fetch(f'{BASE}?field_kanda_tid=1&language=dv&field_sarga_value=1')
sopts = get_options(first, '#edit-field-sarga-value')
print(f"Found {len(sopts)} sargas: {[o['value'] for o in sopts[:5]]}...")
urls = [f'{BASE}?field_kanda_tid=1&language=dv&field_sarga_value={o["value"]}' for o in sopts[:5]]
pages = fetch_all(urls, sel='.view-content')
shlokas = {p['url']: to_md(p) for p in pages if p['status'] == 200}
print(f"Fetched {len(shlokas)} sargas")
if shlokas: print(next(iter(shlokas.values()))[:200])
[2026-06-03 08:12:49] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
Found 77 sargas: ['1', '2', '3', '4', '5']...
[2026-06-03 08:12:49] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=5> (referer: https://www.google.com/)
[2026-06-03 08:12:49] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=3> (referer: https://www.google.com/)
[2026-06-03 08:12:49] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=2> (referer: https://www.google.com/)
[2026-06-03 08:12:49] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=4> (referer: https://www.google.com/)
[2026-06-03 08:12:49] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
Fetched 5 sargas
[Saint Narada visits hermitage of Valmiki -- Valmiki queries about a single perfect individual bestowed with all good qualities enumerated by him -- Narada, knower of past, present and future, identif
Enterprise / SSO sites
automation_browser uses a persistent profile — log in once in the browser window and subsequent runs reuse those cookies automatically. Works with any site that ties auth to the browser session: Jira, Confluence, LinkedIn, internal tools.
# Log in once in the browser; subsequent runs reuse the session automatically.
# Read any SSO-protected page as markdown
with automation_browser() as s:
s.goto('https://www.linkedin.com/feed/')
print(to_md(s.source())[:2000])
## 0 notifications
Skip to main contentSkip to sidebarSkip to primary contentSkip to aside
* Home
* My Network
* Jobs
* Messaging
* 4Notifications
* Me
* * *
* For Business
Reactivate Premium: 50% Off
Karthik Rajgopal Laxmi Naarayanan
Director of Machine learning at Bain
Greater Melbourne Area
Bain & Company
Boost your job search
Reactivate Premium: 50% Off
Profile viewers
138
Post impressions
39
Saved items
Groups
Newsletters
Events
Start a post
Video
Photo
Write article
* * *
Sort by: **Top**
New posts
## Feed post
Suggested
* * *
Jacob👋 Voytko
• 3rd+
Staff Backend Engineer
18h • Edited •
Follow
Aaron Levie, the CEO of Box, recently tweeted that CEOs are uniquely prone to AI psychosis, the type where they are delusional about what can be accomplished with an LLM, because CEOs do not perform the "last mile" of work. They never see the effort required to coax LLMs to perform useful work. They just see the happy path.
So I did my part and made a blog post and YouTube video, explaining how to cure yourself of AI psychosis.
Blog post: **https://lnkd.in/eVkkaUHb**
YouTube video: **https://lnkd.in/e7Pfti3Y**
It's simple, and comes down to asking yourself two questions:
1\. How easily can the LLM supplement its context?
2\. How quickly can the LLM verify its output?
You can use these questions to discover when tasks have a lot of hidden work involved.
How easily can the LLM supplement its context? For a programming problem, the LLM might be able to use a tool search and quickly look up documentation for third-party libraries. But if it needs to search through every document at a Fortune 500 organization for the answer, then it's not so simple and likely requires expert guidance.
Verification is also a sticking point. When a LLM can trivially verify a task, like a coding prompt that needs the task to compile and unit tests to pass, then the LLM can save quite a bit of time. But what if