get_options
def get_options(
page_or_html, # Page dict (from fetch) or raw HTML string
sel:str , # CSS selector for the <select> element
)-> list :
Extract options from a `<select>` element; returns [{'value': …, 'text': …}]
# Fixture: a minimal page whose <select id="kanda"> holds three options.
_sel_html = '''<html><body>
<select id="kanda">
<option value="1">Balakanda</option>
<option value="2">Ayodhyakanda</option>
<option value="3">Aranyakanda</option>
</select>
</body></html>'''

# Raw HTML string in → one {'value', 'text'} dict per <option>, in the order listed.
opts = get_options(_sel_html, '#kanda')
_expected = [{'value': v, 'text': t}
             for v, t in (('1', 'Balakanda'), ('2', 'Ayodhyakanda'), ('3', 'Aranyakanda'))]
assert opts == _expected

# A Page dict is accepted in place of raw HTML and gives the same result.
_page = dict(url='x', status=200, html=_sel_html, data=None, xhr=[])
assert get_options(_page, '#kanda') == opts

# A selector that matches nothing yields an empty list.
assert get_options(_sel_html, '#missing') == []
# fetch_all: URLs are fetched in parallel; results keep the order of the input list
_urls = ['https://httpbin.org/get', 'https://httpbin.org/ip']
_pages = fetch_all(_urls, verify=False)
# one page per URL ...
assert len(_pages) == 2
# ... each sitting at the same index as the URL it was fetched from
assert all(page['url'] == url for page, url in zip(_pages, _urls))
# and no request came back with a non-200 status
assert not [page for page in _pages if page['status'] != 200]
# live: Valmiki Ramayana — discover sargas 1–3 of Balakanda and Ayodhyakanda, fetch in parallel
_base = 'https://www.valmiki.iitk.ac.in/sloka'

# Any sloka page carries the kanda <select>; use kanda 1 / sarga 1 as the entry point.
_home = fetch(f'{_base}?field_kanda_tid=1&language=dv&field_sarga_value=1')
_kandas = [o for o in get_options(_home, '#edit-field-kanda-tid') if o['value']]  # drop placeholder
assert len(_kandas) >= 6, f"Expected ≥6 kandas, got {len(_kandas)}: {_kandas}"
assert any('BALA' in k['text'].upper() for k in _kandas)

for _k in _kandas[:2]:  # Balakanda, Ayodhyakanda
    # The sarga <select> is read from a page of the kanda being processed.
    _kp = fetch(f'{_base}?field_kanda_tid={_k["value"]}&language=dv&field_sarga_value=1')
    _sargas = [o for o in get_options(_kp, '#edit-field-sarga-value') if o['value']]
    assert len(_sargas) > 0, f"No sargas found for {_k['text']}"

    # Build one URL per sarga (first three only) and fetch them in a single fetch_all call.
    _urls = [f'{_base}?field_kanda_tid={_k["value"]}&language=dv&field_sarga_value={s["value"]}'
             for s in _sargas[:3]]
    _pages = fetch_all(_urls, sel='.view-content')
    assert len(_pages) == 3
    assert all(p['status'] == 200 for p in _pages)
    assert all(len(to_md(p)) > 50 for p in _pages), "Expected non-trivial markdown content"

    print(f"{_k['text']}: {len(_sargas)} sargas, first 3 fetched OK")
    print(f" sarga 1 preview: {to_md(_pages[0])[:120]!r}")
[2026-06-03 08:26:24] INFO: Fetched (200) <GET https://httpbin.org/get> (referer: https://www.google.com/)
[2026-06-03 08:26:24] INFO: Fetched (200) <GET https://httpbin.org/ip> (referer: https://www.google.com/)
[2026-06-03 08:26:26] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
[2026-06-03 08:26:26] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
[2026-06-03 08:26:27] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
[2026-06-03 08:26:28] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=2> (referer: https://www.google.com/)
[2026-06-03 08:26:28] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=1&language=dv&field_sarga_value=3> (referer: https://www.google.com/)
BALAKANDA: 77 sargas, first 3 fetched OK
sarga 1 preview: '[Saint Narada visits hermitage of Valmiki -- Valmiki queries about a single perfect individual bestowed with all good qu'
[2026-06-03 08:26:29] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=2&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
[2026-06-03 08:26:30] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=2&language=dv&field_sarga_value=1> (referer: https://www.google.com/)
[2026-06-03 08:26:30] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=2&language=dv&field_sarga_value=3> (referer: https://www.google.com/)
[2026-06-03 08:26:30] INFO: Fetched (200) <GET https://www.valmiki.iitk.ac.in/sloka?field_kanda_tid=2&language=dv&field_sarga_value=2> (referer: https://www.google.com/)
AYODHYAKANDA: 119 sargas, first 3 fetched OK
sarga 1 preview: "[Description of Rama's virtues Dasaratha contemplates to install Rama as heirapparent Invites kings and elders from town"
API Discovery & Pagination
find_xhr() visits a page with a real browser, captures all XHR and fetch calls it makes, and returns those matching a URL pattern. This surfaces the undocumented JSON endpoints that JavaScript-heavy sites use to load their data. paginate_api() replays one of those captured requests across pages until results are exhausted.
source
find_xhr
def find_xhr(
url:str, # URL to visit with browser
pattern:str='*', # Glob or regex pattern to filter captured XHR URLs
json_only:bool=True, # Return only JSON responses
kw:VAR_KEYWORD
)->list: # Extra kwargs passed to fetch (verify, network_idle, etc.)
Visit url with a headless browser, return [{url, content_type, data}] for each XHR/fetch call made
source
compile_pattern
def compile_pattern(
pattern
):
Compile pattern as regex; if invalid (e.g. bare glob like `*foo*`), convert via fnmatch first
# (pattern, target URL, should it match?) — covers plain regexes and bare globs
_cases = (
    ('.*products.*', 'https://api.example.com/products?q=1', True),
    ('*products*', 'https://api.example.com/products?q=1', True),
    ('*products*', 'https://api.example.com/search', False),
    ('.*[Ss]earch.*', 'https://api.example.com/Search', True),
)
for _pat, _target, _want in _cases:
    assert bool(compile_pattern(_pat).search(_target)) is _want
source
paginate_api
def paginate_api(
url:str, # API endpoint URL
payload:dict=None, # Request body (POST) or params (GET)
page_field:str='pageNumber', # Payload key to increment for each page
size_field:str='pageSize', # Payload key for page size (detects last page)
results_field:str=None, # Response key with items list (auto-detect if None)
method:str='POST', # HTTP method
max_pages:int=10, page_size:int=24, # Page size to request (only used if not in payload)
page_start:int=1, # Starting page number (default 1)
save:bool=False, # If True, saves each page's items to disk
save_file:str='{url}_page_{page}.json', # Filepath pattern for saving (only used if save=True)
force:bool=False, # If True, forces re-fetching even if saved file exists
kw:VAR_KEYWORD
)->list: # Extra kwargs passed to fetch() (verify, headers, etc.)
Paginate through a JSON API, collecting all results. Auto-detects the items list in response.
from fastcore.test import test_eq

# HTML comments are removed with the default flags
_no_comment = clean_md('before <!-- a comment --> after')
test_eq(_no_comment, 'before after')

# surrounding newlines are consumed along with the <details> block
for _raw, _want in (
    ('a\n<details>hidden</details>\nb', 'ab'),
    ('a\n\n<details>hidden</details>\n\nb', 'a\n\nb'),
):
    test_eq(clean_md(_raw), _want)

# with both removals switched off the text passes through unchanged
_untouched = clean_md('no change', rm_comments=False, rm_details=False)
test_eq(_untouched, 'no change')

# html2md: the heading becomes '# Hello' and the paragraph text survives
md = html2md('<h1>Hello</h1><p>World</p>')
assert all(piece in md for piece in ('# Hello', 'World'))
# to_md tests
html_ = '<html><body><h1>Hello</h1><p>World</p><p class="x">Keep</p></body></html>'

# whole document: heading and paragraph text both show up
_md = to_md(html_)
assert all(word in _md for word in ('Hello', 'World'))

# a Page dict is accepted too — its html field is what gets converted
_page = dict(url='https://example.com', status=200, html=html_, data=None, xhr=[])
assert to_md(_page) == _md

# sel keeps only the first element that matches
_md_h1 = to_md(html_, sel='h1')
assert 'Hello' in _md_h1 and 'World' not in _md_h1

# multi=True keeps every match, joined together
_md_ps = to_md(html_, sel='p', multi=True)
assert 'World' in _md_ps and 'Keep' in _md_ps

# no match → empty result (html2md of an empty string may leave minimal whitespace)
_md_none = to_md(html_, sel='div.missing')
assert len(_md_none) < 5

# wrap_tag wraps each multi-result
_md_wrapped = to_md(html_, sel='p', multi=True, wrap_tag='item')
assert '<item>' in _md_wrapped and '</item>' in _md_wrapped
# fetch returns a Page dict with exactly these five keys
_pg = fetch('https://httpbin.org/get', verify=False)
assert isinstance(_pg, dict), f"Expected dict, got {type(_pg)}"
assert set(_pg) == {'url', 'status', 'html', 'data', 'xhr'}, f"Keys mismatch: {_pg.keys()}"

# successful plain fetch: 200, no captured XHR, some body text
assert _pg['status'] == 200, f"Expected 200, got {_pg['status']}"
assert _pg['xhr'] == [], "xhr should be empty without capture_xhr"
assert len(_pg['html']) > 0, "html should be non-empty"

# httpbin.org/get answers with JSON, so data holds the parsed dict
_data = _pg['data']
assert _data is not None, "data should be parsed JSON for a JSON response"
assert _data['url'] == 'https://httpbin.org/get'

# to_md integration — the fetched Page converts to a non-empty string
_text = to_md(_pg)
assert isinstance(_text, str) and _text != ''
[2026-06-03 08:26:53] INFO: Fetched (200) <GET https://httpbin.org/get> (referer: https://www.google.com/)
# crawl from the start URL, capped at 2 pages; the result is a non-empty list of Page dicts
_page_keys = {'url', 'status', 'html', 'data', 'xhr'}
_pages = crawl('https://httpbin.org', max_pages=2, verify=False)
assert isinstance(_pages, list), f"Expected list, got {type(_pages)}"
assert len(_pages) > 0, "Expected at least one page"
assert all(isinstance(p, dict) for p in _pages)
assert all(set(p) == _page_keys for p in _pages), (
    f"Unexpected keys: {[set(p.keys()) for p in _pages]}"
)

# only successful, non-empty pages are returned, and no URL appears twice
assert not [p for p in _pages if p['status'] != 200], "Non-200 pages should be skipped"
assert all(len(p['html']) > 0 for p in _pages), "html should be non-empty"
_seen_urls = {p['url'] for p in _pages}
assert len(_seen_urls) == len(_pages), "url values should be unique"
[2026-06-03 08:27:03] INFO: Fetched (200) <GET https://httpbin.org/> (referer: https://www.google.com/)
[2026-06-03 08:27:03] INFO: Fetched (200) <GET https://httpbin.org/forms/post> (referer: https://www.google.com/)
# Step 1: visit the listing page with a headless browser, capture all XHR/fetch calls
# (find_xhr defaults apply: pattern='*' and json_only=True; verify=False is passed through to fetch)
apis = find_xhr('https://www.danmurphys.com.au/list/wine-all', verify=False)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.danmurphys.com.au/list/wine-all> (referer: https://www.google.com/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://aem.danmurphys.com.au/list/wine-all.model.json> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://aem.danmurphys.com.au/content/experience-fragments/dm/au/en/site/gsa/global-alert/master.model.json> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://aem.danmurphys.com.au/content/experience-fragments/dm/au/en/site/header/meganav-updated-2026/meganav/master2.model.json> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (404) <GET https://aem.danmurphys.com.au/content/experience-fragments/dm/au/en/site/hello_bar/hello-bar/master.model.json> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.danmurphys.com.au/dd-web-assets/address/allcitiesaddresses.json> (referer: https://www.danmurphys.com.au/list/wine-all)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api.danmurphys.com.au/apis/ui/Trolley?summary=true&IncludeOrderHeader=true> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api.danmurphys.com.au/apis/ui/Fulfilment/Preferences?IsCheckoutV2=true> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://aem.danmurphys.com.au/content/experience-fragments/dm/au/en/site/footer/footer/master.model.json> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://auth.danmurphys.com.au/33bf8038-5ecb-400c-8262-2d0725361522/b2c_1a_dans_signup_signin/v2.0/.well-known/openid-configuration> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.danmurphys.com.au/cdn-cgi/challenge-platform/h/g/jsd/oneshot/8fc8ed1d8752/0.5003493194173835:1780437729:2FTe7KQGzLk8msSckVvw4UFmw-wRsRAMr16ZnQ5QJAA/a059e2701ec2e69c> (referer: None)
[2026-06-03 08:27:22] ERROR: Error getting page content: Response.body: Response body is unavailable for redirect responses
[2026-06-03 08:27:22] INFO: Fetched (302) <GET https://dpm.demdex.net/id?d_visid_ver=5.5.0&d_fieldgroup=MC&d_rtbd=json&d_ver=2&d_verify=1&d_orgid=1124C2D754E497DC0A4C98C6%40AdobeOrg&d_nsid=0&ts=1780439238952> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api.danmurphys.com.au/apis/ui/ProductGroup/Products/wine%20all> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://dpm.demdex.net/id/rd?d_visid_ver=5.5.0&d_fieldgroup=MC&d_rtbd=json&d_ver=2&d_verify=1&d_orgid=1124C2D754E497DC0A4C98C6%40AdobeOrg&d_nsid=0&ts=1780439238952> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.danmurphys.com.au/assets/icons/v2/icons-sprite.svg> (referer: https://www.danmurphys.com.au/list/wine-all)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api.edg.com.au/murphybot/chatserver/api/Settings/IsFeatureEnabled/SddPrompt> (referer: https://murphystorage.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api.edg.com.au/murphybot/chatserver/api/Settings/GetWidgetConfiguration/murphy> (referer: https://murphystorage.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api.edg.com.au/murphybot/chatserver/api/Settings/IsFeatureEnabled/EnableWebSocketConnection> (referer: https://murphystorage.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://nebula-cdn.kampyle.com/au/wau/147215/onsite/onsiteData1780295168824.json> (referer: https://murphystorage.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://adobeanalytics.danmurphys.com.au/id?d_visid_ver=5.5.0&d_fieldgroup=A&mcorgid=1124C2D754E497DC0A4C98C6%40AdobeOrg&mid=67286515494590275130725496634452323425&ts=1780439239244> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://target.danmurphys.com.au/rest/v1/delivery?client=wwservices&sessionId=e7042422f7814b3fb239d24471eb7be9&version=2.11.7> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://analytics-fe.digital-cloud-syd1.medallia.com.au/api/web/events> (referer: https://murphystorage.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api2.branch.io/v1/open> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://api2.branch.io/v1/pageview> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_perc_100000_ol_0_mul_1_app-3Ad2db12a9193b7932_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&crc=87587069&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://use.typekit.net/afy4xey.css> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://p.typekit.net/p.css?s=1&k=afy4xey&ht=tk&f=32224.32227.32228.32231.32232.32235.32237.10875.32265&a=49605153&app=typekit&e=css> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,100..1000;1,9..40,100..1000&display=swap> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,100..1000;1,9..40,100..1000&display=swap> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://nebula-cdn.kampyle.com/au/wau/147215/onsite/onsiteData1780295168824.json> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://analytics-fe.digital-cloud-syd1.medallia.com.au/api/web/events> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bam.nr-data.net/1/b75a833ed4?a=1109840273&v=1.315.0&to=ZAdUNkRZX0ZVWkxYC11NdwFCUV5bG359ZTtQDVkJX11CallYVlADVhA%3D&rst=308&ck=0&s=aeacdca596c9e612&ref=https://widgets.merchants.danmurphys.com.au/cookies/manager&ptid=47975b5d0a4528da&ap=21&be=106&fe=104&dc=103&at=SEBXQAxDTEg%3D&fsh=1&perf=%7B%22timing%22:%7B%22of%22:1780439240288,%22n%22:0,%22f%22:45,%22dn%22:45,%22dne%22:45,%22c%22:45,%22s%22:45,%22ce%22:45,%22rq%22:45,%22rp%22:106,%22rpe%22:110,%22di%22:114,%22ds%22:208,%22de%22:208,%22dc%22:209,%22l%22:209,%22le%22:209%7D,%22navigation%22:%7B%7D%7D> (referer: https://widgets.merchants.danmurphys.com.au/)
[2026-06-03 08:27:22] ERROR: Error getting page content: Response.body: Protocol error (Network.getResponseBody): No data found for resource with given identifier
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.google.com/ccm/collect?rcb=1&frm=0&auid=618148141.1780439241&dt=Buy%20Best%20Wine%20Online%20(Top%20Wine%20Brands%20%40Lowest%20Prices)%20Australia%20%7C%20Dan%20Murphy%E2%80%99s&en=page_view&dr=www.google.com&dl=https%3A%2F%2Fwww.danmurphys.com.au%2Flist%2Fwine-all&scrsrc=www.googletagmanager.com&lps=1&rnd=98062875.1780439241&navt=n&npa=0>m=45be6611v871764131za200zd871764131xec&gcd=13l3l3l3l1l1&dma=0&tag_exp=0~115616985~115938466~115938468~117776793~119027222&apve=1&apvf=f&apvc=1&tids=AW-949833488&tid=AW-949833488&tft=1780439240777&tfd=3848> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] ERROR: Error getting page content: Response.body: Protocol error (Network.getResponseBody): No data found for resource with given identifier
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.google.com/ccm/collect?rcb=0&frm=0&auid=618148141.1780439241&dt=Buy%20Best%20Wine%20Online%20(Top%20Wine%20Brands%20%40Lowest%20Prices)%20Australia%20%7C%20Dan%20Murphy%E2%80%99s&en=page_view&dr=www.google.com&dl=https%3A%2F%2Fwww.danmurphys.com.au%2Flist%2Fwine-all&scrsrc=www.googletagmanager.com&lps=1&rnd=98062875.1780439241&navt=n&npa=0>m=45fe6611v9190909461za200zd9190909461xec&gcd=13l3l3l3l1l1&dma=0&tag_exp=0~115616985~115938466~115938469~118228215~119027222&apve=1&apvf=f&apvc=0&tids=DC-10298563&tid=DC-10298563&tft=1780439240807&tfd=3877> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] ERROR: Error getting page content: Response.body: Protocol error (Network.getResponseBody): No data found for resource with given identifier
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.google.com/rmkt/collect/949833488/?random=1780439240748&cv=11&fst=1780439240748&fmt=8&bg=ffffff&guid=ON&async=1&en=gtag.config>m=45be6611v871764131za200zd871764131xec&gcd=13l3l3l3l1l1&dma=0&tag_exp=0~115616985~115938466~115938468~117776793~119027222&u_w=1280&u_h=720&url=https%3A%2F%2Fwww.danmurphys.com.au%2Flist%2Fwine-all&ref=https%3A%2F%2Fwww.google.com%2F&rcb=1&frm=0&tiba=Buy%20Best%20Wine%20Online%20(Top%20Wine%20Brands%20%40Lowest%20Prices)%20Australia%20%7C%20Dan%20Murphy%E2%80%99s&hn=www.googleadservices.com&npa=0&pscdl=noapi&auid=618148141.1780439241&data=event%3Dgtag.config&gcp=5> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] ERROR: Error getting page content: Response.body: Protocol error (Network.getResponseBody): No data found for resource with given identifier
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://www.googleadservices.com/pagead/set_partitioned_cookie?rcb=1&frm=0&apvc=1&auid=618148141.1780439241&dt=Buy%20Best%20Wine%20Online%20(Top%20Wine%20Brands%20%40Lowest%20Prices)%20Australia%20%7C%20Dan%20Murphy%E2%80%99s&tid=AW-949833488&en=page_view&ref=www.google.com&url=https%3A%2F%2Fwww.danmurphys.com.au%2Flist%2Fwine-all&scrsrc=www.googletagmanager.com&lps=1&rnd=98062875.1780439241&navt=n&npa=0>m=45be6611v871764131za200zd871764131xec&gcd=13l3l3l3l1l1&dma=0&tag_exp=0~115616985~115938466~115938468~117776793~119027222&tft=1780439240754&tfd=3824&apve=1&apvf=f> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] ERROR: Error getting page content: Response.body: Protocol error (Network.getResponseBody): No data found for resource with given identifier
[2026-06-03 08:27:22] INFO: Fetched (400) <GET https://ad.doubleclick.net/ccm/s/collect?auid=618148141.1780439241>m=45be6611v871764131za200zd871764131xec> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (202) <GET https://bam.nr-data.net/events/1/b75a833ed4?a=1109840273&v=1.315.0&to=ZAdUNkRZX0ZVWkxYC11NdwFCUV5bG359ZTtQDVkJX11CallYVlADVhA%3D&rst=603&ck=0&s=aeacdca596c9e612&ref=https://widgets.merchants.danmurphys.com.au/cookies/manager&ptid=47975b5d0a4528da> (referer: https://widgets.merchants.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=3279443255&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=3265762191&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=268463667&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=4010833027&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=2424940912&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=3099318030&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srBm&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=2534073245&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&contentType=srTe&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&v=10337260504112724&crc=606904560&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
[2026-06-03 08:27:22] INFO: Fetched (200) <GET https://bf57098vsi.bf.dynatrace.com/bf?type=js3&sn=v_4_srv_3_sn_8C1A5CCF106F46BE4FFFCB293F09B88E_app-3Ad2db12a9193b7932_1_ol_0_perc_100000_mul_1_rcs-3Acss_0&svrid=3&flavor=cors&vi=CUEASFDHGHRPMAHBTBOCLFBBRSAQFSOH-0&modifiedSince=1780361666240&bp=3&app=d2db12a9193b7932&crc=4096453473&en=qzbd7895&end=1> (referer: https://www.danmurphys.com.au/)
# Dan Murphy's wines — full workflow: discover API → paginate → save JSON
#
# Dan Murphy's is a SPA: the product listing page loads products via a hidden JSON API.
# Step 1 visits with a real browser to intercept those calls; step 2 replays the API
# directly (no browser needed). As written, this cell is a small smoke run: it requests
# a single page of 5 items (pageSize=5, max_pages=1), not the full catalogue.
# find the product API — large JSON response containing 'Items' list
# (next() without a default raises StopIteration if no captured call matches)
wine_api = next(
a for a in apis
if 'api.danmurphys.com.au/apis/ui/ProductGroup/Products/wine%20all' in a['url']
and isinstance(a.get('data'), dict)
and 'Items' in a['data']
)
print(f"API endpoint : {wine_api['url']}")
print(f"Response keys: {list(wine_api['data'].keys())}")
print(f"Total available: {wine_api['data'].get('TotalRecordCount', '?')} products")
_items = wine_api['data']['Items']
# 'Items' is handled as either a dict or a list: take the first element either way
_sample = next(iter(_items.values() if isinstance(_items, dict) else _items))
print(f"Fields per item: {list(_sample.keys())}")
# Step 2: paginate the API directly — 1 page × 5 items here, no browser required
# (raise max_pages / pageSize in the payload to pull more).
# save=True writes each page to test_page_{page}.json; force is left at its default (False),
# so a page file that already exists is reused instead of being fetched again.
wines = paginate_api(
wine_api['url'],
payload={
'pageSize': 5, 'pageNumber': 1,
'sortType': 'Relevance', 'Location': 'ProductGroup',
'Filters': [], 'ShowOnlyAvailable': False,
},
page_field='pageNumber',
size_field='pageSize',
results_field='Items',
max_pages=1,
save=True,
save_file='test_page_{page}.json',
verify=False,
)
# NOTE(review): the recorded run printed "Collected 7 wines" and wines[0] == 'Aggregations',
# i.e. the 7 top-level response keys rather than product records. It looks like the
# cached-page path may return the whole response dict instead of its 'Items' — confirm
# in paginate_api.
print(f"\nCollected {len(wines)} wines")
# Step 3: save everything that was collected to wines.json and report the file size
Path('wines.json').write_text(json.dumps(wines, indent=2))
print(f"Saved to wines.json ({Path('wines.json').stat().st_size // 1024} KB)")
# preview first result
wines[0]
API endpoint : https://api.danmurphys.com.au/apis/ui/ProductGroup/Products/wine%20all
Response keys: ['Aggregations', 'Banners', 'Cards', 'DisplayName', 'SearchSource', 'Items', 'TotalRecordCount']
Total available: 7837 products
Fields per item: ['Name', 'PackDefaultStockCode', 'PackParentStockCode', 'Products', 'PackMessage', 'IsInDefaultList', 'IsPersonalised']
Page 1 already saved, skipping fetch
Collected 7 wines
Saved to wines.json (0 KB)
'Aggregations'
# Bulk pull: up to 100 pages of 48 items each; every page is saved under downloads/
_bulk_payload = {
    'pageSize': 48,
    'pageNumber': 1,
    'sortType': 'Relevance',
    'Location': 'ProductGroup',
    'Filters': [],
    'ShowOnlyAvailable': False,
}
wines = paginate_api(
    wine_api['url'],
    payload=_bulk_payload,
    page_field='pageNumber',
    size_field='pageSize',
    results_field='Items',
    max_pages=100,
    save=True,
    save_file='downloads/danmurphys_wines_page_{page}.json',
    verify=False,
)

# Write the combined result to wines.json and report its size in KB
Path('wines.json').write_text(json.dumps(wines, indent=2))
print(f"Saved to wines.json ({Path('wines.json').stat().st_size // 1024} KB)")
Page 1 already saved, skipping fetch
Page 2 already saved, skipping fetch
Page 3 already saved, skipping fetch
Page 4 already saved, skipping fetch
Page 5 already saved, skipping fetch
Page 6 already saved, skipping fetch
Page 7 already saved, skipping fetch
Page 8 already saved, skipping fetch
Page 9 already saved, skipping fetch
Page 10 already saved, skipping fetch
Page 11 already saved, skipping fetch
Page 12 already saved, skipping fetch
Page 13 already saved, skipping fetch
Page 14 already saved, skipping fetch
Page 15 already saved, skipping fetch
Page 16 already saved, skipping fetch
Page 17 already saved, skipping fetch
Page 18 already saved, skipping fetch
Page 19 already saved, skipping fetch
Page 20 already saved, skipping fetch
Page 21 already saved, skipping fetch
Page 22 already saved, skipping fetch
Page 23 already saved, skipping fetch
Page 24 already saved, skipping fetch
Page 25 already saved, skipping fetch
Page 26 already saved, skipping fetch
Page 27 already saved, skipping fetch
Page 28 already saved, skipping fetch
Page 29 already saved, skipping fetch
Page 30 already saved, skipping fetch
Page 31 already saved, skipping fetch
Page 32 already saved, skipping fetch
Page 33 already saved, skipping fetch
Page 34 already saved, skipping fetch
Page 35 already saved, skipping fetch
Page 36 already saved, skipping fetch
Page 37 already saved, skipping fetch
Page 38 already saved, skipping fetch
Page 39 already saved, skipping fetch
Page 40 already saved, skipping fetch
Page 41 already saved, skipping fetch
Page 42 already saved, skipping fetch
Page 43 already saved, skipping fetch
Page 44 already saved, skipping fetch
Page 45 already saved, skipping fetch
Page 46 already saved, skipping fetch
Page 47 already saved, skipping fetch
Page 48 already saved, skipping fetch
Page 49 already saved, skipping fetch
Page 50 already saved, skipping fetch
Page 51 already saved, skipping fetch
Page 52 already saved, skipping fetch
Page 53 already saved, skipping fetch
Page 54 already saved, skipping fetch
Page 55 already saved, skipping fetch
Page 56 already saved, skipping fetch
Page 57 already saved, skipping fetch
Page 58 already saved, skipping fetch
Page 59 already saved, skipping fetch
Page 60 already saved, skipping fetch
Page 61 already saved, skipping fetch
Page 62 already saved, skipping fetch
Page 63 already saved, skipping fetch
Page 64 already saved, skipping fetch
Page 65 already saved, skipping fetch
Page 66 already saved, skipping fetch
Page 67 already saved, skipping fetch
Page 68 already saved, skipping fetch
Page 69 already saved, skipping fetch
Page 70 already saved, skipping fetch
Page 71 already saved, skipping fetch
Page 72 already saved, skipping fetch
Page 73 already saved, skipping fetch
Page 74 already saved, skipping fetch
Page 75 already saved, skipping fetch
Page 76 already saved, skipping fetch
Page 77 already saved, skipping fetch
Page 78 already saved, skipping fetch
Page 79 already saved, skipping fetch
Page 80 already saved, skipping fetch
Page 81 already saved, skipping fetch
Page 82 already saved, skipping fetch
Page 83 already saved, skipping fetch
Page 84 already saved, skipping fetch
Page 85 already saved, skipping fetch
Page 86 already saved, skipping fetch
Page 87 already saved, skipping fetch
Page 88 already saved, skipping fetch
Page 89 already saved, skipping fetch
Page 90 already saved, skipping fetch
Page 91 already saved, skipping fetch
Page 92 already saved, skipping fetch
Page 93 already saved, skipping fetch
Page 94 already saved, skipping fetch
Page 95 already saved, skipping fetch
Page 96 already saved, skipping fetch
Page 97 already saved, skipping fetch
Page 98 already saved, skipping fetch
Page 99 already saved, skipping fetch
Page 100 already saved, skipping fetch
Saved to wines.json (10 KB)
# URL of the first captured call whose address contains the wine product-listing endpoint
L(apis).filter(lambda a: 'api.danmurphys.com.au/apis/ui/ProductGroup/Products/wine%20all' in a['url'])[0]['url']
'https://api.danmurphys.com.au/apis/ui/ProductGroup/Products/wine%20all'
# paginate_api: test with JSONPlaceholder (free public REST API, GET-based)
# _page is the field incremented per request; _limit is the page size (5 per page)
_query = {'_page': 1, '_limit': 5}
posts = paginate_api('https://jsonplaceholder.typicode.com/posts', payload=_query,
                     page_field='_page', size_field='_limit', method='GET', verify=False)
# at least one full page came back, and the items look like posts
assert len(posts) >= 5 and 'title' in posts[0]
[2026-06-03 08:27:32] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=1&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:33] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=2&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:33] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=3&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:34] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=4&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:35] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=5&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:36] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=6&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:36] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=7&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:37] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=8&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:38] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=9&_limit=5> (referer: https://www.google.com/)
[2026-06-03 08:27:38] INFO: Fetched (200) <GET https://jsonplaceholder.typicode.com/posts?_page=10&_limit=5> (referer: https://www.google.com/)
YouTube
search_yt() runs a YouTube search and returns metadata for each video. read_yt() fetches the auto-generated English captions as plain text, disk-cached by video ID. download_yt() saves audio or video to disk.
source
search_yt
def search_yt(
q:str, n:int=10
)->L:
Search YouTube; returns an L of dicts with keys: id, title, url, duration, view_count, channel, description, thumbnail
results = search_yt('3blue1brown neural networks', n=3)
assert isinstance(results, L), f"expected L, got {type(results)}"
assert len(results) >= 1, f"expected results, got {len(results)}"
assert any(kw in results[0]['title'].lower() for kw in ('3blue1brown', 'neural', 'network')), \
f"unexpected title: {results[0]['title']}"
assert results[0]['url'].startswith('https://www.youtube.com'), f"bad url: {results[0]['url']}"
print(results[0]['title'], '|', results[0]['url'])
But what is a neural network? | Deep learning chapter 1 | https://www.youtube.com/watch?v=aircAruvnKk
source
read_yt
def read_yt(
url:str, force:bool=False
)->dict:
Fetch YouTube metadata plus the English transcript (auto-captions, returned under the 'source' key); the result is disk-cached by video ID
meta = read_yt('https://www.youtube.com/watch?v=aircAruvnKk')
assert meta['title'], "title should be non-empty"
assert isinstance(meta['source'], str), "source should be a string"
assert len(meta['source']) > 100, f"transcript too short: {len(meta['source'])} chars"
assert '3blue1brown' in meta['channel'].lower(), f"unexpected channel: {meta['channel']}"
print(f"title: {meta['title']}")
print(f"transcript preview: {meta['source'][:200]}")
title: But what is a neural network? | Deep learning chapter 1
transcript preview: [Music] This is a three. It's sloppily written and rendered at an extremely low resolution of 28x 28 pixels. But your brain has no trouble recognizing it as a three. And I want you to take a moment to
source
download_yt
def download_yt(
url:str, format:str='audio', save_dir:str='.', quality:str=None
)->Path:
Download YouTube media; format='audio'|'video'|yt-dlp format string. Returns Path to saved file.
p = download_yt('https://www.youtube.com/watch?v=aircAruvnKk', format='audio', save_dir='/tmp/fossick_test')
assert p.exists(), f"file not found: {p}"
assert p.suffix == '.mp3', f"expected .mp3, got {p.suffix}"
print(f"saved to: {p} ({p.stat().st_size // 1024} KB)")
WARNING: [youtube] [jsc] Remote components challenge solver script (deno) and NPM package (deno) were skipped. These may be required to solve JS challenges. You can enable these downloads with --remote-components ejs:github (recommended) or --remote-components ejs:npm , respectively. For more information and alternatives, refer to https://github.com/yt-dlp/yt-dlp/wiki/EJS
WARNING: [youtube] aircAruvnKk: n challenge solving failed: Some formats may be missing. Ensure you have a supported JavaScript runtime and challenge solver script distribution installed. Review any warnings presented before this message. For more details, refer to https://github.com/yt-dlp/yt-dlp/wiki/EJS
saved to: /tmp/fossick_test/But what is a neural network? | Deep learning chapter 1.mp3 (26250 KB)
Install
source
mv_skill_md
def mv_skill_md(
dry_run:bool=True, dir:NoneType=None
)->None:
Copy bundled SKILL.md to skill directories.
source
repo_root
def repo_root(
)->Path:
Return the root of the current git repository as a Path, or None if not inside a repo.
root = repo_root()
assert root is not None and (root/'.git').exists(), f"Expected git root, got {root}"
mv_skill_md(dry_run=True)
fossick fossick CDP — network sniffing and replay fossick fossick core CDP — network sniffing and replay search cli core
core – fossick core – fossick core – fossick fossick web search made easy web search made easy simple web search