Source code for agent.browsing.manual.sources.pubmed

"""PubMed manual browsing using NCBI E-utilities (ESearch + ESummary).

No additional dependencies required. Network calls use ``requests`` and return
lightweight ``SearchItem`` objects with stable PubMed IDs.
"""

from typing import Iterator, List, Optional, override

import requests

from .base import ManualSource, SearchItem


# Root URL of the NCBI E-utilities service. Endpoint names such as
# ``esearch.fcgi`` and ``esummary.fcgi`` are appended to it when building
# request URLs.
EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"


class PubMedBrowser(ManualSource):
    """Manual source for PubMed articles using E-utilities JSON endpoints.

    ``search`` returns a single page of results; ``iter_all`` and
    ``search_all`` walk successive pages by repeatedly calling ``search``.
    """

    @staticmethod
    def _eutils_get_json(endpoint: str, params: dict) -> dict:
        """GET one E-utilities endpoint and return its decoded JSON body.

        :param endpoint: Endpoint file name, e.g. ``"esearch.fcgi"``.
        :param params: Query-string parameters for the request.
        :returns: The decoded JSON document.
        :raises requests.HTTPError: If the response has an error status.
        """
        response = requests.get(
            f"{EUTILS_BASE}/{endpoint}", params=params, timeout=20
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _summary_to_item(pmid: str, info: dict) -> SearchItem:
        """Build a ``SearchItem`` from one ESummary record.

        Missing or empty ``title``/``pubdate`` fields become empty strings;
        the snippet is the publication date, or ``None`` when it is empty.
        """
        title = str(info.get("title") or "")
        pubdate = str(info.get("pubdate") or "")
        return SearchItem(
            title=title,
            url=f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            snippet=pubdate if pubdate else None,
            item_id=pmid,
            extra={"pubdate": pubdate},
        )

    @override
    def search(
        self, query: str, max_results: int = 25, start: int = 0, **kwargs: object
    ) -> List[SearchItem]:
        """Search PubMed and return a page of results.

        This uses ``esearch.fcgi`` to obtain a list of PMIDs, then
        ``esummary.fcgi`` to fetch basic metadata.

        :param query: Free-text query string.
        :param max_results: Maximum number of results to return. A value of
            zero or less returns an empty list without any network request.
        :param start: Zero-based start index for pagination.
        :param kwargs: Accepted for interface compatibility; not used here.
        :returns: List of normalized search items with PMIDs, in the order
            returned by ESearch.
        :raises requests.HTTPError: If either E-utilities request returns an
            error status.
        """
        # Nothing can be returned for a non-positive page size, so skip the
        # round-trips entirely.
        if max_results <= 0:
            return []

        esearch_json = self._eutils_get_json(
            "esearch.fcgi",
            {
                "db": "pubmed",
                "retmode": "json",
                "retmax": str(max_results),
                "retstart": str(start),
                "term": query,
            },
        )
        id_list = esearch_json.get("esearchresult", {}).get("idlist", [])
        if not id_list:
            return []

        # NOTE(review): all PMIDs travel in the GET query string. NCBI's
        # guidance reportedly recommends POST for large UID lists (roughly
        # more than 200) -- confirm before calling with a large max_results.
        esummary_json = self._eutils_get_json(
            "esummary.fcgi",
            {
                "db": "pubmed",
                "retmode": "json",
                "id": ",".join(id_list),
            },
        )
        result = esummary_json.get("result", {})

        # Iterate over id_list (not over ``result``) so the ESearch ordering
        # is preserved; a PMID without a summary record still yields an item
        # with an empty title.
        return [
            self._summary_to_item(pmid, result.get(pmid, {})) for pmid in id_list
        ]

    @override
    def iter_all(
        self,
        query: str,
        chunk_size: int = 100,
        limit: Optional[int] = None,
        **kwargs: object,
    ) -> Iterator[SearchItem]:
        """Iterate through PubMed results by fetching in chunks.

        Pages are fetched lazily through ``search``. Iteration stops when
        ``limit`` items have been yielded, when a page comes back empty, or
        when a page is shorter than the number of items requested for it.

        :param query: Free-text query string.
        :param chunk_size: Number of results per request.
        :param limit: Optional maximum number of items to yield.
        :param kwargs: Accepted for interface compatibility; not used here.
        :returns: Iterator over normalized search items.
        """
        yielded = 0
        start = 0
        # Checking the limit *before* each request avoids a wasted fetch when
        # the limit is zero or falls exactly on a page boundary.
        while limit is None or yielded < limit:
            # Never ask for more than is still needed under the limit.
            wanted = chunk_size
            if limit is not None:
                wanted = min(chunk_size, limit - yielded)

            page = self.search(query=query, max_results=wanted, start=start)
            if not page:
                return

            for item in page:
                # Defensive: honour the limit even if a page is longer than
                # requested.
                if limit is not None and yielded >= limit:
                    return
                yielded += 1
                yield item

            start += len(page)
            # A short page means the result set is exhausted.
            if len(page) < wanted:
                return

    @override
    def search_all(
        self,
        query: str,
        chunk_size: int = 100,
        limit: Optional[int] = None,
        **kwargs: object,
    ) -> List[SearchItem]:
        """Collect PubMed results for a query into a list.

        :param query: Free-text query string.
        :param chunk_size: Number of results per request.
        :param limit: Optional maximum number of items to collect.
        :param kwargs: Accepted for interface compatibility; not used here.
        :returns: List of normalized search items.
        """
        return list(
            self.iter_all(query=query, chunk_size=chunk_size, limit=limit)
        )