mirror of
https://github.com/penpot/penpot-mcp.git
synced 2026-04-25 11:18:37 +00:00
Obtain structured information on each type, storing it in a yaml file
This commit is contained in:
parent
068817709e
commit
536250410c
37
prepare-api-docs/pixi.lock
generated
37
prepare-api-docs/pixi.lock
generated
@ -31,6 +31,8 @@ environments:
|
||||
- conda: https://conda.anaconda.org/conda-forge/win-64/python-3.11.13-h3f84c4b_0_cpython.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.11-8_cp311.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.5-pyhd8ed1ab_0.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/win-64/ruamel.yaml-0.18.15-py311h3485c13_1.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/win-64/ruamel.yaml.clib-0.2.12-py311h3485c13_1.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.8-pyhd8ed1ab_0.conda
|
||||
- conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h2c6b04d_2.conda
|
||||
@ -357,6 +359,41 @@ packages:
|
||||
- pkg:pypi/requests?source=hash-mapping
|
||||
size: 59263
|
||||
timestamp: 1755614348400
|
||||
- conda: https://conda.anaconda.org/conda-forge/win-64/ruamel.yaml-0.18.15-py311h3485c13_1.conda
|
||||
sha256: 4a222db2ec50db5a11ace74090045170a611b24f82d80535e8d98bf478c32cc2
|
||||
md5: be5d5993e755c3edc3ef860da01c67b4
|
||||
depends:
|
||||
- python >=3.11,<3.12.0a0
|
||||
- python_abi 3.11.* *_cp311
|
||||
- ruamel.yaml.clib >=0.1.2
|
||||
- ucrt >=10.0.20348.0
|
||||
- vc >=14.3,<15
|
||||
- vc14_runtime >=14.44.35208
|
||||
arch: x86_64
|
||||
platform: win
|
||||
license: MIT
|
||||
license_family: MIT
|
||||
purls:
|
||||
- pkg:pypi/ruamel-yaml?source=hash-mapping
|
||||
size: 274899
|
||||
timestamp: 1756839144620
|
||||
- conda: https://conda.anaconda.org/conda-forge/win-64/ruamel.yaml.clib-0.2.12-py311h3485c13_1.conda
|
||||
sha256: ad383a91985153438817e6b241c9f151692e01ef257279f83dec55f8d024e213
|
||||
md5: 713991ee78f7fbbcecfe03c7226dec24
|
||||
depends:
|
||||
- python >=3.11,<3.12.0a0
|
||||
- python_abi 3.11.* *_cp311
|
||||
- ucrt >=10.0.20348.0
|
||||
- vc >=14.3,<15
|
||||
- vc14_runtime >=14.44.35208
|
||||
arch: x86_64
|
||||
platform: win
|
||||
license: MIT
|
||||
license_family: MIT
|
||||
purls:
|
||||
- pkg:pypi/ruamel-yaml-clib?source=hash-mapping
|
||||
size: 107842
|
||||
timestamp: 1756829092915
|
||||
- pypi: https://files.pythonhosted.org/packages/ad/d5/62a0e693230bace8e9a767d6d187a4d9421a7c6ee4b48551f8ff7bd1629a/sensai_utils-1.5.0-py3-none-any.whl
|
||||
name: sensai-utils
|
||||
version: 1.5.0
|
||||
|
||||
@ -14,6 +14,7 @@ pixi-pycharm = ">=0.0.9,<0.0.10"
|
||||
beautifulsoup4 = ">=4.13.5,<5"
|
||||
markdownify = ">=1.1.0,<2"
|
||||
requests = ">=2.32.5,<3"
|
||||
"ruamel.yaml" = ">=0.18.15,<0.19"
|
||||
|
||||
[pypi-dependencies]
|
||||
sensai-utils = ">=1.5.0, <2"
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
import dataclasses
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from markdownify import MarkdownConverter
|
||||
from ruamel.yaml import YAML
|
||||
from ruamel.yaml.scalarstring import LiteralScalarString
|
||||
from sensai.util import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -20,7 +25,7 @@ class PenpotAPIContentMarkdownConverter(MarkdownConverter):
|
||||
if "class" in node.attrs and "tsd-breadcrumb" in node.attrs["class"]:
|
||||
return ""
|
||||
|
||||
# convert h3 and h4 to plain text
|
||||
# convert h5 and h4 to plain text
|
||||
if node.name in ["h5", "h4"]:
|
||||
return soup.get_text()
|
||||
|
||||
@ -52,7 +57,9 @@ class PenpotAPIContentMarkdownConverter(MarkdownConverter):
|
||||
|
||||
# convert <pre> blocks to markdown code blocks
|
||||
if node.name == "pre":
|
||||
return f"\n```\n{text.strip()}\n```\n\n"
|
||||
for button in soup.find_all("button"):
|
||||
button.decompose()
|
||||
return f"\n```\n{soup.get_text().strip()}\n```\n\n"
|
||||
|
||||
# convert tsd-signature elements to code blocks, converting <br> to newlines
|
||||
if "class" in node.attrs and "tsd-signature" in node.attrs["class"]:
|
||||
@ -64,11 +71,53 @@ class PenpotAPIContentMarkdownConverter(MarkdownConverter):
|
||||
return super().process_tag(node, parent_tags=parent_tags)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TypeInfo:
|
||||
overview: str
|
||||
"""
|
||||
the main type information, which contains all the declarations/signatures but no descriptions
|
||||
"""
|
||||
members: dict[str, dict[str, str]]
|
||||
"""
|
||||
mapping from member type (e.g. "Properties", "Methods") to a mapping of member name to markdown description
|
||||
"""
|
||||
|
||||
|
||||
class YamlConverter:
|
||||
"""Convert dictionaries to YAML with all strings in block literal style"""
|
||||
|
||||
def __init__(self):
|
||||
self.yaml = YAML()
|
||||
self.yaml.preserve_quotes = True
|
||||
self.yaml.width = 4096 # Prevent line wrapping
|
||||
|
||||
def _convert_strings_to_block(self, obj):
|
||||
if isinstance(obj, dict):
|
||||
return {k: self._convert_strings_to_block(v) for k, v in obj.items()}
|
||||
elif isinstance(obj, list):
|
||||
return [self._convert_strings_to_block(item) for item in obj]
|
||||
elif isinstance(obj, str):
|
||||
return LiteralScalarString(obj)
|
||||
else:
|
||||
return obj
|
||||
|
||||
def to_yaml(self, data):
|
||||
processed_data = self._convert_strings_to_block(data)
|
||||
stream = StringIO()
|
||||
self.yaml.dump(processed_data, stream)
|
||||
return stream.getvalue()
|
||||
|
||||
def to_file(self, data, filepath):
|
||||
processed_data = self._convert_strings_to_block(data)
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
self.yaml.dump(processed_data, f)
|
||||
|
||||
|
||||
class PenpotAPIDocsProcessor:
|
||||
def __init__(self):
|
||||
self.md_converter = PenpotAPIContentMarkdownConverter()
|
||||
self.base_url = "https://penpot-plugins-api-doc.pages.dev"
|
||||
self.pages = {}
|
||||
self.types: dict[str, TypeInfo] = {}
|
||||
|
||||
def run(self, target_dir: str):
|
||||
os.makedirs(target_dir, exist_ok=True)
|
||||
@ -83,15 +132,16 @@ class PenpotAPIDocsProcessor:
|
||||
for link in links:
|
||||
href = link['href']
|
||||
if href.startswith("interfaces/") or href.startswith("types/"):
|
||||
page_name = href.split("/")[-1].replace(".html", "")
|
||||
log.info("Processing page: %s", page_name)
|
||||
page_md = self._process_page(href)
|
||||
type_name = href.split("/")[-1].replace(".html", "")
|
||||
log.info("Processing page: %s", type_name)
|
||||
type_info = self._process_page(href)
|
||||
self.types[type_name] = type_info
|
||||
|
||||
# save to md file
|
||||
md_path = os.path.abspath(os.path.join(target_dir, f"{page_name}.md"))
|
||||
log.info("Writing %s", md_path)
|
||||
with open(md_path, "w", encoding="utf-8") as f:
|
||||
f.write(page_md)
|
||||
# save to yaml
|
||||
yaml_path = os.path.join(target_dir, "api_types.yml")
|
||||
log.info("Writing API type information to %s", yaml_path)
|
||||
data_dict = {k: dataclasses.asdict(v) for k, v in self.types.items()}
|
||||
YamlConverter().to_file(data_dict, yaml_path)
|
||||
|
||||
def _fetch(self, rel_url: str) -> str:
|
||||
response = requests.get(f"{self.base_url}/{rel_url}")
|
||||
@ -100,18 +150,49 @@ class PenpotAPIDocsProcessor:
|
||||
html_content = response.text
|
||||
return html_content
|
||||
|
||||
def _process_page(self, rel_url: str):
|
||||
def _html_to_markdown(self, html_content: str) -> str:
|
||||
md = self.md_converter.convert(html_content)
|
||||
md = md.replace("\xa0", " ") # replace non-breaking spaces
|
||||
return md.strip()
|
||||
|
||||
def _process_page(self, rel_url: str) -> TypeInfo:
|
||||
html_content = self._fetch(rel_url)
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
content = soup.find(attrs={"class": "col-content"})
|
||||
# full_text = self._html_to_markdown(str(content))
|
||||
|
||||
markdown = self.md_converter.convert(str(content))
|
||||
return markdown
|
||||
# extract individual members
|
||||
members = {}
|
||||
member_group_tags = []
|
||||
for el in content.children:
|
||||
if isinstance(el, Tag):
|
||||
if "class" in el.attrs and "tsd-member-group" in el.attrs["class"]:
|
||||
member_group_tags.append(el)
|
||||
members_type = el.find("h2").get_text().strip()
|
||||
members_in_group = {}
|
||||
members[members_type] = members_in_group
|
||||
for member_tag in el.find_all(attrs={"class": "tsd-member"}):
|
||||
member_anchor = member_tag.find("a", attrs={"class": "tsd-anchor"}, recursive=False)
|
||||
member_name = member_anchor.attrs["id"]
|
||||
member_tag.find("h3").decompose() # remove heading
|
||||
members_in_group[member_name] = self._html_to_markdown(str(member_tag))
|
||||
|
||||
# remove the member groups from the soup
|
||||
for tag in member_group_tags:
|
||||
tag.decompose()
|
||||
|
||||
# overview is what remains in content after removing member groups
|
||||
overview = self._html_to_markdown(str(content))
|
||||
|
||||
return TypeInfo(
|
||||
overview=overview,
|
||||
members=members
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
target_dir = Path(__file__).parent.parent / "mcp-server" / "data" / "api"
|
||||
target_dir = Path(__file__).parent.parent / "mcp-server" / "data"
|
||||
PenpotAPIDocsProcessor().run(target_dir=str(target_dir))
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user