Source code for arxiv_post.article
__all__ = ["Article"]
# standard library
from dataclasses import dataclass, field, replace
from typing import List, Optional
# dependencies
import re
from arxiv import Result
from pylatexenc.latex2text import LatexNodes2Text
# dataclasses
@dataclass
class Article:
    """Dataclass for arXiv articles.

    On construction the title and summary are passed through ``detex``
    (see ``__post_init__``), so the stored values are the converted texts.
    """

    title: str
    """Title of the article."""

    authors: List[str]
    """Author list of the article."""

    summary: str
    """Summary of the article."""

    arxiv_url: str
    """arXiv URL of the article."""

    original: Optional["Article"] = field(default=None, compare=False)
    """Original article before translation (if any)."""

    @property
    def arxiv_pdf_url(self) -> str:
        """arXiv PDF URL of the article.

        Derived from ``arxiv_url`` by swapping its first ``/abs/`` path
        segment for ``/pdf/``. If ``arxiv_url`` contains no ``/abs/``
        segment, it is returned unchanged.
        """
        # Match the whole path segment, once: a bare "abs" -> "pdf"
        # substitution would also rewrite any other "abs" in the URL
        # (e.g. inside the host name).
        return self.arxiv_url.replace("/abs/", "/pdf/", 1)

    @classmethod
    def from_arxiv_result(cls, result: "Result") -> "Article":
        """Create an article from an arXiv query result.

        Args:
            result: Query result providing ``title``, ``summary`` and
                ``authors`` (each author exposing a ``name`` attribute).

        Returns:
            A new instance of ``cls`` built from the result.
        """
        # Build through ``cls`` so that subclasses get their own type back.
        return cls(
            title=result.title,
            authors=[author.name for author in result.authors],
            summary=result.summary,
            # NOTE(review): relies on str(result) being the URL of the
            # article's abstract page -- confirm against the arxiv package.
            arxiv_url=str(result),
        )

    def replace(self, original: str, translated: str) -> "Article":
        """Text replacement method for translation.

        Args:
            original: Not used by this method.
            translated: Translated text laid out like the output of
                ``__str__``: the title on the first line and the summary
                on everything after the first newline.

        Returns:
            A copy of this article carrying the translated title and
            summary, with ``original`` set to this article. As the copy is
            created through the dataclass constructor, ``__post_init__``
            runs on the translated texts as well.
        """
        # ``partition`` copes with a text that has no newline (e.g. an empty
        # summary whose trailing newline was stripped): the whole text
        # becomes the title and the summary is left empty, where unpacking
        # ``split("\n", 1)`` would raise ValueError.
        title, _, summary = translated.partition("\n")

        # Inside the method body, ``replace`` resolves to the module-level
        # ``dataclasses.replace``, not to this method.
        return replace(self, title=title, summary=summary, original=self)

    def __str__(self) -> str:
        """Text output method for translation.

        Returns:
            The title and the summary joined by a single newline.
        """
        return f"{self.title}\n{self.summary}"

    def __post_init__(self) -> None:
        """Remove TeX's control commands from texts."""
        self.title = detex(self.title)
        self.summary = detex(self.summary)
# runtime functions
def detex(text: str) -> str:
    """Convert a TeX-flavoured text into plain text.

    Args:
        text: Text that may contain line breaks and TeX markup.

    Returns:
        The text as converted by ``LatexNodes2Text``, after every run of
        newlines and/or other whitespace was replaced by a single space.
    """
    # Step 1: flatten the text so that line breaks and repeated whitespace
    # each become one space.
    whitespace_run = re.compile(r"(\n+\s*|\n*\s+)")
    flattened = whitespace_run.sub(" ", text)

    # Step 2: let pylatexenc render the markup as text. keep_comments=True
    # asks the converter to retain LaTeX comments instead of dropping them
    # (presumably so that a literal "%" does not swallow the rest of the
    # now single-line text -- confirm).
    converter = LatexNodes2Text(keep_comments=True)
    return converter.latex_to_text(flattened)