Source code for arxiv_post.article

__all__ = ["Article"]


# standard library
from dataclasses import dataclass, field, replace
from typing import List, Optional


# dependencies
import re
from arxiv import Result
from pylatexenc.latex2text import LatexNodes2Text


# dataclasses
[docs]@dataclass class Article: """Dataclass for arXiv articles.""" title: str """Title of the article.""" authors: List[str] """Author list of the article.""" summary: str """Summary of the article.""" arxiv_url: str """arXiv URL of the article.""" original: Optional["Article"] = field(default=None, compare=False) """Original article before translation (if any).""" @property def arxiv_pdf_url(self) -> str: """arXiv PDF URL of the article.""" return self.arxiv_url.replace("abs", "pdf")
[docs] @classmethod def from_arxiv_result(cls, result: Result) -> "Article": """Create an article from an arXiv query result.""" return Article( title=result.title, authors=[a.name for a in result.authors], summary=result.summary, arxiv_url=str(result), )
[docs] def replace(self, original: str, translated: str) -> "Article": """Text replacement method for translation.""" title, summary = translated.split("\n", 1) return replace(self, title=title, summary=summary, original=self)
def __str__(self) -> str: """Text output method for translation.""" return f"{self.title}\n{self.summary}" def __post_init__(self) -> None: """Remove TeX's control commands from texts.""" self.title = detex(self.title) self.summary = detex(self.summary)
# runtime functions def detex(text: str) -> str: """Remove TeX's control commands from a text.""" text = re.sub(r"(\n+\s*|\n*\s+)", " ", text) return LatexNodes2Text(keep_comments=True).latex_to_text(text)