「利用者:夜泣き/スクリプト」の版間の差分

利用者:夜泣き/スクリプト (ソースを閲覧)

2023年11月6日 (月) 00:03時点における版

204 バイト追加、 2023年11月6日 (月)

→‎コード: v4.1.6

匿名利用者

>Fet-Fe

@@ 11行目: / 11行目: @@
 """Twitter自動収集スクリプト
-ver4.1.5 2023/11/4恒心
+ver4.1.6 2023/11/6恒心
 当コードは恒心停止してしまった https://rentry.co/7298g の降臨ショーツイート自動収集スクリプトの復刻改善版です
@@ 70行目: / 70行目: @@
 from datetime import datetime
 from enum import Enum
-from logging import Logger, getLogger
 from re import Match, Pattern
 from time import sleep
@@ 91行目: / 90行目: @@
 logging.basicConfig(format='{asctime} [{levelname:.4}] : {message}', style='{')
-logger: Final[Logger] = getLogger(__name__)
+logger: Final[logging.Logger] = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)  # basicConfigで設定するとモジュールのDEBUGログなども出力される
@@ 114行目: / 113行目: @@
      """Final[int]: HTTPリクエスト成功失敗関わらず待機時間。
-     1秒待つだけで行儀がいいクローラーだそうなので既定では1秒。
+     Note:
-     しかし日本のポリホーモは1秒待っていても捕まえてくるので注意。
+        1秒待つだけで行儀がいいクローラーだそうなので既定では1秒。
-    https://ja.wikipedia.org/wiki/?curid=2187212
+        しかし日本のポリホーモは1秒待っていても捕まえてくるので注意 [1]_。
+     References:
+        .. [1] 岡崎市立中央図書館事件. (2022, June 21). In Wikipedia.
+           https://ja.wikipedia.org/wiki/?curid=2187212&oldid=79121945
      """
@@ 184行目: / 187行目: @@
          """コンストラクタ。
          """
-         self._proxies: dict[str, str] | None = (
+        # Ｔorに必要なプロキシをセット
-            self._choose_tor_proxies()
+         self._proxies: dict[str, str] | None = self._choose_tor_proxies()
-        )  # Ｔorに必要なプロキシをセット
-     def _execute(self,
+     def _execute(self, url: str) -> requests.models.Response:
-                 url: str) -> requests.models.Response:
          """引数のURLにRequestsモジュールでHTTP接続する。
@@ 838行目: / 839行目: @@
      """Final[str]: archive.todayの魚拓のクリアネットドメイン。
-     記事にはクリアネット用のarchive.todayリンクを貼る。
+     Wiki上の記事にはクリアネット用のarchive.todayリンクを貼る。
      Note:
@@ 973行目: / 974行目: @@
          # 日付取得
          timeline_item: Final[Tag | NavigableString | None] = BeautifulSoup(
-             self._page, 'html.parser').find(
+             self._page, 'html.parser'
-            class_='timeline-item')
+        ).find(class_='timeline-item')
          assert isinstance(timeline_item, Tag)
          date: Final[datetime] = self._tweet_date(timeline_item)
@@ 1,007行目: / 1,008行目: @@
              ['which', 'ffmpeg'],
              stdout=subprocess.DEVNULL,
-             stderr=subprocess.DEVNULL).returncode == 0
+             stderr=subprocess.DEVNULL
+        ).returncode == 0
      def _check_nitter_instance(
@@ 1,211行目: / 1,213行目: @@
                      '-acodec', 'copy', '-vcodec', 'copy', mp4_filename
                  ],
-                 stdout=subprocess.DEVNULL).returncode
+                 stdout=subprocess.DEVNULL
+            ).returncode
              if ts2mp4_returncode == 0:
                  return FfmpegStatus.MP4
@@ 1,336行目: / 1,339行目: @@
          return media_txt
-     def _get_tweet_quote(
+     def _get_tweet_quote(self, tweet: Tag, accessor: AccessorHandler) -> str:
-            self,
-            tweet: Tag,
-            accessor: AccessorHandler) -> str:
          """引用リツイートの引用元へのリンクを取得する。
@@ 1,412行目: / 1,412行目: @@
          return poll_txt
-     def _get_timeline_items(
+     def _get_timeline_items(self, soup: BeautifulSoup) -> list[Tag]:
-            self,
-            soup: BeautifulSoup) -> list[Tag]:
          """タイムラインのツイートを取得。
@@ 1,438行目: / 1,436行目: @@
          return timeline_item_list
-     def _archive_soup(
+     def _archive_soup(self, tag: Tag, accessor: AccessorHandler) -> None:
-            self,
-            tag: Tag,
-            accessor: AccessorHandler) -> None:
          """ツイート内のaタグをテンプレートArchiveの文字列に変化させる。
@@ 1,691行目: / 1,686行目: @@
              return False
          new_page_soup: Final[BeautifulSoup] = BeautifulSoup(res, 'html.parser')
-         if new_page_soup.find(
+         if new_page_soup.find(class_='timeline-end') is None:
-                class_='timeline-end') is None:
              # ツイートの終端ではtimeline-endだけのページになるので判定
              logger.info(new_url + 'に移動しますを')
@@ 1,946行目: / 1,940行目: @@
                 self._next_url(accessor, '16', 5)
+        See Also:
+            * :const:`~TWEET_URL_PREFIX_DEFAULT`: `tweet_url_prefix` のデフォルト値。
+            * :const:`~INCREMENTED_NUM_DEFAULT`: `incremented_num` のデフォルト値。
          """
          assert 0 <= incremented_num and incremented_num <= 9, \