Fix #5: HTML injection in Telegram messages #20

Merged
shoko merged 1 commit from fix/issue-5-html-injection-telegram into master 2026-03-25 13:13:52 +01:00
2 changed files with 92 additions and 2 deletions
Showing only changes of commit d0534aedbf - Show all commits

View File

@@ -4,6 +4,7 @@ Polymarket Event Browser
Browse tradeable Polymarket events by game category.
"""
import html
Review

Is this part of the standard library, or does the user need to install this package before running this script?

Is this part of the standard library, or does the user need to install this package before running this script?
import json
import time
import argparse
@@ -577,6 +578,15 @@ def print_detail(e, detail):
# TELEGRAM
# ============================================================
def escape_html(text):
Review

Does escape_html remove the symbols when sending the message to Telegram, or does it keep the symbols in a different format? Can you also do a test run by sending a message to my Telegram?

Does escape_html remove the symbols when sending the message to Telegram, or does it keep the symbols in a different format? Can you also do a test run by sending a message to my Telegram?
"""Escape HTML-sensitive characters for Telegram parse_mode=HTML."""
return (text
.replace("&", "&amp;")
.replace("<", "&lt;")
.replace(">", "&gt;")
.replace('"', "&quot;"))
def send_telegram_message(bot_token, chat_id, text, timeout=10):
"""Send a message via Telegram bot API. Returns the message ID on success.
@@ -645,7 +655,7 @@ def send_to_telegram(match_events, non_match_events, category, matches_only=Fals
odds_b = format_odds(float(prices[1])) if len(prices) > 1 else "?"
tournament = get_tournament(title)
title_clean = title.split(" - ")[0].strip() if " - " in title else title
lines.append(f"<b>{i}.</b> <a href=\"{url}\">{title_clean}</a>")
lines.append(f"<b>{i}.</b> <a href=\"{url}\">{escape_html(title_clean)}</a>")
lines.append(f" {start_time_wib} | {rel_time}")
lines.append(f" Vol: ${vol:,.0f}")
if tournament:
@@ -666,7 +676,7 @@ def send_to_telegram(match_events, non_match_events, category, matches_only=Fals
start_time_wib, rel_time = get_start_time_wib(e)
total_vol = sum(float(m.get("volume", 0)) for m in e.get("markets", []))
market_count = len(e.get("markets", []))
lines.append(f"<b>{i}.</b> <a href=\"{url}\">{title}</a>")
lines.append(f"<b>{i}.</b> <a href=\"{url}\">{escape_html(title)}</a>")
lines.append(f" {start_time_wib} | {rel_time}")
lines.append(f" Markets: {market_count} | Total Vol: ${total_vol:,.0f}")
lines.append("")

View File

@@ -121,5 +121,85 @@ class TestSendTelegramMessage(unittest.TestCase):
self.assertIn(b"parse_mode=HTML", req.data)
class TestHtmlInjection(unittest.TestCase):
"""Tests for HTML injection prevention in Telegram messages."""
@patch.dict('os.environ', {'TELEGRAM_BOT_TOKEN': 'test_token', 'CHAT_ID': 'test_chat'})
@patch('browse.send_telegram_message')
def test_send_to_telegram_html_injection_in_match_title(self, mock_send_msg):
"""
titles in match events are NOT escaped before inserting into HTML.
This test FAILS if HTML chars are unescaped (vulnerable),
and PASSES once escape_html() is implemented.
"""
mock_send_msg.return_value = 123
# Simulate a Polymarket event with HTML injection in the title
malicious_event = {
"title": "<script>alert('XSS')</script> - Team A vs Team B",
"slug": "test-event",
"startTime": "2027-03-26T12:00:00Z",
"markets": [{
"sportsMarketType": "moneyline",
"outcomes": '["Team A", "Team B"]',
Review

I think it's also possible that the HTML injection could come from either outcomes or sportsMarketType, since we render both of these values to Telegram, no? What do you think about it?

I think it's also possible that the HTML injection could come from either outcomes or sportsMarketType, since we render both of these values to Telegram, no? What do you think about it?
"outcomePrices": "[0.55, 0.45]",
"bestBid": "0.54",
"bestAsk": "0.56",
"volume": 50000,
"acceptingOrders": True,
"closed": False,
}],
}
from browse import send_to_telegram
send_to_telegram([malicious_event], [], "Counter Strike")
# Check what was passed to send_telegram_message
self.assertEqual(mock_send_msg.called, True)
sent_text = mock_send_msg.call_args[0][2] # text arg (3rd positional)
# AFTER FIX: <script> should be escaped as &lt;script&gt;
# BEFORE FIX: raw <script> appears in text (vulnerable — test would fail here)
self.assertIn("&lt;script&gt;", sent_text,
"HTML injection still present — title may NOT be escaped")
self.assertIn("&lt;/script&gt;", sent_text)
@patch.dict('os.environ', {'TELEGRAM_BOT_TOKEN': 'test_token', 'CHAT_ID': 'test_chat'})
@patch('browse.send_telegram_message')
def test_send_to_telegram_ampersand_in_title(self, mock_send_msg):
"""
Ampersands in titles should be escaped as &amp; when using HTML parse_mode.
BEFORE fix: "&" appears raw in the HTML (vulnerable).
AFTER fix: "&" appears as "&amp;".
"""
mock_send_msg.return_value = 123
event_with_ampersand = {
"title": "Team A & Team B vs Team C",
"slug": "amp-test",
"startTime": "2027-03-26T12:00:00Z",
"markets": [{
"sportsMarketType": "moneyline",
"outcomes": '["Team A & Team B", "Team C"]',
"outcomePrices": "[0.50, 0.50]",
"bestBid": "0.49",
"bestAsk": "0.51",
"volume": 10000,
"acceptingOrders": True,
"closed": False,
}],
}
from browse import send_to_telegram
send_to_telegram([event_with_ampersand], [], "Dota 2")
sent_text = mock_send_msg.call_args[0][2]
# AFTER FIX: & should be escaped as &amp;
# BEFORE FIX: raw & appears (vulnerable — test would fail here)
self.assertIn("&amp;", sent_text,
"Ampersand not escaped — title may NOT be escaped")
if __name__ == "__main__":
unittest.main()