This function does not rely on RL to guess but on Information theory. For more details refer to this video: https://youtu.be/v68zYyaEmEA?si=Lz2N-on3pT4HSvak

## Download the list of words

In [3]:
import requests
from typing import Set

def load_word_list_from_url(url: str, output_path: str) -> Set[str]:
    """
    Loads a set of words from a remote URL.

    Args:
        url (str): The URL of the text file to load.

    Returns:
        Set[str]: A set of uppercase 5-letter words, or an empty set if loading fails.
    """
    try:
        # Step 1: Make an HTTP GET request to the URL
        response = requests.get(url)

        # Step 2: Check if the request was successful (status code 200)
        # This will raise an exception for bad status codes (4xx or 5xx)
        response.raise_for_status()

        # Step 3: Process the response text
        # The response text is a single string, so we split it into lines
        words = {
            line.strip().upper()
            for line in response.text.splitlines()
            if len(line.strip()) == 5
        }
        print(f"Successfully loaded {len(words)} words from the URL.")
        if output_path:
            with open(output_path, "w") as f:
                f.write("\n".join(sorted(list(words))))
        return words

    except requests.exceptions.RequestException as e:
        print(f"Error: Could not fetch word list from URL. {e}")
        return set()



file_url = "https://gist.githubusercontent.com/kcwhite/bb598f1b3017b5477cb818c9b086a5d9/raw/5a0adbbb9830ed93a573cb87a7c14bb5dd0b1883/wordle_possibles.txt"
file_url = "https://raw.githubusercontent.com/Roy-Orbison/wordle-guesses-answers/refs/heads/main/answers.txt"
# Call the function with the URL
FULL_WORD_LIST = load_word_list_from_url(file_url, "nyt_answers_wordle_list.txt")

# Print a few words to confirm it worked
if FULL_WORD_LIST:
    print("Full word list: ", len(FULL_WORD_LIST))
    print("\nSample of loaded words:")
    print(list(FULL_WORD_LIST)[:10])

Successfully loaded 2309 words from the URL.
Full word list:  2309

Sample of loaded words:
['ANGLE', 'ABLED', 'SAUCE', 'MELEE', 'GAMUT', 'NORTH', 'WOMAN', 'MAMMA', 'WHOSE', 'ABUSE']


In [4]:
import math
from collections import Counter

def generate_feedback(guess: str, answer: str) -> str:
    """A robust, corrected function to generate feedback."""
    guess = guess.upper()
    answer = answer.upper()

    if not guess:
      return None

    # Pre-scan for letters in the answer for yellow/grey determination
    answer_letters = Counter(answer)
    feedback = [''] * 5

    # 1st Pass: Greens
    # Mark all correct letters in the correct spots first.
    # This "uses them up" from the answer_letters pool.
    for i in range(5):
        if guess[i] == answer[i]:
            feedback[i] = 'âœ“'
            answer_letters[guess[i]] -= 1

    # 2nd Pass: Yellows and Greys
    # Now, process the rest of the letters.
    for i in range(5):
        if feedback[i] == '':  # If not already marked green
            # If the letter is still present in the answer pool, it's a yellow
            if answer_letters[guess[i]] > 0:
                feedback[i] = '-'
                answer_letters[guess[i]] -= 1
            # Otherwise, it's a grey
            else:
                feedback[i] = 'x'
    return "".join(feedback)


def is_consistent(word: str, guess: str, feedback: str) -> bool:
    """A final, corrected, rule-based function to check consistency."""
    # This function uses the generate_feedback function to check consistency.
    # A word is consistent if and only if the feedback generated from
    # that word and the guess matches the given feedback.
    # This is the most robust way to ensure perfect logical mirroring.
    return generate_feedback(guess, word) == feedback


def find_best_guess(possible_words: list[str], allowed_guesses: list[str]) -> str:
    """Finds the best guess to maximize information gain."""
    best_guess = ""
    max_info_gain = -1

    total_possible = len(possible_words)
    if total_possible == 0:
        return ""
    if total_possible <= 2:
        return possible_words[0]

    # To speed up, if there are many possibilities, narrow the search space
    # for the guess. Otherwise, checking all 13k guesses takes too long.
    guess_pool = allowed_guesses
    if total_possible > 100 and len(allowed_guesses) > 1000:
        # A common heuristic is to only check from possible answers when the list is big
        guess_pool = list(set(allowed_guesses) & set(possible_words))
        if not guess_pool: # Fallback if there's no intersection
             guess_pool = allowed_guesses

    for guess in guess_pool:
        groups = {}
        for answer in possible_words:
            feedback = generate_feedback(guess, answer)
            if feedback not in groups:
                groups[feedback] = 0
            groups[feedback] += 1

        current_info_gain = 0
        for feedback in groups:
            p = groups[feedback] / total_possible
            current_info_gain -= p * math.log2(p)

        if current_info_gain > max_info_gain:
            max_info_gain = current_info_gain
            best_guess = guess

    return best_guess

my_first_guess = "SOARE"
possible_answers = FULL_WORD_LIST
ALLOWED_GUESSES = FULL_WORD_LIST

def play_wordle(my_guess: str, secret_word: str, possible_answers: list[str], nb_plays: int = 1) -> str:
    """
    Plays a single round of Wordle with the given guess and answer.

    Args:
        guess (str): The guessed word.
        answer (str): The correct answer word.
        nb_plays (int): number of plays

    Returns:
        str: The feedback string based on the guess.
    """
    if not my_guess:
        return None
    feedback = generate_feedback(my_guess, secret_word)
    if feedback == "âœ“âœ“âœ“âœ“âœ“":
        print(f"\nðŸŽ‰ ðŸŽ‰ ðŸŽ‰ Congratulations! '{my_guess}' is the correct answer, after {nb_plays} plays!")
        return None
    feedback_str = ""
    for i, j in zip(my_guess, feedback):
        feedback_str += (f" {i} ({j})")
    print(f"Played '{my_guess}', got feedback {feedback_str}, {feedback}'")
    possible_answers = [word for word in possible_answers if is_consistent(word, my_guess, feedback)]
    print(f"\nPossible words remaining: {len(possible_answers)} -> {possible_answers}")
    my_guess = find_best_guess(possible_answers, FULL_WORD_LIST)
    nb_plays+=1
    return play_wordle(my_guess, secret_word, possible_answers, nb_plays)


Lets Play, pick a secret word:

In [9]:
secret_word = "ANGLE"
if not secret_word in FULL_WORD_LIST:
    print(f"The word {secret_word} is NOT in the full word list. Please select a valid word.")
else:
    play_wordle(my_first_guess, secret_word, possible_answers)

Played 'SOARE', got feedback  S (x) O (x) A (-) R (x) E (âœ“), xx-xâœ“'

Possible words remaining: 40 -> ['ANGLE', 'GAFFE', 'VALUE', 'MAYBE', 'HAUTE', 'MANGE', 'ACUTE', 'ANIME', 'MAPLE', 'BADGE', 'LATTE', 'AMBLE', 'APPLE', 'ALIKE', 'VALVE', 'ALGAE', 'WAIVE', 'DANCE', 'MAIZE', 'MAUVE', 'VAGUE', 'PAYEE', 'ANKLE', 'HALVE', 'LADLE', 'GAUGE', 'AGILE', 'CABLE', 'TABLE', 'GAUZE', 'FABLE', 'EAGLE', 'AMPLE', 'ALIVE', 'ABIDE', 'BATHE', 'CACHE', 'LANCE', 'NAIVE', 'LATHE']
Played 'LAUGH', got feedback  L (-) A (-) U (x) G (-) H (x), --x-x'

Possible words remaining: 3 -> ['ANGLE', 'ALGAE', 'AGILE']

ðŸŽ‰ ðŸŽ‰ ðŸŽ‰ Congratulations! 'ANGLE' is the correct answer, after 3 plays!


In [8]:
guess="ROTAS"
answer="BRATS"
feedback = generate_feedback(guess, answer)
feedback_str = ""
for i, j in zip(guess, feedback):
    feedback_str += (f" {i} ({j})")
print("your feedback is: ", feedback_str)

your feedback is:   R (-) O (x) T (-) A (-) S (âœ“)
