| import pickle |
| import numpy as np |
| import pandas as pd |
|
|
|
|
def test_cosine_similarity(target):
    """Check a cosine-similarity implementation against reference word pairs.

    Loads the embeddings stored in ``./data/word_embeddings_subset.p``, calls
    ``target(A=..., B=...)`` for four word pairs and prints one message per
    failing case followed by an ANSI-coloured summary.

    A case passes when the returned value is close to the reference value
    (``np.isclose`` defaults) or lies inside the accepted ``[lower, upper]``
    interval. Any error raised while comparing the value (e.g. ``None`` or a
    multi-element array was returned) counts as a failed case.

    Args:
        target: Callable taking keyword arguments ``A`` and ``B`` (the two
            embedding entries looked up in the pickle) and returning their
            similarity score.

    Returns:
        None. The outcome is reported on standard output only.
    """
    successful_cases = 0
    failed_cases = []

    # NOTE(review): pickle.load can execute arbitrary code while loading; this
    # file is assumed to be a trusted local asset -- confirm before reuse.
    with open("./data/word_embeddings_subset.p", "rb") as embeddings_file:
        word_embeddings = pickle.load(embeddings_file)

    # Each "expected" entry is [lower bound, upper bound, reference value].
    test_cases = [
        {
            "name": "cosine_score_1",
            "input": {"A": word_embeddings["king"], "B": word_embeddings["queen"]},
            "expected": [0.650, 0.6512, 0.6510957],
        },
        {
            "name": "cosine_score_2",
            "input": {"A": word_embeddings["Japan"], "B": word_embeddings["Tokyo"]},
            "expected": [0.699, 0.701, 0.70022535],
        },
        {
            "name": "cosine_score_3",
            "input": {"A": word_embeddings["Germany"], "B": word_embeddings["Beirut"]},
            "expected": [0.172, 0.174, 0.17339969],
        },
        {
            "name": "cosine_score_4_to_catch_alternate_solution",
            "input": {"A": word_embeddings["China"], "B": word_embeddings["Chile"]},
            "expected": [0.32, 0.381, 0.3801232],
        },
    ]

    for test_case in test_cases:
        # Errors raised by the function under test are deliberately not
        # caught here, so its traceback stays visible to the caller.
        result = target(**test_case["input"])
        lower, upper, reference = test_case["expected"]

        # Explicit boolean instead of ``assert`` so the check still runs under
        # ``python -O``; a comparison that raises means the output is unusable.
        try:
            passed = bool(
                np.isclose(result, reference) or (lower <= result <= upper)
            )
        except Exception:
            passed = False

        if passed:
            successful_cases += 1
            continue

        failed_cases.append(
            {
                "name": test_case["name"],
                "expected": reference,
                "got": result,
            }
        )
        print(
            f"Wrong output in cosine similarity function. \n\tExpected: {reference}.\n\tGot: {result}."
        )

    if not failed_cases:
        print("\033[92m All tests passed")
    else:
        print("\033[92m", successful_cases, " Tests passed")
        print("\033[91m", len(failed_cases), " Tests failed")


# This is a checker that needs the function under test as an argument, not a
# pytest test; opt out of pytest collection in case it is imported by one.
test_cosine_similarity.__test__ = False
|
|
| |
|
|
|
|
def test_euclidean(target):
    """Check a Euclidean-distance implementation against reference word pairs.

    Loads the embeddings stored in ``./data/word_embeddings_subset.p``, calls
    ``target(A=..., B=...)`` for four word pairs and prints one message per
    failing case followed by an ANSI-coloured summary.

    A case passes when the returned value is close to the reference value
    (``np.isclose`` with ``rtol=1e-3, atol=1e-05``) or lies inside the
    accepted ``[lower, upper]`` interval. Any error raised while comparing the
    value counts as a failed case.

    Args:
        target: Callable taking keyword arguments ``A`` and ``B`` (the two
            embedding entries looked up in the pickle) and returning their
            distance.

    Returns:
        None. The outcome is reported on standard output only.
    """
    successful_cases = 0
    failed_cases = []

    # NOTE(review): pickle.load can execute arbitrary code while loading; this
    # file is assumed to be a trusted local asset -- confirm before reuse.
    with open("./data/word_embeddings_subset.p", "rb") as embeddings_file:
        word_embeddings = pickle.load(embeddings_file)

    # Each "expected" entry is [lower bound, upper bound, reference value].
    test_cases = [
        {
            "name": "euclidean_score_1",
            "input": {"A": word_embeddings["king"], "B": word_embeddings["queen"]},
            "expected": [2.47, 2.48, 2.4796925],
        },
        {
            "name": "euclidean_score_2",
            "input": {"A": word_embeddings["Japan"], "B": word_embeddings["Tokyo"]},
            "expected": [2.43, 2.44, 2.4345345],
        },
        {
            "name": "euclidean_score_3",
            "input": {"A": word_embeddings["Germany"], "B": word_embeddings["Beirut"]},
            "expected": [4.0, 4.1, 4.0416517],
        },
        {
            "name": "euclidean_score_4",
            "input": {"A": word_embeddings["China"], "B": word_embeddings["Chile"]},
            "expected": [3.2, 3.3, 3.2326782],
        },
    ]

    for test_case in test_cases:
        # Errors raised by the function under test are deliberately not
        # caught here, so its traceback stays visible to the caller.
        result = target(**test_case["input"])
        lower, upper, reference = test_case["expected"]

        # Explicit boolean instead of ``assert`` so the check still runs under
        # ``python -O``; a comparison that raises means the output is unusable.
        try:
            passed = bool(
                np.isclose(result, reference, rtol=1e-3, atol=1e-05)
                or (lower <= result <= upper)
            )
        except Exception:
            passed = False

        if passed:
            successful_cases += 1
            continue

        failed_cases.append(
            {
                "name": test_case["name"],
                "expected": reference,
                "got": result,
            }
        )
        print(
            f"Wrong output in the euclidean distance function. \n\tExpected: {reference}.\n\tGot: {result}."
        )

    if not failed_cases:
        print("\033[92m All tests passed")
    else:
        print("\033[92m", successful_cases, " Tests passed")
        print("\033[91m", len(failed_cases), " Tests failed")


# This is a checker that needs the function under test as an argument, not a
# pytest test; opt out of pytest collection in case it is imported by one.
test_euclidean.__test__ = False
|
|
| |
|
|
|
|
def _safe_index(value, index):
    """Return ``value[index]``, or ``None`` when that lookup is not possible."""
    try:
        return value[index]
    except (TypeError, LookupError):
        return None


def test_get_country(target):
    """Check an analogy-completion function against reference answers.

    Loads the embeddings stored in ``./data/word_embeddings_subset.p`` and
    calls ``target(city1=..., country1=..., city2=..., embeddings=...)`` for
    six cases. Three independent checks are made per case, each counted
    separately in the summary:

    1. the result is a ``tuple``;
    2. ``result[0]`` equals the expected word;
    3. ``result[1]`` is close (``np.isclose`` defaults) to the expected score.

    A result that cannot be indexed (for example ``None``) is reported as a
    failed check with ``None`` as the value obtained, instead of aborting the
    whole run.

    Args:
        target: Callable accepting the keyword arguments listed above and
            returning a ``(word, similarity)`` tuple.

    Returns:
        None. The outcome is reported on standard output only.
    """
    successful_cases = 0
    failed_cases = []

    # NOTE(review): pickle.load can execute arbitrary code while loading; this
    # file is assumed to be a trusted local asset -- confirm before reuse.
    with open("./data/word_embeddings_subset.p", "rb") as embeddings_file:
        word_embeddings = pickle.load(embeddings_file)

    test_cases = [
        {
            "name": "get_country_score_1",
            "input": {
                "city1": "Athens",
                "country1": "Greece",
                "city2": "Cairo",
                "embeddings": word_embeddings,
            },
            "expected": ("Egypt", 0.7626821),
        },
        {
            "name": "get_country_score_2_for_wrong_cosine_similarity",
            "input": {
                "city1": "oil",
                "country1": "gas",
                "city2": "town",
                "embeddings": word_embeddings,
            },
            "expected": ("village", 0.5611889),
        },
        {
            "name": "get_country_score_3",
            "input": {
                "city1": "Doha",
                "country1": "Qatar",
                "city2": "Jakarta",
                "embeddings": word_embeddings,
            },
            "expected": ("Indonesia", 0.6782036),
        },
        {
            "name": "get_country_score_4",
            "input": {
                "city1": "Tokyo",
                "country1": "Japan",
                "city2": "Canberra",
                "embeddings": word_embeddings,
            },
            "expected": ("Australia", 0.7139509),
        },
        {
            "name": "get_country_score_5_for_wrong_cosine_similarity",
            "input": {
                "city1": "joyful",
                "country1": "happy",
                "city2": "sad",
                "embeddings": word_embeddings,
            },
            "expected": ("king", 0.09570546),
        },
        {
            "name": "get_country_score_6_for_wrong_cosine_similarity",
            "input": {
                "city1": "happy",
                "country1": "joyful",
                "city2": "sad",
                "embeddings": word_embeddings,
            },
            "expected": ("Lebanon", 0.14527377),
        },
    ]

    def record_failure(name, message, expected, got):
        """Store one failed check and print its explanation."""
        failed_cases.append({"name": name, "expected": expected, "got": got})
        print(f"{message}\n\tExpected: {expected}.\n\tGot: {got}.")

    for test_case in test_cases:
        # Errors raised by the function under test are deliberately not
        # caught here, so its traceback stays visible to the caller.
        result = target(**test_case["input"])
        name = test_case["name"]
        expected_word, expected_similarity = test_case["expected"]

        # Check 1: return type.
        if isinstance(result, tuple):
            successful_cases += 1
        else:
            record_failure(
                name,
                "Wrong output type. ",
                type(test_case["expected"]),
                type(result),
            )

        # Check 2: predicted word. Indexing is done defensively so that a
        # malformed result is reported rather than raising while reporting.
        got_word = _safe_index(result, 0)
        try:
            word_matches = bool(got_word == expected_word)
        except Exception:
            word_matches = False
        if word_matches:
            successful_cases += 1
        else:
            record_failure(name, "Wrong output word. ", expected_word, got_word)

        # Check 3: similarity score.
        got_similarity = _safe_index(result, 1)
        try:
            similarity_matches = bool(np.isclose(got_similarity, expected_similarity))
        except Exception:
            similarity_matches = False
        if similarity_matches:
            successful_cases += 1
        else:
            record_failure(
                name,
                "Wrong output similarity. Maybe you should check your cosine_similarity implementation. ",
                expected_similarity,
                got_similarity,
            )

    if not failed_cases:
        print("\033[92m All tests passed")
    else:
        print("\033[92m", successful_cases, " Tests passed")
        print("\033[91m", len(failed_cases), " Tests failed")


# This is a checker that needs the function under test as an argument, not a
# pytest test; opt out of pytest collection in case it is imported by one.
test_get_country.__test__ = False
|
|
| |
|
|
|
|
def test_get_accuracy(target, data):
    """Check an accuracy function on the full data set and on a 15 % sample.

    Loads the embeddings stored in ``./data/word_embeddings_subset.p`` and
    calls ``target(word_embeddings=..., data=...)`` twice: once with ``data``
    as given and once with ``data.sample(frac=0.15, random_state=3)``. Each
    result must be close (``np.isclose`` defaults) to the stored reference
    value. Any error raised while comparing the value counts as a failed case.

    Args:
        target: Callable accepting keyword arguments ``word_embeddings`` and
            ``data`` and returning a numeric accuracy.
        data: Object providing ``.sample(frac=..., random_state=...)`` --
            presumably a pandas DataFrame; verify against the caller.

    Returns:
        None. The outcome is reported on standard output only.
    """
    successful_cases = 0
    failed_cases = []

    # NOTE(review): pickle.load can execute arbitrary code while loading; this
    # file is assumed to be a trusted local asset -- confirm before reuse.
    with open("./data/word_embeddings_subset.p", "rb") as embeddings_file:
        word_embeddings = pickle.load(embeddings_file)

    test_cases = [
        {
            "name": "default_check",
            "input": {"word_embeddings": word_embeddings, "data": data},
            "expected": 0.9192082407594425,
        },
        {
            "name": "smaller_check",
            "input": {
                "word_embeddings": word_embeddings,
                "data": data.sample(frac=0.15, random_state=3),
            },
            "expected": 0.9125168236877523,
        },
    ]

    for test_case in test_cases:
        # Errors raised by the function under test are deliberately not
        # caught here, so its traceback stays visible to the caller.
        result = target(**test_case["input"])
        expected = test_case["expected"]

        # Explicit boolean instead of ``assert`` so the check still runs under
        # ``python -O``; a comparison that raises means the output is unusable.
        try:
            passed = bool(np.isclose(result, expected))
        except Exception:
            passed = False

        if passed:
            successful_cases += 1
            continue

        failed_cases.append(
            {
                "name": test_case["name"],
                "expected": expected,
                "got": result,
            }
        )
        print(
            f"Wrong accuracy output. \n\tExpected: {expected}.\n\tGot: {result}."
        )

    if not failed_cases:
        print("\033[92m All tests passed")
    else:
        print("\033[92m", successful_cases, " Tests passed")
        print("\033[91m", len(failed_cases), " Tests failed")


# This is a checker that needs the function under test as an argument, not a
# pytest test; opt out of pytest collection in case it is imported by one.
test_get_accuracy.__test__ = False
|
|
| |
|
|
|
|
def test_compute_pca(target):
    """Check a PCA implementation against two fixed input matrices.

    Calls ``target(X=..., n_components=...)`` for a 3x10 matrix (2 components)
    and a 5x15 matrix (3 components). Three independent checks are made per
    case, each counted separately in the summary:

    1. the result is a ``numpy.ndarray``;
    2. its ``shape`` equals the shape of the reference output;
    3. its values match the reference output (``np.allclose`` defaults).

    A result without a ``shape`` attribute (for example ``None`` or a list)
    is reported as a failed check with ``None`` as the shape obtained,
    instead of aborting the whole run.

    Args:
        target: Callable accepting keyword arguments ``X`` (2-D array) and
            ``n_components`` (int) and returning the transformed array.

    Returns:
        None. The outcome is reported on standard output only.
    """
    successful_cases = 0
    failed_cases = []

    test_cases = [
        {
            "name": "default_check",
            "input": {
                "X": np.array(
                    [
                        [
                            4.17022005e-01, 7.20324493e-01, 1.14374817e-04,
                            3.02332573e-01, 1.46755891e-01, 9.23385948e-02,
                            1.86260211e-01, 3.45560727e-01, 3.96767474e-01,
                            5.38816734e-01,
                        ],
                        [
                            4.19194514e-01, 6.85219500e-01, 2.04452250e-01,
                            8.78117436e-01, 2.73875932e-02, 6.70467510e-01,
                            4.17304802e-01, 5.58689828e-01, 1.40386939e-01,
                            1.98101489e-01,
                        ],
                        [
                            8.00744569e-01, 9.68261576e-01, 3.13424178e-01,
                            6.92322616e-01, 8.76389152e-01, 8.94606664e-01,
                            8.50442114e-02, 3.90547832e-02, 1.69830420e-01,
                            8.78142503e-01,
                        ],
                    ]
                ),
                "n_components": 2,
            },
            "expected": np.array(
                [
                    [0.43437323, 0.49820384],
                    [0.42077249, -0.50351448],
                    [-0.85514571, 0.00531064],
                ]
            ),
        },
        {
            "name": "larger_check",
            "input": {
                "X": np.array(
                    [
                        [
                            0.4359949, 0.02592623, 0.54966248, 0.43532239,
                            0.4203678, 0.33033482, 0.20464863, 0.61927097,
                            0.29965467, 0.26682728, 0.62113383, 0.52914209,
                            0.13457995, 0.51357812, 0.18443987,
                        ],
                        [
                            0.78533515, 0.85397529, 0.49423684, 0.84656149,
                            0.07964548, 0.50524609, 0.0652865, 0.42812233,
                            0.09653092, 0.12715997, 0.59674531, 0.226012,
                            0.10694568, 0.22030621, 0.34982629,
                        ],
                        [
                            0.46778748, 0.20174323, 0.64040673, 0.48306984,
                            0.50523672, 0.38689265, 0.79363745, 0.58000418,
                            0.1622986, 0.70075235, 0.96455108, 0.50000836,
                            0.88952006, 0.34161365, 0.56714413,
                        ],
                        [
                            0.42754596, 0.43674726, 0.77655918, 0.53560417,
                            0.95374223, 0.54420816, 0.08209492, 0.3663424,
                            0.8508505, 0.40627504, 0.02720237, 0.24717724,
                            0.06714437, 0.99385201, 0.97058031,
                        ],
                        [
                            0.80025835, 0.60181712, 0.76495986, 0.16922545,
                            0.29302323, 0.52406688, 0.35662428, 0.04567897,
                            0.98315345, 0.44135492, 0.50400044, 0.32354132,
                            0.25974475, 0.38688989, 0.8320169,
                        ],
                    ]
                ),
                "n_components": 3,
            },
            "expected": np.array(
                [
                    [-0.32462796, 0.01881248, -0.51389463],
                    [-0.36781354, 0.88364184, 0.05985815],
                    [-0.75767901, -0.69452194, 0.12223214],
                    [1.01698298, -0.17990871, -0.33555475],
                    [0.43313753, -0.02802368, 0.66735909],
                ]
            ),
        },
    ]

    def record_failure(name, message, expected, got):
        """Store one failed check and print its explanation."""
        failed_cases.append({"name": name, "expected": expected, "got": got})
        print(f"{message}\n\tExpected: {expected}.\n\tGot: {got}.")

    for test_case in test_cases:
        # Errors raised by the function under test are deliberately not
        # caught here, so its traceback stays visible to the caller.
        result = target(**test_case["input"])
        name = test_case["name"]
        expected = test_case["expected"]

        # Check 1: return type.
        if isinstance(result, np.ndarray):
            successful_cases += 1
        else:
            record_failure(name, "Wrong output type. ", np.ndarray, type(result))

        # Check 2: shape. getattr keeps a non-array result from raising while
        # the failure itself is being reported.
        got_shape = getattr(result, "shape", None)
        try:
            shape_matches = bool(got_shape == expected.shape)
        except Exception:
            shape_matches = False
        if shape_matches:
            successful_cases += 1
        else:
            record_failure(
                name,
                "Wrong output shape. Check if you are taking the proper number of dimensions.",
                expected.shape,
                got_shape,
            )

        # Check 3: values. np.allclose raises for non-numeric input or shapes
        # that cannot be broadcast; both mean the output is wrong.
        try:
            values_match = bool(np.allclose(result, expected))
        except Exception:
            values_match = False
        if values_match:
            successful_cases += 1
        else:
            # This check compares the transformed values, so the message says
            # "values" rather than "accuracy".
            record_failure(name, "Wrong output values.", expected, result)

    if not failed_cases:
        print("\033[92m All tests passed")
    else:
        print("\033[92m", successful_cases, " Tests passed")
        print("\033[91m", len(failed_cases), " Tests failed")


# This is a checker that needs the function under test as an argument, not a
# pytest test; opt out of pytest collection in case it is imported by one.
test_compute_pca.__test__ = False
|
|
| |
|
|
|
|