Source code for fuzzy_types.utils

# !/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Filename: utils.py
# Project: fuzzy_types
# Author: Brian Cherinka
# Created: Tuesday, 7th April 2020 3:34:46 pm
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Friday, 10th April 2020 6:46:29 pm
# Modified By: Brian Cherinka


from __future__ import print_function, division, absolute_import
import six
from fuzzywuzzy import fuzz as fuzz_fuzz
from fuzzywuzzy import process as fuzz_proc
from fuzzy_types import config


[docs]def get_best_fuzzy(value, choices, min_score=None, scorer=fuzz_fuzz.WRatio, return_score=False): """ Returns the best match in a list of choices using fuzzywuzzy. Parameters: value (str): A string to match on choices (list): A list of string choices to match from min_score (int): The score cutoff threshold. The minimum score to consider when matching. scorer (fuzzywuzzy.Ratio): The fuzzywuzzy score ratio to use. Default is WRatio. return_score (bool): If True, also returns the score value of the match """ assert isinstance(value, six.string_types), 'Invalid value. Must be a string.' min_score = min_score or config.get('fuzzy_score_cutoff', 75) minfuzz = config.get('minimum_fuzzy_characters', 3) assert len(value) >= minfuzz, f'Your fuzzy search value must be at least {minfuzz} characters long.' bests = fuzz_proc.extractBests(value, choices, scorer=scorer, score_cutoff=min_score) if len(bests) == 0: best = None elif len(bests) == 1: best = bests[0] else: if bests[0][1] == bests[1][1]: best = None else: best = bests[0] if best is None: raise ValueError('Cannot find a good match for {0!r}. ' 'Your input value is too ambiguous.'.format(value)) return best if return_score else best[0]