1
0
mirror of https://github.com/bitwarden/server.git synced 2025-04-06 05:28:15 -05:00

icon fetch fixes

This commit is contained in:
Kyle Spearrin 2018-05-25 12:50:21 -04:00
parent 6859f3ebbc
commit 1cfc95df7c
5 changed files with 12038 additions and 26 deletions

View File

@ -37,15 +37,21 @@ namespace Bit.Icons.Controllers
}
var url = $"http://{hostname}";
if(!Uri.TryCreate(url, UriKind.Absolute, out Uri uri))
if(!Uri.TryCreate(url, UriKind.Absolute, out var uri))
{
return new BadRequestResult();
}
var mappedDomain = _domainMappingService.MapDomain(uri.Host);
var domain = uri.Host;
if(DomainName.TryParseBaseDomain(domain, out var baseDomain))
{
domain = baseDomain;
}
var mappedDomain = _domainMappingService.MapDomain(domain);
if(!_memoryCache.TryGetValue(mappedDomain, out Icon icon))
{
var result = await _iconFetchingService.GetIconAsync(mappedDomain);
var result = await _iconFetchingService.GetIconAsync(domain);
if(result == null)
{
icon = null;

View File

@ -18,4 +18,8 @@
<DotNetCliToolReference Include="Microsoft.Extensions.SecretManager.Tools" Version="2.0.0" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Resources\public_suffix_list.dat" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,331 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text.RegularExpressions;
namespace Bit.Icons.Models
{
// ref: https://github.com/danesparza/domainname-parser
public class DomainName
{
// Pattern for a dotted-quad address: four dot-separated groups, each limited to 0-255.
private const string IpRegex = "^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
    "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
    "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\." +
    "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$";

/// <summary>The labels to the left of the second-level domain, as supplied to the constructor.</summary>
public string SubDomain { get; }

/// <summary>The second-level domain label, as supplied to the constructor.</summary>
public string Domain { get; }

/// <summary>Alias of <see cref="Domain"/>.</summary>
public string SLD => Domain;

/// <summary>The top-level domain (suffix) portion, as supplied to the constructor.</summary>
public string TLD { get; }

/// <summary>The suffix rule that was supplied to the constructor.</summary>
public TLDRule Rule { get; }

/// <summary>The second-level domain joined to the TLD with a dot.</summary>
public string BaseDomain => $"{Domain}.{TLD}";

/// <summary>
/// Creates a domain name from its already-separated parts.
/// </summary>
/// <param name="TLD">The top-level domain portion.</param>
/// <param name="SLD">The second-level domain label.</param>
/// <param name="SubDomain">The sub-domain portion; may be empty.</param>
/// <param name="TLDRule">The rule associated with the TLD.</param>
public DomainName(string TLD, string SLD, string SubDomain, TLDRule TLDRule)
{
    // The parameters shadow the same-named properties, hence the explicit "this.".
    this.TLD = TLD;
    Domain = SLD;
    this.SubDomain = SubDomain;
    Rule = TLDRule;
}
/// <summary>
/// Attempts to split a host name into TLD, second-level domain and sub-domain.
/// </summary>
/// <param name="domainString">The host name to parse.</param>
/// <param name="result">The parsed domain on success; <c>null</c> on failure.</param>
/// <returns><c>true</c> if parsing succeeded; <c>false</c> if parsing threw for any reason.</returns>
public static bool TryParse(string domainString, out DomainName result)
{
    try
    {
        // The parser reports every failure by throwing.
        ParseDomainName(domainString, out var tld, out var sld, out var subdomain, out var rule);
        result = new DomainName(tld, sld, subdomain, rule);
        return true;
    }
    catch
    {
        // Any failure simply means "not parseable" for a Try* method.
        result = null;
        return false;
    }
}
/// <summary>
/// Attempts to reduce a host name to its base domain (second-level domain plus TLD).
/// </summary>
/// <param name="domainString">The host name or dotted-quad IP address to reduce.</param>
/// <param name="result">
/// On success, the base domain, or the input unchanged when it matches the IP pattern;
/// otherwise <c>null</c>.
/// </param>
/// <returns><c>true</c> if a base domain was produced; otherwise <c>false</c>.</returns>
public static bool TryParseBaseDomain(string domainString, out string result)
{
    // A Try* method must not throw: Regex.IsMatch rejects a null input with
    // ArgumentNullException, so blank input is reported as a plain failure instead.
    if(string.IsNullOrWhiteSpace(domainString))
    {
        result = null;
        return false;
    }

    // An IP address has no suffix to strip; hand it back untouched.
    if(Regex.IsMatch(domainString, IpRegex))
    {
        result = domainString;
        return true;
    }

    var parsed = TryParse(domainString, out var domain);
    result = domain?.BaseDomain;
    return parsed;
}
/// <summary>
/// Splits a host name into its TLD, second-level domain and sub-domain using the
/// longest matching suffix rule.
/// </summary>
/// <param name="domainString">The host name to split; it is lower-cased before matching.</param>
/// <param name="TLD">Receives the suffix portion of the name.</param>
/// <param name="SLD">Receives the label immediately to the left of the suffix.</param>
/// <param name="SubDomain">Receives everything to the left of <paramref name="SLD"/>, or an empty string.</param>
/// <param name="MatchingRule">Receives the rule that determined the suffix.</param>
/// <exception cref="ArgumentException">The domain is null, empty or whitespace.</exception>
/// <exception cref="FormatException">
/// No rule matches the domain, or the domain has no label in front of its suffix.
/// </exception>
private static void ParseDomainName(string domainString, out string TLD, out string SLD,
    out string SubDomain, out TLDRule MatchingRule)
{
    TLD = string.Empty;
    SLD = string.Empty;
    SubDomain = string.Empty;
    MatchingRule = null;

    if(string.IsNullOrWhiteSpace(domainString))
    {
        throw new ArgumentException("The domain cannot be blank");
    }

    // Host names are identifiers, not prose: lower-case them culture-invariantly so the
    // result does not depend on the server's locale (e.g. Turkish dotless i).
    domainString = domainString.ToLowerInvariant();

    MatchingRule = FindMatchingTLDRule(domainString);
    if(MatchingRule == null)
    {
        throw new FormatException("The domain does not have a recognized TLD");
    }

    // Work out which trailing text is the suffix. For normal and wildcard rules that is
    // the rule name. For an exception rule the suffix is the rule with its left-most
    // label removed ("!city.kawasaki.jp" => suffix "kawasaki.jp"), so that the excepted
    // label itself becomes the registrable domain.
    var suffix = MatchingRule.Name;
    if(MatchingRule.Type == TLDRule.RuleType.Exception)
    {
        var firstDot = suffix.IndexOf('.');
        if(firstDot >= 0)
        {
            suffix = suffix.Substring(firstDot + 1);
        }
    }

    var tldIndex = domainString.LastIndexOf("." + suffix, StringComparison.Ordinal);
    if(tldIndex < 0)
    {
        // The whole input is itself a suffix; there is nothing registrable in front of it.
        throw new FormatException("The domain does not have a name in front of its TLD");
    }

    var subdomainAndDomain = domainString.Substring(0, tldIndex);

    if(MatchingRule.Type == TLDRule.RuleType.Wildcard)
    {
        // The wildcard swallows one more label to the left of the rule name.
        tldIndex = subdomainAndDomain.LastIndexOf('.');
        if(tldIndex < 0)
        {
            throw new FormatException("The domain does not have a name in front of its TLD");
        }
        subdomainAndDomain = domainString.Substring(0, tldIndex);
    }

    TLD = domainString.Substring(tldIndex + 1);

    // The right-most remaining label is the domain; anything before it is the sub-domain.
    var lastDot = subdomainAndDomain.LastIndexOf('.');
    SLD = lastDot < 0 ? subdomainAndDomain : subdomainAndDomain.Substring(lastDot + 1);
    if(SLD.Length == 0)
    {
        // e.g. ".com" or "a..com": an empty label cannot be a domain.
        throw new FormatException("The domain does not have a name in front of its TLD");
    }

    if(lastDot >= 0)
    {
        SubDomain = subdomainAndDomain.Substring(0, lastDot);
    }
}
/// <summary>
/// Finds the suffix rule with the longest name that matches the right-hand side of a domain.
/// </summary>
/// <param name="domainString">The domain to match, with labels separated by '.'.</param>
/// <returns>
/// The matching rule with the longest name, or <c>null</c> when no rule matches. When
/// several matches share the longest length, the one found first wins.
/// </returns>
private static TLDRule FindMatchingTLDRule(string domainString)
{
    // Both of these are the same for every label, so fetch them once up front.
    var ruleLists = TLDRulesCache.Instance.TLDRuleLists;
    var ruleTypes = Enum.GetValues(typeof(TLDRule.RuleType)).Cast<TLDRule.RuleType>().ToArray();

    var labels = domainString.Split('.');

    // Grows from the right-most label leftwards: "uk", then "co.uk", then "example.co.uk".
    var candidate = string.Empty;
    TLDRule longestMatch = null;

    for(var i = labels.Length - 1; i >= 0; i--)
    {
        candidate = candidate.Length == 0 ? labels[i] : $"{labels[i]}.{candidate}";

        foreach(var ruleType in ruleTypes)
        {
            // Longest rule wins; the strict comparison keeps the earliest match on ties.
            if(ruleLists[ruleType].TryGetValue(candidate, out var rule) &&
                (longestMatch == null || rule.Name.Length > longestMatch.Name.Length))
            {
                longestMatch = rule;
            }
        }
    }

    return longestMatch;
}
/// <summary>
/// A single suffix rule: a name plus the kind of rule (normal, wildcard or exception).
/// </summary>
public class TLDRule : IComparable<TLDRule>
{
    /// <summary>The rule text without its "*." or "!" marker.</summary>
    public string Name { get; private set; }

    /// <summary>The kind of rule, derived from the marker at the start of the rule text.</summary>
    public RuleType Type { get; private set; }

    /// <summary>
    /// Parses one rule line. A leading "*." marks a wildcard rule, a leading "!" marks an
    /// exception rule, and anything else is a normal rule.
    /// </summary>
    /// <param name="RuleInfo">The rule text; only the part before the first whitespace is used.</param>
    /// <exception cref="ArgumentNullException"><paramref name="RuleInfo"/> is null.</exception>
    public TLDRule(string RuleInfo)
    {
        if(RuleInfo == null)
        {
            throw new ArgumentNullException(nameof(RuleInfo));
        }

        // A rule ends at the first whitespace; stray padding or trailing text must not
        // become part of the name, otherwise the rule can never be looked up.
        var rule = RuleInfo.Trim();
        for(var i = 0; i < rule.Length; i++)
        {
            if(char.IsWhiteSpace(rule[i]))
            {
                rule = rule.Substring(0, i);
                break;
            }
        }

        // Ordinal comparisons: the markers are fixed ASCII characters, not localized text.
        if(rule.StartsWith("*.", StringComparison.Ordinal))
        {
            Type = RuleType.Wildcard;
            Name = rule.Substring(2);
        }
        else if(rule.StartsWith("!", StringComparison.Ordinal))
        {
            Type = RuleType.Exception;
            Name = rule.Substring(1);
        }
        else
        {
            Type = RuleType.Normal;
            Name = rule;
        }
    }

    /// <summary>Orders rules by name using an ordinal comparison.</summary>
    /// <param name="other">The rule to compare with; may be null.</param>
    /// <returns>
    /// A negative value, zero or a positive value when this rule sorts before, equal to or
    /// after <paramref name="other"/>. Any rule sorts after null.
    /// </returns>
    public int CompareTo(TLDRule other)
    {
        // IComparable contract: every instance compares greater than null.
        if(other == null)
        {
            return 1;
        }
        return string.Compare(Name, other.Name, StringComparison.Ordinal);
    }

    /// <summary>The kinds of suffix rule.</summary>
    public enum RuleType
    {
        Normal,
        Wildcard,
        Exception
    }
}
/// <summary>
/// Process-wide cache of the suffix rules read from the embedded rules resource,
/// grouped by rule type and keyed by rule name.
/// </summary>
public class TLDRulesCache
{
    // Manifest name of the embedded rules file.
    private const string RulesResourceName = "Bit.Icons.Resources.public_suffix_list.dat";

    private static volatile TLDRulesCache _uniqueInstance;
    private static readonly object _syncObj = new object();

    private TLDRulesCache()
    {
        // Load every rule up front so lookups never touch the resource again.
        TLDRuleLists = GetTLDRules();
    }

    /// <summary>
    /// The shared cache instance, created on first use (and again after <see cref="Reset"/>).
    /// </summary>
    public static TLDRulesCache Instance
    {
        get
        {
            // Check once without the lock for the common case, then again under the
            // lock so only one caller builds the instance.
            if(_uniqueInstance == null)
            {
                lock(_syncObj)
                {
                    if(_uniqueInstance == null)
                    {
                        _uniqueInstance = new TLDRulesCache();
                    }
                }
            }
            return _uniqueInstance;
        }
    }

    /// <summary>Rules grouped by type; each inner dictionary maps a rule name to its rule.</summary>
    public IDictionary<TLDRule.RuleType, IDictionary<string, TLDRule>> TLDRuleLists { get; set; }

    /// <summary>Drops the shared instance so the next access to <see cref="Instance"/> reloads the rules.</summary>
    public static void Reset()
    {
        lock(_syncObj)
        {
            _uniqueInstance = null;
        }
    }

    /// <summary>Reads the rules resource and builds one name-to-rule dictionary per rule type.</summary>
    private IDictionary<TLDRule.RuleType, IDictionary<string, TLDRule>> GetTLDRules()
    {
        var results = new Dictionary<TLDRule.RuleType, IDictionary<string, TLDRule>>();
        foreach(var ruleType in Enum.GetValues(typeof(TLDRule.RuleType)).Cast<TLDRule.RuleType>())
        {
            // Rule names are identifiers, so compare them ordinally rather than by the
            // current culture's linguistic rules.
            results[ruleType] = new Dictionary<string, TLDRule>(StringComparer.OrdinalIgnoreCase);
        }

        foreach(var line in ReadRulesData())
        {
            // Trim before both filtering and parsing so that padded lines and indented
            // comments are handled the same way as clean ones.
            var ruleString = line.Trim();
            if(ruleString.Length == 0 || ruleString.StartsWith("//", StringComparison.Ordinal))
            {
                continue;
            }

            var rule = new TLDRule(ruleString);
            results[rule.Type][rule.Name] = rule;
        }

        Debug.WriteLine($"Loaded {results.Values.Sum(r => r.Count)} rules into cache.");
        return results;
    }

    /// <summary>Streams the lines of the embedded rules resource.</summary>
    /// <exception cref="InvalidOperationException">The embedded resource is missing from the assembly.</exception>
    private IEnumerable<string> ReadRulesData()
    {
        var assembly = typeof(TLDRulesCache).GetTypeInfo().Assembly;
        var stream = assembly.GetManifestResourceStream(RulesResourceName);
        if(stream == null)
        {
            // GetManifestResourceStream returns null rather than throwing when the
            // resource is absent; fail with a message that names what is missing.
            throw new InvalidOperationException(
                $"Embedded resource '{RulesResourceName}' was not found.");
        }

        // Disposing the reader also disposes the underlying stream.
        using(var reader = new StreamReader(stream))
        {
            string line;
            while((line = reader.ReadLine()) != null)
            {
                yield return line;
            }
        }
    }
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -17,7 +17,7 @@ namespace Bit.Icons.Services
private static readonly HttpClient _httpClient = new HttpClient(new HttpClientHandler
{
AllowAutoRedirect = false,
AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate,
});
private static string _pngMediaType = "image/png";
private static byte[] _pngHeader = new byte[] { 137, 80, 78, 71 };
@ -27,22 +27,34 @@ namespace Bit.Icons.Services
private static string _jpegMediaType = "image/jpeg";
private static byte[] _jpegHeader = new byte[] { 255, 216, 255 };
private static string _octetMediaType = "application/octet-stream";
private static string _textMediaType = "text/plain";
private static readonly HashSet<string> _allowedMediaTypes = new HashSet<string>{
_pngMediaType,
_icoMediaType,
_icoAltMediaType,
_jpegMediaType,
_octetMediaType
_octetMediaType,
_textMediaType
};
/// <summary>
/// Configures the shared HTTP client exactly once per process.
/// </summary>
static IconFetchingService()
{
    // HttpClient.Timeout may only be changed before the client sends its first request.
    // Setting it from the instance constructor would throw InvalidOperationException as
    // soon as a second service instance is created after the shared client has been used.
    _httpClient.Timeout = TimeSpan.FromSeconds(20);
}

/// <summary>Creates the service; the shared HTTP client is configured statically.</summary>
public IconFetchingService()
{
}
public async Task<IconResult> GetIconAsync(string domain)
{
var uri = new Uri($"http://{domain}");
var uri = new Uri($"https://{domain}");
var response = await GetAndFollowAsync(uri, 2);
if(response == null || !response.IsSuccessStatusCode)
{
uri = new Uri($"https://{domain}");
uri = new Uri($"http://{domain}");
response = await GetAndFollowAsync(uri, 2);
if(response == null || !response.IsSuccessStatusCode)
{
uri = new Uri($"https://www.{domain}");
response = await GetAndFollowAsync(uri, 2);
}
}
if(response?.Content == null || !response.IsSuccessStatusCode)
@ -50,18 +62,36 @@ namespace Bit.Icons.Services
return null;
}
if(response.Content.Headers?.ContentType?.MediaType != "text/html")
uri = response.RequestMessage.RequestUri;
var doc = new HtmlDocument();
try
{
var html = await response.Content.ReadAsStringAsync();
if(html == null)
{
return null;
}
doc.LoadHtml(html);
if(doc.DocumentNode == null)
{
return null;
}
}
catch
{
return null;
}
uri = response.RequestMessage.RequestUri;
var html = await response.Content.ReadAsStringAsync();
var doc = new HtmlDocument();
doc.LoadHtml(html);
if(doc.DocumentNode == null)
var baseUrl = "/";
var baseUrlNodes = doc.DocumentNode.SelectNodes(@"//base[@href]");
if(baseUrlNodes != null && baseUrlNodes.Count > 0)
{
return null;
var hrefAttr = baseUrlNodes[0].Attributes["href"];
if(!string.IsNullOrWhiteSpace(hrefAttr?.Value))
{
baseUrl = hrefAttr.Value;
}
}
var icons = new List<IconResult>();
@ -77,17 +107,21 @@ namespace Bit.Icons.Services
}
var relAttr = link.Attributes["rel"];
if(relAttr != null && _iconRels.Contains(relAttr.Value))
if(relAttr != null && _iconRels.Contains(relAttr.Value.ToLower()))
{
icons.Add(new IconResult(hrefAttr.Value, link));
}
else
{
var extension = Path.GetExtension(hrefAttr.Value);
if(_iconExtensions.Contains(extension))
try
{
icons.Add(new IconResult(hrefAttr.Value, link));
var extension = Path.GetExtension(hrefAttr.Value);
if(_iconExtensions.Contains(extension.ToLower()))
{
icons.Add(new IconResult(hrefAttr.Value, link));
}
}
catch(ArgumentException) { }
}
}
}
@ -96,9 +130,13 @@ namespace Bit.Icons.Services
foreach(var icon in icons)
{
Uri iconUri = null;
if(Uri.TryCreate(icon.Path, UriKind.Relative, out Uri relUri))
if(icon.Path.StartsWith("//"))
{
iconUri = new Uri($"{uri.Scheme}://{uri.Host}/{relUri.OriginalString}");
iconUri = new Uri($"{uri.Scheme}://{icon.Path.Substring(2)}");
}
else if(Uri.TryCreate(icon.Path, UriKind.Relative, out Uri relUri))
{
iconUri = ResolveUri($"{uri.Scheme}://{uri.Host}", baseUrl, relUri.OriginalString);
}
else if(Uri.TryCreate(icon.Path, UriKind.Absolute, out Uri absUri))
{
@ -123,7 +161,7 @@ namespace Bit.Icons.Services
await Task.WhenAll(iconResultTasks);
if(!icons.Any(i => i.Icon != null))
{
var faviconUri = new Uri($"{uri.Scheme}://{uri.Host}/favicon.ico");
var faviconUri = ResolveUri($"{uri.Scheme}://{uri.Host}", "favicon.ico");
var result = await GetIconAsync(faviconUri);
if(result != null)
{
@ -153,7 +191,7 @@ namespace Bit.Icons.Services
}
var bytes = await response.Content.ReadAsByteArrayAsync();
if(format == _octetMediaType)
if(format == _octetMediaType || format == _textMediaType)
{
if(HeaderMatch(bytes, _icoHeader))
{
@ -197,11 +235,12 @@ namespace Bit.Icons.Services
// Let's add some headers to look like we're coming from a web browser request. Some websites
// will block our request without these.
message.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36");
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36");
message.Headers.Add("Accept-Language", "en-US,en;q=0.8");
message.Headers.Add("Cache-Control", "no-cache");
message.Headers.Add("Pragma", "no-cache");
message.Headers.Add("Accept", "image/webp,image/apng,image/*,*/*;q=0.8");
message.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;" +
"q=0.9,image/webp,image/apng,*/*;q=0.8");
try
{
@ -223,7 +262,8 @@ namespace Bit.Icons.Services
if(!(response.StatusCode == HttpStatusCode.Redirect ||
response.StatusCode == HttpStatusCode.MovedPermanently ||
response.StatusCode == HttpStatusCode.RedirectKeepVerb) ||
response.StatusCode == HttpStatusCode.RedirectKeepVerb ||
response.StatusCode == HttpStatusCode.SeeOther) ||
!response.Headers.Contains("Location"))
{
return null;
@ -237,7 +277,7 @@ namespace Bit.Icons.Services
if(Uri.TryCreate(locationHeader, UriKind.Relative, out Uri relLocation))
{
var requestUri = response.RequestMessage.RequestUri;
location = new Uri($"{requestUri.Scheme}://{requestUri.Host}/{relLocation.OriginalString}");
location = ResolveUri($"{requestUri.Scheme}://{requestUri.Host}", relLocation.OriginalString);
}
else
{
@ -263,5 +303,18 @@ namespace Bit.Icons.Services
{
return imageBytes.Length >= header.Length && header.SequenceEqual(imageBytes.Take(header.Length));
}
/// <summary>
/// Resolves a chain of paths against a base URL, each one relative to the result of the
/// previous step (an absolute path in the chain replaces the running result).
/// </summary>
/// <param name="baseUrl">Absolute URL to start from.</param>
/// <param name="paths">Paths to apply in order; entries that cannot be resolved are skipped.</param>
/// <returns>The URI obtained after applying every resolvable path.</returns>
/// <exception cref="UriFormatException"><paramref name="baseUrl"/> is not a valid absolute URL.</exception>
private Uri ResolveUri(string baseUrl, params string[] paths)
{
    // Keep the running result as a Uri instead of round-tripping through ToString():
    // ToString() yields the unescaped display form, which is not meant to be re-parsed.
    var resolved = new Uri(baseUrl);
    if(paths == null)
    {
        return resolved;
    }

    foreach(var path in paths)
    {
        if(Uri.TryCreate(resolved, path, out var next))
        {
            resolved = next;
        }
    }
    return resolved;
}
}
}