forked from MirrorHub/synapse
Merge pull request #721 from matrix-org/erikj/spider
Sanitize the optional dependencies for spider API
This commit is contained in:
commit
ceeb5b909f
4 changed files with 42 additions and 39 deletions
|
@ -13,10 +13,24 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from ._base import Config
|
from ._base import Config, ConfigError
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
import sys
|
|
||||||
|
MISSING_NETADDR = (
|
||||||
|
"Missing netaddr library. This is required for URL preview API."
|
||||||
|
)
|
||||||
|
|
||||||
|
MISSING_LXML = (
|
||||||
|
"""Missing lxml library. This is required for URL preview API.
|
||||||
|
|
||||||
|
Install by running:
|
||||||
|
pip install lxml
|
||||||
|
|
||||||
|
Requires libxslt1-dev system package.
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
ThumbnailRequirement = namedtuple(
|
ThumbnailRequirement = namedtuple(
|
||||||
"ThumbnailRequirement", ["width", "height", "method", "media_type"]
|
"ThumbnailRequirement", ["width", "height", "method", "media_type"]
|
||||||
|
@ -62,18 +76,32 @@ class ContentRepositoryConfig(Config):
|
||||||
self.thumbnail_requirements = parse_thumbnail_requirements(
|
self.thumbnail_requirements = parse_thumbnail_requirements(
|
||||||
config["thumbnail_sizes"]
|
config["thumbnail_sizes"]
|
||||||
)
|
)
|
||||||
self.url_preview_enabled = config["url_preview_enabled"]
|
self.url_preview_enabled = config.get("url_preview_enabled", False)
|
||||||
if self.url_preview_enabled:
|
if self.url_preview_enabled:
|
||||||
try:
|
try:
|
||||||
from netaddr import IPSet
|
import lxml
|
||||||
if "url_preview_ip_range_blacklist" in config:
|
lxml # To stop unused lint.
|
||||||
self.url_preview_ip_range_blacklist = IPSet(
|
|
||||||
config["url_preview_ip_range_blacklist"]
|
|
||||||
)
|
|
||||||
if "url_preview_url_blacklist" in config:
|
|
||||||
self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
sys.stderr.write("\nmissing netaddr dep - disabling preview_url API\n")
|
raise ConfigError(MISSING_LXML)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from netaddr import IPSet
|
||||||
|
except ImportError:
|
||||||
|
raise ConfigError(MISSING_NETADDR)
|
||||||
|
|
||||||
|
if "url_preview_ip_range_blacklist" in config:
|
||||||
|
self.url_preview_ip_range_blacklist = IPSet(
|
||||||
|
config["url_preview_ip_range_blacklist"]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ConfigError(
|
||||||
|
"For security, you must specify an explicit target IP address "
|
||||||
|
"blacklist in url_preview_ip_range_blacklist for url previewing "
|
||||||
|
"to work"
|
||||||
|
)
|
||||||
|
|
||||||
|
if "url_preview_url_blacklist" in config:
|
||||||
|
self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
|
||||||
|
|
||||||
def default_config(self, **kwargs):
|
def default_config(self, **kwargs):
|
||||||
media_store = self.default_path("media_store")
|
media_store = self.default_path("media_store")
|
||||||
|
|
|
@ -43,7 +43,6 @@ CONDITIONAL_REQUIREMENTS = {
|
||||||
"matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"],
|
"matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"],
|
||||||
},
|
},
|
||||||
"preview_url": {
|
"preview_url": {
|
||||||
"lxml>=3.6.0": ["lxml"],
|
|
||||||
"netaddr>=0.7.18": ["netaddr"],
|
"netaddr>=0.7.18": ["netaddr"],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,8 +80,4 @@ class MediaRepositoryResource(Resource):
|
||||||
self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
|
self.putChild("thumbnail", ThumbnailResource(hs, filepaths))
|
||||||
self.putChild("identicon", IdenticonResource())
|
self.putChild("identicon", IdenticonResource())
|
||||||
if hs.config.url_preview_enabled:
|
if hs.config.url_preview_enabled:
|
||||||
try:
|
self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
|
||||||
self.putChild("preview_url", PreviewUrlResource(hs, filepaths))
|
|
||||||
except Exception as e:
|
|
||||||
logger.warn("Failed to mount preview_url")
|
|
||||||
logger.exception(e)
|
|
||||||
|
|
|
@ -40,33 +40,11 @@ import ujson as json
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
try:
|
|
||||||
from lxml import html
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class PreviewUrlResource(BaseMediaResource):
|
class PreviewUrlResource(BaseMediaResource):
|
||||||
isLeaf = True
|
isLeaf = True
|
||||||
|
|
||||||
def __init__(self, hs, filepaths):
|
def __init__(self, hs, filepaths):
|
||||||
try:
|
|
||||||
if html:
|
|
||||||
pass
|
|
||||||
except:
|
|
||||||
raise RuntimeError("Disabling PreviewUrlResource as lxml not available")
|
|
||||||
|
|
||||||
if not hasattr(hs.config, "url_preview_ip_range_blacklist"):
|
|
||||||
logger.warn(
|
|
||||||
"For security, you must specify an explicit target IP address "
|
|
||||||
"blacklist in url_preview_ip_range_blacklist for url previewing "
|
|
||||||
"to work"
|
|
||||||
)
|
|
||||||
raise RuntimeError(
|
|
||||||
"Disabling PreviewUrlResource as "
|
|
||||||
"url_preview_ip_range_blacklist not specified"
|
|
||||||
)
|
|
||||||
|
|
||||||
BaseMediaResource.__init__(self, hs, filepaths)
|
BaseMediaResource.__init__(self, hs, filepaths)
|
||||||
self.client = SpiderHttpClient(hs)
|
self.client = SpiderHttpClient(hs)
|
||||||
if hasattr(hs.config, "url_preview_url_blacklist"):
|
if hasattr(hs.config, "url_preview_url_blacklist"):
|
||||||
|
@ -201,6 +179,8 @@ class PreviewUrlResource(BaseMediaResource):
|
||||||
elif self._is_html(media_info['media_type']):
|
elif self._is_html(media_info['media_type']):
|
||||||
# TODO: somehow stop a big HTML tree from exploding synapse's RAM
|
# TODO: somehow stop a big HTML tree from exploding synapse's RAM
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tree = html.parse(media_info['filename'])
|
tree = html.parse(media_info['filename'])
|
||||||
og = yield self._calc_og(tree, media_info, requester)
|
og = yield self._calc_og(tree, media_info, requester)
|
||||||
|
|
Loading…
Reference in a new issue