// mautrix-whatsapp - A Matrix-WhatsApp puppeting bridge. // Copyright (C) 2023 Tulir Asokan // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package main import ( "fmt" "html" "regexp" "strings" "go.mau.fi/whatsmeow/types" "maunium.net/go/mautrix/event" "maunium.net/go/mautrix/format" "maunium.net/go/mautrix/id" ) var italicRegex = regexp.MustCompile("([\\s>~*]|^)_(.+?)_([^a-zA-Z\\d]|$)") var boldRegex = regexp.MustCompile("([\\s>_~]|^)\\*(.+?)\\*([^a-zA-Z\\d]|$)") var strikethroughRegex = regexp.MustCompile("([\\s>_*]|^)~(.+?)~([^a-zA-Z\\d]|$)") var codeBlockRegex = regexp.MustCompile("```(?:.|\n)+?```") var inlineURLRegex = regexp.MustCompile(`\[(.+?)]\((.+?)\)`) const mentionedJIDsContextKey = "fi.mau.whatsapp.mentioned_jids" const disableMentionsContextKey = "fi.mau.whatsapp.no_mentions" type Formatter struct { bridge *WABridge matrixHTMLParser *format.HTMLParser waReplString map[*regexp.Regexp]string waReplFunc map[*regexp.Regexp]func(string) string waReplFuncText map[*regexp.Regexp]func(string) string } func NewFormatter(bridge *WABridge) *Formatter { formatter := &Formatter{ bridge: bridge, matrixHTMLParser: &format.HTMLParser{ TabsToSpaces: 4, Newline: "\n", PillConverter: func(displayname, mxid, eventID string, ctx format.Context) string { _, disableMentions := ctx.ReturnData[disableMentionsContextKey] if mxid[0] == '@' && !disableMentions { puppet := bridge.GetPuppetByMXID(id.UserID(mxid)) if puppet != nil { jids, ok := ctx.ReturnData[mentionedJIDsContextKey].([]string) if !ok { ctx.ReturnData[mentionedJIDsContextKey] = []string{puppet.JID.String()} } else { ctx.ReturnData[mentionedJIDsContextKey] = append(jids, puppet.JID.String()) } return "@" + puppet.JID.User } } return displayname }, BoldConverter: func(text string, _ format.Context) string { return fmt.Sprintf("*%s*", text) }, ItalicConverter: func(text string, _ format.Context) string { return fmt.Sprintf("_%s_", text) }, StrikethroughConverter: func(text string, _ format.Context) string { return fmt.Sprintf("~%s~", text) }, MonospaceConverter: func(text string, _ format.Context) string { return fmt.Sprintf("```%s```", text) }, MonospaceBlockConverter: func(text, language string, _ format.Context) string { return fmt.Sprintf("```%s```", text) }, }, waReplString: map[*regexp.Regexp]string{ italicRegex: "$1$2$3", boldRegex: "$1$2$3", strikethroughRegex: "$1$2$3", }, } formatter.waReplFunc = map[*regexp.Regexp]func(string) string{ codeBlockRegex: func(str string) string { str = str[3 : len(str)-3] if strings.ContainsRune(str, '\n') { return fmt.Sprintf("
%s
", str) } return fmt.Sprintf("%s", str) }, } formatter.waReplFuncText = map[*regexp.Regexp]func(string) string{} return formatter } func (formatter *Formatter) getMatrixInfoByJID(roomID id.RoomID, jid types.JID) (mxid id.UserID, displayname string) { if puppet := formatter.bridge.GetPuppetByJID(jid); puppet != nil { mxid = puppet.MXID displayname = puppet.Displayname } if user := formatter.bridge.GetUserByJID(jid); user != nil { mxid = user.MXID member := formatter.bridge.StateStore.GetMember(roomID, user.MXID) if len(member.Displayname) > 0 { displayname = member.Displayname } } return } func (formatter *Formatter) ParseWhatsApp(roomID id.RoomID, content *event.MessageEventContent, mentionedJIDs []string, allowInlineURL, forceHTML bool) { output := html.EscapeString(content.Body) for regex, replacement := range formatter.waReplString { output = regex.ReplaceAllString(output, replacement) } for regex, replacer := range formatter.waReplFunc { output = regex.ReplaceAllStringFunc(output, replacer) } if allowInlineURL { output = inlineURLRegex.ReplaceAllStringFunc(output, func(s string) string { groups := inlineURLRegex.FindStringSubmatch(s) return fmt.Sprintf(`%s`, groups[2], groups[1]) }) } alreadyMentioned := make(map[id.UserID]struct{}) content.Mentions = &event.Mentions{} for _, rawJID := range mentionedJIDs { jid, err := types.ParseJID(rawJID) if err != nil { continue } else if jid.Server == types.LegacyUserServer { jid.Server = types.DefaultUserServer } mxid, displayname := formatter.getMatrixInfoByJID(roomID, jid) number := "@" + jid.User output = strings.ReplaceAll(output, number, fmt.Sprintf(`%s`, mxid, displayname)) content.Body = strings.ReplaceAll(content.Body, number, displayname) if _, ok := alreadyMentioned[mxid]; !ok { alreadyMentioned[mxid] = struct{}{} content.Mentions.UserIDs = append(content.Mentions.UserIDs, mxid) } } content.UnstableMentions = content.Mentions if output != content.Body || forceHTML { output = strings.ReplaceAll(output, "\n", "
") content.FormattedBody = output content.Format = event.FormatHTML for regex, replacer := range formatter.waReplFuncText { content.Body = regex.ReplaceAllStringFunc(content.Body, replacer) } } } func (formatter *Formatter) ParseMatrix(html string) (string, []string) { ctx := format.NewContext() result := formatter.matrixHTMLParser.Parse(html, ctx) mentionedJIDs, _ := ctx.ReturnData[mentionedJIDsContextKey].([]string) return result, mentionedJIDs } func (formatter *Formatter) ParseMatrixWithoutMentions(html string) string { ctx := format.NewContext() ctx.ReturnData[disableMentionsContextKey] = true return formatter.matrixHTMLParser.Parse(html, ctx) }