refactor (backend): port nodeinfo fetcher to backend-rs

This commit is contained in:
naskya 2024-05-06 08:12:21 +09:00
parent 711618b42c
commit dd74eabae1
No known key found for this signature in database
GPG key ID: 712D413B3A9FED5C
8 changed files with 308 additions and 105 deletions

View file

@ -1155,8 +1155,106 @@ export interface Webhook {
latestStatus: number | null
}
export function initializeRustLogger(): void
/**
 * Fetches the nodeinfo document of a remote server.
 *
 * The native implementation requests `https://<host>/.well-known/nodeinfo`,
 * takes the first listed link whose `rel` is the NodeInfo 2.1 or 2.0 schema,
 * then downloads and parses the linked document.
 *
 * @param host - Host name of the remote server (no scheme or path).
 * @returns The parsed nodeinfo document. The promise presumably rejects when
 *   any request fails, a non-success status is returned, the body is not valid
 *   JSON, or no supported link is listed — confirm against the binding.
 */
export function fetchNodeinfo(host: string): Promise<Nodeinfo>
export function nodeinfo_2_1(): Promise<any>
export function nodeinfo_2_0(): Promise<any>
/**
 * NodeInfo schema version 2.0. https://nodeinfo.diaspora.software/docson/index.html#/ns/schema/2.0
 *
 * This is the shape resolved by `fetchNodeinfo`.
 * NOTE(review): the fetcher also follows schema 2.1 links and parses the
 * response into this same 2.0 shape, so `version` may presumably be "2.1"
 * at runtime — confirm before relying on the exact value.
 */
export interface Nodeinfo {
/** The schema version, must be 2.0. */
version: string
/** Metadata about server software in use. */
software: Software20
/** The protocols supported on this server. */
protocols: Array<Protocol>
/** The third party sites this server can connect to via their application API. */
services: Services
/** Whether this server allows open self-registration. */
openRegistrations: boolean
/** Usage statistics for this server. */
usage: Usage
/** Free form key value pairs for software specific values. Clients should not rely on any specific key present. */
metadata: Record<string, any>
}
/**
 * Metadata about server software in use (version 2.0).
 *
 * Only `name` and `version` are part of this type; additional software fields
 * that a 2.1 document may carry (e.g. `repository`, `homepage`) are not exposed here.
 */
export interface Software20 {
/** The canonical name of this server software. */
name: string
/** The version of this server software. */
version: string
}
/**
 * The protocols a server can declare in the `protocols` list of its nodeinfo document.
 * Each value is the lowercase form of the member name.
 */
export enum Protocol {
Activitypub = 'activitypub',
Buddycloud = 'buddycloud',
Dfrn = 'dfrn',
Diaspora = 'diaspora',
Libertree = 'libertree',
Ostatus = 'ostatus',
Pumpio = 'pumpio',
Tent = 'tent',
Xmpp = 'xmpp',
Zot = 'zot'
}
/**
 * The third party sites this server can connect to via their application API.
 * Either list may be empty when the server declares no such integration.
 */
export interface Services {
/** The third party sites this server can retrieve messages from for combined display with regular traffic. */
inbound: Array<Inbound>
/** The third party sites this server can publish messages to on the behalf of a user. */
outbound: Array<Outbound>
}
/**
 * The third party sites this server can retrieve messages from for combined display with regular traffic.
 *
 * NOTE(review): the Rust schema deserializes the `Atom1` variant from the JSON
 * value "atom1.0", whereas the value exposed here is 'atom1' (and presumably
 * likewise "rss2.0" vs 'rss2'). Confirm which spelling JS callers actually
 * receive before comparing against raw nodeinfo strings.
 */
export enum Inbound {
Atom1 = 'atom1',
Gnusocial = 'gnusocial',
Imap = 'imap',
Pnut = 'pnut',
Pop3 = 'pop3',
Pumpio = 'pumpio',
Rss2 = 'rss2',
Twitter = 'twitter'
}
/**
 * The third party sites this server can publish messages to on the behalf of a user.
 *
 * NOTE(review): nodeinfo documents spell two of these services "atom1.0" and
 * "rss2.0" on the wire (the Rust schema renames `Atom1` to "atom1.0" for serde),
 * whereas the values exposed here are 'atom1' and 'rss2'. Confirm which spelling
 * JS callers actually receive before comparing against raw nodeinfo strings.
 */
export enum Outbound {
Atom1 = 'atom1',
Blogger = 'blogger',
Buddycloud = 'buddycloud',
Diaspora = 'diaspora',
Dreamwidth = 'dreamwidth',
Drupal = 'drupal',
Facebook = 'facebook',
Friendica = 'friendica',
Gnusocial = 'gnusocial',
Google = 'google',
Insanejournal = 'insanejournal',
Libertree = 'libertree',
Linkedin = 'linkedin',
Livejournal = 'livejournal',
Mediagoblin = 'mediagoblin',
Myspace = 'myspace',
Pinterest = 'pinterest',
Pnut = 'pnut',
Posterous = 'posterous',
Pumpio = 'pumpio',
Redmatrix = 'redmatrix',
Rss2 = 'rss2',
Smtp = 'smtp',
Tent = 'tent',
Tumblr = 'tumblr',
Twitter = 'twitter',
Wordpress = 'wordpress',
Xmpp = 'xmpp'
}
/**
 * Usage statistics for this server.
 * Counters are `null` when the server does not report them.
 */
export interface Usage {
/** Statistics about the users of this server. */
users: Users
/** Number of posts made by users registered on this server (per the NodeInfo 2.0 schema), or `null` if not reported. */
localPosts: number | null
/** Number of comments made by users registered on this server (per the NodeInfo 2.0 schema), or `null` if not reported. */
localComments: number | null
}
/**
 * Statistics about the users of this server.
 * Counters are `null` when the server does not report them.
 */
export interface Users {
/** Total number of users on this server, or `null` if not reported. */
total: number | null
/** Number of users active within the last half year (180 days per the NodeInfo 2.0 schema), or `null` if not reported. */
activeHalfyear: number | null
/** Number of users active within the last month (30 days per the NodeInfo 2.0 schema), or `null` if not reported. */
activeMonth: number | null
}
export function watchNote(watcherId: string, noteAuthorId: string, noteId: string): Promise<void>
export function unwatchNote(watcherId: string, noteId: string): Promise<void>
export function publishToChannelStream(channelId: string, userId: string): void

View file

@ -310,7 +310,7 @@ if (!nativeBinding) {
throw new Error(`Failed to load native binding`)
}
const { SECOND, MINUTE, HOUR, DAY, USER_ONLINE_THRESHOLD, USER_ACTIVE_THRESHOLD, FILE_TYPE_BROWSERSAFE, loadEnv, loadConfig, stringToAcct, acctToString, addNoteToAntenna, isBlockedServer, isSilencedServer, isAllowedServer, checkWordMute, getFullApAccount, isSelfHost, isSameOrigin, extractHost, toPuny, isUnicodeEmoji, sqlLikeEscape, safeForSql, formatMilliseconds, getImageSizeFromUrl, getNoteSummary, latestVersion, toMastodonId, fromMastodonId, fetchMeta, metaToPugArgs, nyaify, hashPassword, verifyPassword, isOldPasswordAlgorithm, decodeReaction, countReactions, toDbReaction, removeOldAttestationChallenges, AntennaSrcEnum, DriveFileUsageHintEnum, MutedNoteReasonEnum, NoteVisibilityEnum, NotificationTypeEnum, PageVisibilityEnum, PollNotevisibilityEnum, RelayStatusEnum, UserEmojimodpermEnum, UserProfileFfvisibilityEnum, UserProfileMutingnotificationtypesEnum, initializeRustLogger, nodeinfo_2_1, nodeinfo_2_0, watchNote, unwatchNote, publishToChannelStream, ChatEvent, publishToChatStream, ChatIndexEvent, publishToChatIndexStream, publishToBroadcastStream, publishToGroupChatStream, publishToModerationStream, getTimestamp, genId, genIdAt, secureRndstr } = nativeBinding
const { SECOND, MINUTE, HOUR, DAY, USER_ONLINE_THRESHOLD, USER_ACTIVE_THRESHOLD, FILE_TYPE_BROWSERSAFE, loadEnv, loadConfig, stringToAcct, acctToString, addNoteToAntenna, isBlockedServer, isSilencedServer, isAllowedServer, checkWordMute, getFullApAccount, isSelfHost, isSameOrigin, extractHost, toPuny, isUnicodeEmoji, sqlLikeEscape, safeForSql, formatMilliseconds, getImageSizeFromUrl, getNoteSummary, latestVersion, toMastodonId, fromMastodonId, fetchMeta, metaToPugArgs, nyaify, hashPassword, verifyPassword, isOldPasswordAlgorithm, decodeReaction, countReactions, toDbReaction, removeOldAttestationChallenges, AntennaSrcEnum, DriveFileUsageHintEnum, MutedNoteReasonEnum, NoteVisibilityEnum, NotificationTypeEnum, PageVisibilityEnum, PollNotevisibilityEnum, RelayStatusEnum, UserEmojimodpermEnum, UserProfileFfvisibilityEnum, UserProfileMutingnotificationtypesEnum, initializeRustLogger, fetchNodeinfo, nodeinfo_2_1, nodeinfo_2_0, Protocol, Inbound, Outbound, watchNote, unwatchNote, publishToChannelStream, ChatEvent, publishToChatStream, ChatIndexEvent, publishToChatIndexStream, publishToBroadcastStream, publishToGroupChatStream, publishToModerationStream, getTimestamp, genId, genIdAt, secureRndstr } = nativeBinding
module.exports.SECOND = SECOND
module.exports.MINUTE = MINUTE
@ -364,8 +364,12 @@ module.exports.UserEmojimodpermEnum = UserEmojimodpermEnum
module.exports.UserProfileFfvisibilityEnum = UserProfileFfvisibilityEnum
module.exports.UserProfileMutingnotificationtypesEnum = UserProfileMutingnotificationtypesEnum
module.exports.initializeRustLogger = initializeRustLogger
module.exports.fetchNodeinfo = fetchNodeinfo
module.exports.nodeinfo_2_1 = nodeinfo_2_1
module.exports.nodeinfo_2_0 = nodeinfo_2_0
module.exports.Protocol = Protocol
module.exports.Inbound = Inbound
module.exports.Outbound = Outbound
module.exports.watchNote = watchNote
module.exports.unwatchNote = unwatchNote
module.exports.publishToChannelStream = publishToChannelStream

View file

@ -0,0 +1,161 @@
use crate::service::nodeinfo::schema::*;
use crate::util::http_client;
use isahc::AsyncReadResponseExt;
use serde::{Deserialize, Serialize};
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Http client aquisition error: {0}")]
HttpClientErr(#[from] http_client::Error),
#[error("Http error: {0}")]
HttpErr(#[from] isahc::Error),
#[error("Bad status: {0}")]
BadStatus(String),
#[error("Failed to parse response body as text: {0}")]
ResponseErr(#[from] std::io::Error),
#[error("Failed to parse response body as json: {0}")]
JsonErr(#[from] serde_json::Error),
#[error("No nodeinfo provided")]
MissingNodeinfo,
}
/// Body of the `/.well-known/nodeinfo` discovery document: the list of
/// nodeinfo documents a server advertises.
#[derive(Deserialize, Serialize, Debug)]
pub struct NodeinfoLinks {
// every advertised link, in the order the server listed them
links: Vec<NodeinfoLink>,
}
/// A single entry of the `/.well-known/nodeinfo` discovery document.
#[derive(Deserialize, Serialize, Debug)]
pub struct NodeinfoLink {
// schema identifier, e.g. "http://nodeinfo.diaspora.software/ns/schema/2.0"
rel: String,
// URL of the nodeinfo document that follows the schema named by `rel`
href: String,
}
/// Builds the HTTPS URL of the `/.well-known/nodeinfo` discovery document for `host`.
#[inline]
fn wellknown_nodeinfo_url(host: &str) -> String {
    ["https://", host, "/.well-known/nodeinfo"].concat()
}
async fn fetch_nodeinfo_links(host: &str) -> Result<NodeinfoLinks, Error> {
let client = http_client::client()?;
let wellknown_url = wellknown_nodeinfo_url(host);
let mut wellknown_response = client.get_async(&wellknown_url).await?;
if !wellknown_response.status().is_success() {
tracing::debug!("{:#?}", wellknown_response.body());
return Err(Error::BadStatus(format!(
"{} returned {}",
wellknown_url,
wellknown_response.status()
)));
}
Ok(serde_json::from_str(&wellknown_response.text().await?)?)
}
fn check_nodeinfo_link(links: NodeinfoLinks) -> Result<String, Error> {
for link in links.links {
if link.rel == "http://nodeinfo.diaspora.software/ns/schema/2.1"
|| link.rel == "http://nodeinfo.diaspora.software/ns/schema/2.0"
{
return Ok(link.href);
}
}
Err(Error::MissingNodeinfo)
}
async fn fetch_nodeinfo_impl(nodeinfo_link: &str) -> Result<Nodeinfo20, Error> {
let client = http_client::client()?;
let mut response = client.get_async(nodeinfo_link).await?;
if !response.status().is_success() {
tracing::debug!("{:#?}", response.body());
return Err(Error::BadStatus(format!(
"{} returned {}",
nodeinfo_link,
response.status()
)));
}
Ok(serde_json::from_str(&response.text().await?)?)
}
// for napi export
type Nodeinfo = Nodeinfo20;

/// Fetches the nodeinfo of `host`: reads the well-known discovery document,
/// selects a NodeInfo 2.1 / 2.0 link from it, then downloads and parses the
/// linked document.
#[crate::export]
pub async fn fetch_nodeinfo(host: &str) -> Result<Nodeinfo, Error> {
    tracing::info!("fetching from {}", host);

    let discovered = fetch_nodeinfo_links(host).await?;
    let document_url = check_nodeinfo_link(discovered)?;

    fetch_nodeinfo_impl(&document_url).await
}
#[cfg(test)]
mod unit_test {
    use super::{check_nodeinfo_link, fetch_nodeinfo, NodeinfoLink, NodeinfoLinks};
    use pretty_assertions::assert_eq;

    /// Builds a discovery document from `(rel, href)` pairs, keeping their order.
    fn links_from(pairs: &[(&str, &str)]) -> NodeinfoLinks {
        NodeinfoLinks {
            links: pairs
                .iter()
                .map(|&(rel, href)| NodeinfoLink {
                    rel: rel.to_owned(),
                    href: href.to_owned(),
                })
                .collect(),
        }
    }

    #[test]
    fn test_check_nodeinfo_link() {
        // an unrelated rel is skipped and the 2.0 link is picked
        let with_2_0 = links_from(&[
            (
                "https://example.com/incorrect/schema/2.0",
                "https://example.com/dummy",
            ),
            (
                "http://nodeinfo.diaspora.software/ns/schema/2.0",
                "https://example.com/real",
            ),
        ]);
        assert_eq!(
            check_nodeinfo_link(with_2_0).unwrap(),
            "https://example.com/real"
        );

        // an unrelated rel is skipped and the 2.1 link is picked
        let with_2_1 = links_from(&[
            (
                "https://example.com/incorrect/schema/2.0",
                "https://example.com/dummy",
            ),
            (
                "http://nodeinfo.diaspora.software/ns/schema/2.1",
                "https://example.com/real",
            ),
        ]);
        assert_eq!(
            check_nodeinfo_link(with_2_1).unwrap(),
            "https://example.com/real"
        );

        // no supported rel at all is an error
        let unsupported_only = links_from(&[
            (
                "https://example.com/incorrect/schema/2.0",
                "https://example.com/dummy/2.0",
            ),
            (
                "https://example.com/incorrect/schema/2.1",
                "https://example.com/dummy/2.1",
            ),
        ]);
        check_nodeinfo_link(unsupported_only).expect_err("No nodeinfo");
    }

    // NOTE: this test performs a real network request to info.firefish.dev
    #[tokio::test]
    async fn test_fetch_nodeinfo() {
        let nodeinfo = fetch_nodeinfo("info.firefish.dev").await.unwrap();
        assert_eq!(nodeinfo.software.name, "firefish");
    }
}

View file

@ -48,7 +48,7 @@ async fn statistics() -> Result<(u64, u64, u64, u64), DbErr> {
)
}
async fn get_new_nodeinfo_2_1() -> Result<Nodeinfo21, Error> {
async fn generate_nodeinfo_2_1() -> Result<Nodeinfo21, Error> {
let (local_users, local_active_halfyear, local_active_month, local_posts) =
statistics().await?;
let meta = fetch_meta(true).await?;
@ -75,8 +75,10 @@ async fn get_new_nodeinfo_2_1() -> Result<Nodeinfo21, Error> {
"enableGuestTimeline".to_string(),
json!(meta.enable_guest_timeline),
),
("maintainerName".to_string(), json!(meta.maintainer_name)),
("maintainerEmail".to_string(), json!(meta.maintainer_email)),
(
"maintainer".to_string(),
json!({"name":meta.maintainer_name,"email":meta.maintainer_email}),
),
("proxyAccountName".to_string(), json!(meta.proxy_account_id)),
(
"themeColor".to_string(),
@ -100,11 +102,11 @@ async fn get_new_nodeinfo_2_1() -> Result<Nodeinfo21, Error> {
open_registrations: !meta.disable_registration,
usage: Usage {
users: Users {
total: Some(local_users),
active_halfyear: Some(local_active_halfyear),
active_month: Some(local_active_month),
total: Some(local_users as u32),
active_halfyear: Some(local_active_halfyear as u32),
active_month: Some(local_active_month as u32),
},
local_posts: Some(local_posts),
local_posts: Some(local_posts as u32),
local_comments: None,
},
metadata,
@ -119,7 +121,7 @@ pub async fn nodeinfo_2_1() -> Result<Nodeinfo21, Error> {
if let Some(nodeinfo) = cached {
Ok(nodeinfo)
} else {
let nodeinfo = get_new_nodeinfo_2_1().await?;
let nodeinfo = generate_nodeinfo_2_1().await?;
cache::set(NODEINFO_2_1_CACHE_KEY, &nodeinfo, 60 * 60)?;
Ok(nodeinfo)
}

View file

@ -1,2 +1,3 @@
pub mod fetch;
pub mod generate;
pub mod schema;

View file

@ -28,6 +28,7 @@ pub struct Nodeinfo21 {
/// NodeInfo schema version 2.0. https://nodeinfo.diaspora.software/docson/index.html#/ns/schema/2.0
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
#[crate::export(object, js_name = "Nodeinfo")]
pub struct Nodeinfo20 {
/// The schema version, must be 2.0.
pub version: String,
@ -62,6 +63,7 @@ pub struct Software21 {
/// Metadata about server software in use (version 2.0).
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
#[crate::export(object)]
pub struct Software20 {
/// The canonical name of this server software.
pub name: String,
@ -71,6 +73,7 @@ pub struct Software20 {
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
#[crate::export(string_enum = "lowercase")]
pub enum Protocol {
Activitypub,
Buddycloud,
@ -87,6 +90,7 @@ pub enum Protocol {
/// The third party sites this server can connect to via their application API.
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
#[crate::export(object)]
pub struct Services {
/// The third party sites this server can retrieve messages from for combined display with regular traffic.
pub inbound: Vec<Inbound>,
@ -97,6 +101,7 @@ pub struct Services {
/// The third party sites this server can retrieve messages from for combined display with regular traffic.
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
#[crate::export(string_enum = "lowercase")]
pub enum Inbound {
#[serde(rename = "atom1.0")]
Atom1,
@ -114,6 +119,7 @@ pub enum Inbound {
/// The third party sites this server can publish messages to on the behalf of a user.
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
#[crate::export(string_enum = "lowercase")]
pub enum Outbound {
#[serde(rename = "atom1.0")]
Atom1,
@ -150,19 +156,21 @@ pub enum Outbound {
/// Usage statistics for this server.
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
#[crate::export(object)]
pub struct Usage {
pub users: Users,
pub local_posts: Option<u64>,
pub local_comments: Option<u64>,
pub local_posts: Option<u32>,
pub local_comments: Option<u32>,
}
/// statistics about the users of this server.
#[derive(Deserialize, Serialize, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
#[crate::export(object)]
pub struct Users {
pub total: Option<u64>,
pub active_halfyear: Option<u64>,
pub active_month: Option<u64>,
pub total: Option<u32>,
pub active_halfyear: Option<u32>,
pub active_month: Option<u32>,
}
impl From<Software21> for Software20 {
@ -243,7 +251,7 @@ mod unit_test {
assert_eq!(parsed_2.software.name, "meisskey");
assert_eq!(parsed_2.software.version, "10.102.699-m544");
let json_str_3 = r##"{"metadata":{"enableGlobalTimeline":true,"enableGuestTimeline":false,"enableLocalTimeline":true,"enableRecommendedTimeline":false,"maintainerEmail":"","maintainerName":"Firefish dev team","nodeDescription":"","nodeName":"Firefish","repositoryUrl":"https://firefish.dev/firefish/firefish","themeColor":"#F25A85"},"openRegistrations":false,"protocols":["activitypub"],"services":{"inbound":[],"outbound":["atom1.0","rss2.0"]},"software":{"homepage":"https://firefish.dev/firefish/firefish","name":"firefish","repository":"https://firefish.dev/firefish/firefish","version":"20240504"},"usage":{"localPosts":23857,"users":{"activeHalfyear":7,"activeMonth":7,"total":9}},"version":"2.1"}"##;
let json_str_3 = r##"{"metadata":{"enableGlobalTimeline":true,"enableGuestTimeline":false,"enableLocalTimeline":true,"enableRecommendedTimeline":false,"maintainer":{"name":"Firefish dev team"},"nodeDescription":"","nodeName":"Firefish","repositoryUrl":"https://firefish.dev/firefish/firefish","themeColor":"#F25A85"},"openRegistrations":false,"protocols":["activitypub"],"services":{"inbound":[],"outbound":["atom1.0","rss2.0"]},"software":{"homepage":"https://firefish.dev/firefish/firefish","name":"firefish","repository":"https://firefish.dev/firefish/firefish","version":"20240504"},"usage":{"localPosts":23857,"users":{"activeHalfyear":7,"activeMonth":7,"total":9}},"version":"2.1"}"##;
let parsed_3: Nodeinfo20 = serde_json::from_str(json_str_3).unwrap();
let serialized_3 = serde_json::to_string(&parsed_3).unwrap();
let reparsed_3: Nodeinfo20 = serde_json::from_str(&serialized_3).unwrap();

View file

@ -18,6 +18,7 @@ pub fn client() -> Result<HttpClient, Error> {
.get_or_try_init(|| {
let mut builder = HttpClient::builder()
.timeout(Duration::from_secs(10))
.default_header("user-agent", &CONFIG.user_agent)
.dns_cache(DnsCache::Timeout(Duration::from_secs(60 * 60)));
if let Some(proxy_url) = &CONFIG.proxy {

View file

@ -10,6 +10,7 @@ import {
import { Instances } from "@/models/index.js";
import { getFetchInstanceMetadataLock } from "@/misc/app-lock.js";
import Logger from "@/services/logger.js";
import { type Nodeinfo, fetchNodeinfo } from "backend-rs";
import { inspect } from "node:util";
const logger = new Logger("metadata", "cyan");
@ -36,7 +37,7 @@ export async function fetchInstanceMetadata(
try {
const [info, dom, manifest] = await Promise.all([
fetchNodeinfo(instance).catch(() => null),
fetchNodeinfo(instance.host).catch(() => null),
fetchDom(instance).catch(() => null),
fetchManifest(instance).catch(() => null),
]);
@ -57,30 +58,26 @@ export async function fetchInstanceMetadata(
if (info) {
updates.softwareName =
info.software?.name
?.toLowerCase()
info.software.name
.toLowerCase()
.substring(0, MAX_LENGTH_INSTANCE.softwareName) || null;
updates.softwareVersion =
info.software?.version?.substring(
info.software.version.substring(
0,
MAX_LENGTH_INSTANCE.softwareVersion,
) || null;
updates.openRegistrations = info.openRegistrations;
updates.maintainerName = info.metadata
? info.metadata.maintainer
? info.metadata.maintainer.name?.substring(
0,
MAX_LENGTH_INSTANCE.maintainerName,
) || null
: null
updates.maintainerName = info.metadata.maintainer
? info.metadata.maintainer.name?.substring(
0,
MAX_LENGTH_INSTANCE.maintainerName,
) || null
: null;
updates.maintainerEmail = info.metadata
? info.metadata.maintainer
? info.metadata.maintainer.email?.substring(
0,
MAX_LENGTH_INSTANCE.maintainerEmail,
) || null
: null
updates.maintainerEmail = info.metadata.maintainer
? info.metadata.maintainer.email?.substring(
0,
MAX_LENGTH_INSTANCE.maintainerEmail,
) || null
: null;
}
@ -115,75 +112,6 @@ export async function fetchInstanceMetadata(
}
}
/**
 * Loosely-typed subset of a nodeinfo document, as read by this module.
 *
 * Every field is optional because the fetched JSON is cast to this type
 * without validation, so any of them may be missing at runtime.
 */
type NodeInfo = {
openRegistrations?: boolean;
software?: {
name?: string;
version?: string;
};
metadata?: {
name?: string;
nodeName?: string;
nodeDescription?: string;
description?: string;
maintainer?: {
name?: string;
email?: string;
};
};
};
/**
 * Fetches the nodeinfo document of a remote instance.
 *
 * Reads `https://<host>/.well-known/nodeinfo`, picks the advertised schema link
 * (2.1 preferred, then 2.0, then 1.0) and downloads the linked document.
 * The result is returned unvalidated.
 *
 * @param instance - The remote instance whose `host` is queried.
 * @returns The fetched nodeinfo document.
 * @throws Error when the well-known document or the nodeinfo document cannot be
 *   fetched, or when no usable link is advertised. Failures are logged before
 *   being rethrown.
 */
async function fetchNodeinfo(instance: Instance): Promise<NodeInfo> {
	const { host } = instance;
	logger.info(`Fetching nodeinfo of ${host} ...`);

	try {
		const wellknown = (await getJson(
			`https://${host}/.well-known/nodeinfo`,
		).catch((e) => {
			throw new Error(
				e.statusCode === 404 ? "No nodeinfo provided" : inspect(e),
			);
		})) as Record<string, unknown>;

		const links = wellknown.links;
		if (!Array.isArray(links)) {
			throw new Error("No wellknown links");
		}

		// Look every schema up eagerly, in the same order as before (1.0, 2.0, 2.1).
		const [link1_0, link2_0, link2_1] = [
			"http://nodeinfo.diaspora.software/ns/schema/1.0",
			"http://nodeinfo.diaspora.software/ns/schema/2.0",
			"http://nodeinfo.diaspora.software/ns/schema/2.1",
		].map((rel) => links.find((candidate) => candidate.rel === rel));

		// Prefer the newest schema that the server advertises.
		const chosen = link2_1 || link2_0 || link1_0;
		if (chosen == null) {
			throw new Error("No nodeinfo link provided");
		}

		const info = await getJson(chosen.href).catch((e) => {
			throw new Error(inspect(e));
		});

		logger.info(`Successfuly fetched nodeinfo of ${host}`);
		return info as NodeInfo;
	} catch (e) {
		logger.error(`Failed to fetch nodeinfo of ${host}:\n${inspect(e)}`);
		throw e;
	}
}
async function fetchDom(instance: Instance): Promise<Window["document"]> {
logger.info(`Fetching HTML of ${instance.host} ...`);
@ -272,7 +200,7 @@ async function fetchIconUrl(
}
async function getThemeColor(
info: NodeInfo | null,
info: Nodeinfo | null,
doc: Window["document"] | null,
manifest: Record<string, any> | null,
): Promise<string | null> {
@ -290,7 +218,7 @@ async function getThemeColor(
}
async function getSiteName(
info: NodeInfo | null,
info: Nodeinfo | null,
doc: Window["document"] | null,
manifest: Record<string, any> | null,
): Promise<string | undefined | null> {
@ -318,7 +246,7 @@ async function getSiteName(
}
async function getDescription(
info: NodeInfo | null,
info: Nodeinfo | null,
doc: Window["document"] | null,
manifest: Record<string, any> | null,
): Promise<string | null> {