Skip to content

Commit

Permalink
Merge pull request #8858 from neondatabase/releases/2024-08-28-comput…
Browse files Browse the repository at this point in the history
…e-only

Compute release 2024-08-28
  • Loading branch information
lubennikovaav committed Aug 28, 2024
2 parents 4355dba + 7820c57 commit aa8c5d1
Show file tree
Hide file tree
Showing 46 changed files with 1,140 additions and 502 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.8"
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
Expand Down
9 changes: 9 additions & 0 deletions libs/pageserver_api/src/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,15 @@ impl Key {
field5: u8::MAX,
field6: u32::MAX,
};
/// A key slightly smaller than [`Key::MAX`] for use in layer key ranges to avoid them to be confused with L0 layers
pub const NON_L0_MAX: Key = Key {
field1: u8::MAX,
field2: u32::MAX,
field3: u32::MAX,
field4: u32::MAX,
field5: u8::MAX,
field6: u32::MAX - 1,
};

pub fn from_hex(s: &str) -> Result<Self> {
if s.len() != 36 {
Expand Down
1 change: 1 addition & 0 deletions libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ pub struct TimelineInfo {
pub pg_version: u32,

pub state: TimelineState,
pub is_archived: bool,

pub walreceiver_status: String,

Expand Down
52 changes: 49 additions & 3 deletions pageserver/src/bin/pageserver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,56 @@ fn main() -> anyhow::Result<()> {
info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access");

// The tenants directory contains all the pageserver local disk state.
// Create if not exists and make sure all the contents are durable before proceeding.
// Ensuring durability eliminates a whole bug class where we come up after an unclean shutdown.
// After unclea shutdown, we don't know if all the filesystem content we can read via syscalls is actually durable or not.
// Examples for that: OOM kill, systemd killing us during shutdown, self abort due to unrecoverable IO error.
let tenants_path = conf.tenants_path();
if !tenants_path.exists() {
utils::crashsafe::create_dir_all(conf.tenants_path())
.with_context(|| format!("Failed to create tenants root dir at '{tenants_path}'"))?;
{
let open = || {
nix::dir::Dir::open(
tenants_path.as_std_path(),
nix::fcntl::OFlag::O_DIRECTORY | nix::fcntl::OFlag::O_RDONLY,
nix::sys::stat::Mode::empty(),
)
};
let dirfd = match open() {
Ok(dirfd) => dirfd,
Err(e) => match e {
nix::errno::Errno::ENOENT => {
utils::crashsafe::create_dir_all(&tenants_path).with_context(|| {
format!("Failed to create tenants root dir at '{tenants_path}'")
})?;
open().context("open tenants dir after creating it")?
}
e => anyhow::bail!(e),
},
};

let started = Instant::now();
// Linux guarantees durability for syncfs.
// POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
#[cfg(target_os = "linux")]
{
use std::os::fd::AsRawFd;
nix::unistd::syncfs(dirfd.as_raw_fd()).context("syncfs")?;
}
#[cfg(target_os = "macos")]
{
// macOS is not a production platform for Neon, don't even bother.
drop(dirfd);
}
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
{
compile_error!("Unsupported OS");
}

let elapsed = started.elapsed();
info!(
elapsed_ms = elapsed.as_millis(),
"made tenant directory contents durable"
);
}

// Initialize up failpoints support
Expand Down
25 changes: 22 additions & 3 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,24 @@ impl From<crate::tenant::DeleteTimelineError> for ApiError {
}
}

impl From<crate::tenant::TimelineArchivalError> for ApiError {
fn from(value: crate::tenant::TimelineArchivalError) -> Self {
use crate::tenant::TimelineArchivalError::*;
match value {
NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()),
Timeout => ApiError::Timeout("hit pageserver internal timeout".into()),
HasUnarchivedChildren(children) => ApiError::PreconditionFailed(
format!(
"Cannot archive timeline which has non-archived child timelines: {children:?}"
)
.into_boxed_str(),
),
a @ AlreadyInProgress => ApiError::Conflict(a.to_string()),
Other(e) => ApiError::InternalServerError(e),
}
}
}

impl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {
fn from(value: crate::tenant::mgr::DeleteTimelineError) -> Self {
use crate::tenant::mgr::DeleteTimelineError::*;
Expand Down Expand Up @@ -405,6 +423,8 @@ async fn build_timeline_info_common(
let current_logical_size = timeline.get_current_logical_size(logical_size_task_priority, ctx);
let current_physical_size = Some(timeline.layer_size_sum().await);
let state = timeline.current_state();
// Report is_archived = false if the timeline is still loading
let is_archived = timeline.is_archived().unwrap_or(false);
let remote_consistent_lsn_projected = timeline
.get_remote_consistent_lsn_projected()
.unwrap_or(Lsn(0));
Expand Down Expand Up @@ -445,6 +465,7 @@ async fn build_timeline_info_common(
pg_version: timeline.pg_version,

state,
is_archived,

walreceiver_status,

Expand Down Expand Up @@ -686,9 +707,7 @@ async fn timeline_archival_config_handler(

tenant
.apply_timeline_archival_config(timeline_id, request_data.state)
.await
.context("applying archival config")
.map_err(ApiError::InternalServerError)?;
.await?;
Ok::<_, ApiError>(())
}
.instrument(info_span!("timeline_archival_config",
Expand Down
Loading

1 comment on commit aa8c5d1

@github-actions
Copy link

@github-actions github-actions bot commented on aa8c5d1 Aug 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3260 tests run: 3176 passed, 0 failed, 84 skipped (full report)


Flaky tests (3)

Postgres 15

Postgres 14

Test coverage report is not available

The comment gets automatically updated with the latest test results
aa8c5d1 at 2024-08-29T12:28:12.450Z :recycle:

Please sign in to comment.