/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKeyImpl;
import org.apache.hadoop.hbase.wal.WALProvider;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.ipc.RemoteException;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Throwables;

/**
 * Tests for conditions that should trigger RegionServer aborts when rolling the current WAL fails.
 * <p>
 * The suite is parameterized over the WAL provider name ({@code "filesystem"} and
 * {@code "asyncfs"}). A mini cluster is started before every test method and shut down after it,
 * so each test runs against a cluster configured with the provider under test.
 */
@RunWith(Parameterized.class)
@Category({ RegionServerTests.class, MediumTests.class })
public class TestLogRollAbort {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestLogRollAbort.class);

  // Log under this test's own class so its output is attributed correctly.
  private static final Logger LOG = LoggerFactory.getLogger(TestLogRollAbort.class);

  // Handles onto the mini cluster; re-assigned in setUp() because the cluster is restarted for
  // every test method.
  private static MiniDFSCluster dfsCluster;
  private static Admin admin;
  private static SingleProcessHBaseCluster cluster;
  protected final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();

  /* For the split-then-roll test */
  private static final Path HBASEDIR = new Path("/hbase");
  private static final Path HBASELOGDIR = new Path("/hbaselog");
  private static final Path OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME);

  /**
   * Edits the shared test configuration once, before any HDFS or HBase cluster is started, so the
   * values are picked up at cluster startup.
   * <p>
   * The WAL provider is intentionally not configured here: {@link #setUp()} sets it from the test
   * parameter before each cluster start.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    Configuration testConf = TEST_UTIL.getConfiguration();

    // Tweak default timeout values down for faster recovery
    testConf.setInt("hbase.regionserver.logroll.errors.tolerated", 2);
    testConf.setInt("hbase.rpc.timeout", 10 * 1000);

    // Increase the amount of time between client retries
    testConf.setLong("hbase.client.pause", 5 * 1000);

    // lower the namenode & datanode heartbeat so the namenode
    // quickly detects datanode failures
    testConf.setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
    testConf.setInt("dfs.heartbeat.interval", 1);
    // the namenode might still try to choose the recently-dead datanode
    // for a pipeline, so retry building a new pipeline multiple times
    testConf.setInt("dfs.client.block.write.retries", 10);
  }

  /**
   * Supplies the WAL provider names the suite is run with.
   * @return one single-element parameter array per WAL provider name
   */
  @Parameters(name = "{index}: walProvider={0}")
  public static List<Object[]> params() {
    return Arrays.asList(new Object[] { "filesystem" }, new Object[] { "asyncfs" });
  }

  private Configuration conf;
  private FileSystem fs;

  /** Name of the WAL provider under test; injected by the {@link Parameterized} runner. */
  @Parameter
  public String walProvider;

  /**
   * Starts a mini cluster that uses the WAL provider under test and caches handles to it.
   */
  @Before
  public void setUp() throws Exception {
    // Must be set before the cluster starts so the cluster is created with this provider.
    TEST_UTIL.getConfiguration().set(WALFactory.WAL_PROVIDER, walProvider);
    TEST_UTIL.startMiniCluster(2);

    cluster = TEST_UTIL.getHBaseCluster();
    dfsCluster = TEST_UTIL.getDFSCluster();
    admin = TEST_UTIL.getAdmin();
    conf = TEST_UTIL.getConfiguration();
    fs = TEST_UTIL.getDFSCluster().getFileSystem();

    // disable region rebalancing (interferes with log watching)
    cluster.getMaster().balanceSwitch(false);
    // Point the root and WAL root directories at the fixed paths used by the split-then-roll test.
    CommonFSUtils.setRootDir(conf, HBASEDIR);
    CommonFSUtils.setWALRootDir(conf, HBASELOGDIR);
  }

  /** Shuts down the mini cluster started by {@link #setUp()}. */
  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Tests that RegionServer aborts if we hit an error closing the WAL when there are unsynced WAL
   * edits. See HBASE-4282.
   * <p>
   * The test writes two rows, restarts the datanodes and then forces a WAL roll. A
   * {@link FailedLogCloseException} from the roll is accepted; any other throwable is only logged.
   */
  @Test
  public void testRSAbortWithUnflushedEdits() throws Exception {
    LOG.info("Starting testRSAbortWithUnflushedEdits()");

    // When the hbase:meta table can be opened, the region servers are running
    TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME).close();

    // Create the test table and open it
    TableName tableName = TableName.valueOf(getClass().getSimpleName());
    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
    admin.createTable(desc);

    // try-with-resources closes the table on every exit path.
    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
      HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(tableName);
      WAL log = server.getWAL(null);

      Put firstPut = new Put(Bytes.toBytes("row2001"));
      firstPut.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
      table.put(firstPut);

      log.sync();

      Put secondPut = new Put(Bytes.toBytes("row2002"));
      secondPut.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
      table.put(secondPut);

      dfsCluster.restartDataNodes();
      LOG.info("Restarted datanodes");

      try {
        log.rollWriter(true);
      } catch (FailedLogCloseException flce) {
        // Expected exception. We used to expect that there would be unsynced appends, but this is
        // not reliable now that sync plays a role in WAL rolling. The above puts also now call
        // sync.
      } catch (Throwable t) {
        // NOTE(review): an unexpected throwable is only logged and does not fail the test; this
        // leniency is kept as-is. Confirm whether it should become a hard failure.
        LOG.error(HBaseMarkers.FATAL, "FAILED TEST: Got wrong exception", t);
      }
    }
  }

  /**
   * Tests the case where a RegionServer enters a GC pause, comes back online after the master
   * declared it dead and started to split. Want log rolling after a master split to fail. See
   * HBASE-2312.
   * <p>
   * The roll is expected to throw an {@link IOException} that is, or is caused by, a
   * {@link FileNotFoundException} (after unwrapping a {@link RemoteException} if present).
   * @throws IOException if writing, renaming, splitting or cleaning up the WAL fails unexpectedly
   */
  @Test
  public void testLogRollAfterSplitStart() throws IOException {
    LOG.info("Verify wal roll after split starts will fail.");
    String logName =
      ServerName.valueOf("testLogRollAfterSplitStart", 16010, EnvironmentEdgeManager.currentTime())
        .toString();
    Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName));
    final WALFactory wals = new WALFactory(conf, logName);

    try {
      // put some entries in a WAL
      TableName tableName = TableName.valueOf(this.getClass().getName());
      RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build();
      WAL log = wals.getWAL(regionInfo);
      MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);

      int total = 20;
      for (int i = 0; i < total; i++) {
        WALEdit kvs = new WALEdit();
        kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
        NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
        scopes.put(Bytes.toBytes("column"), 0);
        log.appendData(regionInfo, new WALKeyImpl(regionInfo.getEncodedNameAsBytes(), tableName,
          EnvironmentEdgeManager.currentTime(), mvcc, scopes), kvs);
      }
      // Send the data to HDFS datanodes and close the HDFS writer
      log.sync();
      closeWriter((AbstractFSWAL<?>) log);

      // code taken from MasterFileSystem.getLogDirs(), which is called from
      // MasterFileSystem.splitLog() handles RS shutdowns (as observed by the splitting process)
      // rename the directory so a rogue RS doesn't create more WALs
      Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
      if (!fs.rename(thisTestsDir, rsSplitDir)) {
        throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
      }
      LOG.debug("Renamed region directory: {}", rsSplitDir);

      LOG.debug("Processing the old log files.");
      WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);

      LOG.debug("Trying to roll the WAL.");
      IOException error = assertThrows(IOException.class, () -> log.rollWriter());
      // The failure may arrive wrapped in a RemoteException; unwrap it before inspecting the type.
      if (error instanceof RemoteException) {
        error = ((RemoteException) error).unwrapRemoteException();
      }
      assertTrue("unexpected error: " + Throwables.getStackTraceAsString(error),
        error instanceof FileNotFoundException
          || error.getCause() instanceof FileNotFoundException);
    } finally {
      try {
        wals.close();
      } finally {
        // Remove the test's WAL directory even when closing the factory fails.
        if (fs.exists(thisTestsDir)) {
          fs.delete(thisTestsDir, true);
        }
      }
    }
  }

  /**
   * Closes the given WAL's current writer without installing a replacement.
   * <p>
   * After waiting for the WAL's safe point, the current writer is closed against the WAL's old
   * path, the roll bookkeeping is updated with a {@code null} new path and the closed file's
   * length, the writer reference is cleared, and any pending roll request is reset.
   * <p>
   * NOTE(review): the call order presumably mirrors the close half of a regular writer
   * replacement in {@link AbstractFSWAL}; confirm against that class before reordering.
   * @param wal the WAL whose current writer should be closed
   */
  private <W extends WALProvider.WriterBase> void closeWriter(AbstractFSWAL<W> wal) {
    wal.waitForSafePoint();
    // Capture the length before closing; it is passed to the roll bookkeeping below.
    long oldFileLen = wal.writer.getLength();
    wal.closeWriter(wal.writer, wal.getOldPath());
    wal.logRollAndSetupWalProps(wal.getOldPath(), null, oldFileLen);
    wal.writer = null;
    wal.onWriterReplaced(null);
    wal.rollRequested.set(false);
  }
}
