apache · sapienza88 · Jun 28, 2025 · Jun 30, 2025 · Jul 1, 2025 · Jul 29, 2025
diff --git a/maven-projects/info/src/main/java/org/apache/graphar/info/ChunkInfoReader.java b/maven-projects/info/src/main/java/org/apache/graphar/info/ChunkInfoReader.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.info;
+
+import java.util.Objects;
+
+/**
+ * Resolves chunk locations for one property group of a vertex type.
+ *
+ * <p>The reader is bound to a {@link VertexInfo} and a {@link PropertyGroup} when it is
+ * constructed; both references are required and are never reassigned.
+ */
+public class ChunkInfoReader {
+    private final VertexInfo cachedVertexInfo;
+    private final PropertyGroup cachedPropertyGroup;
+
+    /**
+     * Creates a reader bound to the given vertex metadata and property group.
+     *
+     * @param vertexInfo metadata of the vertex type; must not be {@code null}
+     * @param propertyGroup property group whose chunks are inspected; must not be {@code null}
+     * @throws NullPointerException if either argument is {@code null}
+     */
+    public ChunkInfoReader(VertexInfo vertexInfo, PropertyGroup propertyGroup) {
+        this.cachedVertexInfo = Objects.requireNonNull(vertexInfo, "vertexInfo");
+        this.cachedPropertyGroup = Objects.requireNonNull(propertyGroup, "propertyGroup");
+    }
+
+    /**
+     * Tells whether the chunk that would hold the given vertex lies inside the reported range.
+     *
+     * <p>The chunk index is {@code internalId / chunkSize}; it is compared against the value
+     * returned by {@link FileReader#getFileCount(VertexInfo, PropertyGroup)}.
+     *
+     * @param internalId internal id of the vertex
+     * @return {@code true} if {@code internalId} is non-negative and its chunk index is below
+     *     the reported count; {@code false} otherwise
+     */
+    public boolean chunkExists(long internalId) {
+        if (internalId < 0) {
+            // A negative id divides to a non-positive index, which could pass the check below.
+            return false;
+        }
+        long chunkIdx = internalId / cachedVertexInfo.getChunkSize();
+        // NOTE(review): getFileCount sums row counts rather than counting files; confirm that
+        // comparing a chunk index against it is the intended existence check.
+        long chunkCount = FileReader.getFileCount(cachedVertexInfo, cachedPropertyGroup);
+        return chunkIdx < chunkCount;
+    }
+
+    /**
+     * Builds the path of one chunk of a property group.
+     *
+     * @param propertyGroup property group the chunk belongs to
+     * @param chunkIndex index of the chunk, appended verbatim after {@code "/chunk"}
+     * @return the property group prefix followed by {@code "/chunk"} and the index
+     */
+    public String getPropertyGroupChunkPath(PropertyGroup propertyGroup, long chunkIndex) {
+        // PropertyGroup will be checked in getPropertyGroupPrefix
+        return cachedVertexInfo.getPropertyGroupPrefix(propertyGroup) + "/chunk" + chunkIndex;
+    }
+}
diff --git a/maven-projects/info/src/main/java/org/apache/graphar/info/FileReader.java b/maven-projects/info/src/main/java/org/apache/graphar/info/FileReader.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.info;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.net.URI;
+
+/** Static helpers that inspect the data files of a vertex property group. */
+public final class FileReader {
+
+    private FileReader() {
+        // Utility class; not meant to be instantiated.
+    }
+
+    // TODO API to read data files based on type (CSV, Parquet, ..)
+
+    /**
+     * Sums the row counts of the part files of a property group.
+     *
+     * <p>Part files are looked up at {@code propertyGroupPrefix + "/part" + i} for every
+     * {@code i} from 0 up to, but excluding, {@code vertexInfo.getChunkSize()}.
+     *
+     * @param vertexInfo metadata of the vertex type owning the property group
+     * @param propertyGroup property group whose part files are counted
+     * @return the total number of rows over all inspected part files
+     * @throws UnsupportedOperationException if the file type is neither CSV nor Parquet
+     */
+    public static long getFileCount(VertexInfo vertexInfo, PropertyGroup propertyGroup) {
+        // TODO check equality test for type
+        // Compare string contents, not references; case is ignored because the exact text
+        // produced by the file type's toString() is not visible from this file.
+        String type = propertyGroup.getFileType().toString();
+        boolean isCsv = "CSV".equalsIgnoreCase(type);
+        boolean isParquet = "Parquet".equalsIgnoreCase(type);
+        if (!isCsv && !isParquet) {
+            throw new UnsupportedOperationException("Unsupported file type: " + type);
+        }
+
+        // NOTE(review): the chunk size is used as the number of part files, and the "/part"
+        // suffix differs from the "/chunk" suffix used by ChunkInfoReader; confirm both.
+        long numberOfParts = vertexInfo.getChunkSize();
+        String chunkBasePath = vertexInfo.getPropertyGroupPrefix(propertyGroup) + "/part";
+        long totalRowCount = 0;
+
+        for (long i = 0; i < numberOfParts; i++) {
+            String chunkPath = chunkBasePath + i;
+            if (isCsv) {
+                totalRowCount += FileReaderUtils.countCsvFileRows(chunkPath);
+            } else {
+                totalRowCount += FileReaderUtils.countParquetFileRows(chunkPath);
+            }
+        }
+        return totalRowCount;
+    }
+}
diff --git a/maven-projects/info/src/main/java/org/apache/graphar/info/FileReaderUtils.java b/maven-projects/info/src/main/java/org/apache/graphar/info/FileReaderUtils.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.info;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UncheckedIOException;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+/**
+ * Row-counting helpers for CSV and Parquet files reached through the Hadoop file system API.
+ */
+public final class FileReaderUtils {
+    /** Hadoop configuration shared by the static helpers of this class. */
+    private static final Configuration CONF = new Configuration();
+
+    private FileReaderUtils() {
+        // Utility class; not meant to be instantiated.
+    }
+
+    /**
+     * Counts the lines of a CSV file.
+     *
+     * <p>Every line is counted. NOTE(review): a header line, if present, is included in the
+     * result; confirm whether callers expect data rows only.
+     *
+     * @param filePath path of the CSV file
+     * @return the number of lines in the file
+     * @throws UncheckedIOException if the file cannot be opened or read
+     */
+    public static long countCsvFileRows(String filePath) {
+        Path csvFilePath = new Path(filePath);
+        try {
+            // Hadoop caches and shares FileSystem instances, so the instance is deliberately
+            // left open; only the streams opened on it are closed.
+            FileSystem fs = csvFilePath.getFileSystem(CONF);
+            try (FSDataInputStream inputStream = fs.open(csvFilePath);
+                    BufferedReader reader =
+                            new BufferedReader(
+                                    new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
+                long lineCount = 0;
+                while (reader.readLine() != null) {
+                    lineCount++;
+                }
+                return lineCount;
+            }
+        } catch (IOException e) {
+            throw new UncheckedIOException("Error reading CSV file: " + filePath, e);
+        }
+    }
+
+    /**
+     * Counts the rows of a Parquet file from its footer metadata.
+     *
+     * @param filePath path of the Parquet file
+     * @return the sum of the row counts of all blocks listed in the footer
+     * @throws UncheckedIOException if the file cannot be opened or its footer cannot be read
+     */
+    public static long countParquetFileRows(String filePath) {
+        Path parquetFilePath = new Path(filePath);
+        // Open the Parquet file
+        try (ParquetFileReader reader = ParquetFileReader.open(CONF, parquetFilePath)) {
+            ParquetMetadata metadata = reader.getFooter();
+            long totalRowCount = 0;
+            List<BlockMetaData> blocks = metadata.getBlocks();
+            for (BlockMetaData block : blocks) {
+                totalRowCount += block.getRowCount();
+            }
+            return totalRowCount;
+        } catch (IOException e) {
+            throw new UncheckedIOException("Error reading Parquet file: " + filePath, e);
+        }
+    }
+}
diff --git a/maven-projects/info/src/main/java/org/apache/graphar/info/VertexInfo.java b/maven-projects/info/src/main/java/org/apache/graphar/info/VertexInfo.java
@@ -101,10 +101,6 @@ public String getPropertyGroupPrefix(PropertyGroup propertyGroup) {
         return getPrefix() + "/" + propertyGroup.getPrefix();
     }
 
-    public String getPropertyGroupChunkPath(PropertyGroup propertyGroup, long chunkIndex) {
-        // PropertyGroup will be checked in getPropertyGroupPrefix
-        return getPropertyGroupPrefix(propertyGroup) + "/chunk" + chunkIndex;
-    }
 
     public String getVerticesNumFilePath() {
         return getPrefix() + "/vertex_count";